365 files changed, 8716 insertions, 3973 deletions
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst index ab08852e53ae..7940da9bc6c1 100644 --- a/Documentation/bpf/btf.rst +++ b/Documentation/bpf/btf.rst @@ -503,6 +503,19 @@ valid index (starting from 0) pointing to a member or an argument. * ``info.vlen``: 0 * ``type``: the type with ``btf_type_tag`` attribute +Currently, ``BTF_KIND_TYPE_TAG`` is only emitted for pointer types. +It has the following btf type chain: +:: + + ptr -> [type_tag]* + -> [const | volatile | restrict | typedef]* + -> base_type + +Basically, a pointer type points to zero or more +type_tag, then zero or more const/volatile/restrict/typedef +and finally the base type. The base type is one of +int, ptr, array, struct, union, enum, func_proto and float types. + 3. BTF Kernel API ================= diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index 3704836fe6df..5300837ac2c9 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -22,7 +22,13 @@ necessary across calls. Instruction encoding ==================== -eBPF uses 64-bit instructions with the following encoding: +eBPF has two instruction encodings: + + * the basic instruction encoding, which uses 64 bits to encode an instruction + * the wide instruction encoding, which appends a second 64-bit immediate value + (imm64) after the basic instruction for a total of 128 bits. + +The basic instruction encoding looks as follows: ============= ======= =============== ==================== ============ 32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB) @@ -82,9 +88,9 @@ BPF_ALU uses 32-bit wide operands while BPF_ALU64 uses 64-bit wide operands for otherwise identical operations. The code field encodes the operation as below: - ======== ===== ========================== + ======== ===== ================================================= code value description - ======== ===== ========================== + ======== ===== ================================================= BPF_ADD 0x00 dst += src BPF_SUB 0x10 dst -= src BPF_MUL 0x20 dst \*= src @@ -98,8 +104,8 @@ The code field encodes the operation as below: BPF_XOR 0xa0 dst ^= src BPF_MOV 0xb0 dst = src BPF_ARSH 0xc0 sign extending shift right - BPF_END 0xd0 endianness conversion - ======== ===== ========================== + BPF_END 0xd0 byte swap operations (see separate section below) + ======== ===== ================================================= BPF_ADD | BPF_X | BPF_ALU means:: @@ -118,6 +124,42 @@ BPF_XOR | BPF_K | BPF_ALU64 means:: src_reg = src_reg ^ imm32 +Byte swap instructions +---------------------- + +The byte swap instructions use an instruction class of ``BPF_ALU`` and a 4-bit +code field of ``BPF_END``. + +The byte swap instructions operate on the destination register +only and do not use a separate source register or immediate value. + +The 1-bit source operand field in the opcode is used to select what byte +order the operation converts from or to: + + ========= ===== ================================================= + source value description + ========= ===== ================================================= + BPF_TO_LE 0x00 convert between host byte order and little endian + BPF_TO_BE 0x08 convert between host byte order and big endian + ========= ===== ================================================= + +The imm field encodes the width of the swap operations. The following widths +are supported: 16, 32 and 64.
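As a minimal sketch, assuming the uapi constants from ``linux/bpf.h``, a 16-bit little-endian swap could be hand-assembled like so; the register choice is arbitrary, and the examples that follow give the corresponding semantics:

.. code-block:: C

	#include <linux/bpf.h>

	/* BPF_ALU | BPF_TO_LE | BPF_END with imm = 16,
	 * i.e. dst_reg = htole16(dst_reg)
	 */
	struct bpf_insn le16_swap = {
		.code    = BPF_ALU | BPF_END | BPF_TO_LE,
		.dst_reg = BPF_REG_2,	/* arbitrary destination register */
		.imm     = 16,		/* swap width in bits */
	};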
+ +Examples: + +``BPF_ALU | BPF_TO_LE | BPF_END`` with imm = 16 means:: + + dst_reg = htole16(dst_reg) + +``BPF_ALU | BPF_TO_BE | BPF_END`` with imm = 64 means:: + + dst_reg = htobe64(dst_reg) + +``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and +``BPF_TO_BE`` respectively. + + Jump instructions ----------------- @@ -176,63 +218,96 @@ The mode modifier is one of: ============= ===== ==================================== mode modifier value description ============= ===== ==================================== - BPF_IMM 0x00 used for 64-bit mov - BPF_ABS 0x20 legacy BPF packet access - BPF_IND 0x40 legacy BPF packet access - BPF_MEM 0x60 all normal load and store operations + BPF_IMM 0x00 64-bit immediate instructions + BPF_ABS 0x20 legacy BPF packet access (absolute) + BPF_IND 0x40 legacy BPF packet access (indirect) + BPF_MEM 0x60 regular load and store operations BPF_ATOMIC 0xc0 atomic operations ============= ===== ==================================== -BPF_MEM | <size> | BPF_STX means:: + +Regular load and store operations +--------------------------------- + +The ``BPF_MEM`` mode modifier is used to encode regular load and store +instructions that transfer data between a register and memory. + +``BPF_MEM | <size> | BPF_STX`` means:: *(size *) (dst_reg + off) = src_reg -BPF_MEM | <size> | BPF_ST means:: +``BPF_MEM | <size> | BPF_ST`` means:: *(size *) (dst_reg + off) = imm32 -BPF_MEM | <size> | BPF_LDX means:: +``BPF_MEM | <size> | BPF_LDX`` means:: dst_reg = *(size *) (src_reg + off) -Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. +Where size is one of: ``BPF_B``, ``BPF_H``, ``BPF_W``, or ``BPF_DW``. Atomic operations ----------------- -eBPF includes atomic operations, which use the immediate field for extra -encoding:: +Atomic operations are operations that operate on memory and cannot be +interrupted or corrupted by other accesses to the same memory region, +whether by other eBPF programs or by means outside of this specification. + +All atomic operations supported by eBPF are encoded as store operations +that use the ``BPF_ATOMIC`` mode modifier as follows: + + * ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations + * ``BPF_ATOMIC | BPF_DW | BPF_STX`` for 64-bit operations + * 8-bit and 16-bit wide atomic operations are not supported. - .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg - .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg +The imm field is used to encode the actual atomic operation. +Simple atomic operations use a subset of the values defined to encode +arithmetic operations in the imm field to encode the atomic operation: -The basic atomic operations supported are:: + ======== ===== =========== + imm value description + ======== ===== =========== + BPF_ADD 0x00 atomic add + BPF_OR 0x40 atomic or + BPF_AND 0x50 atomic and + BPF_XOR 0xa0 atomic xor + ======== ===== =========== - BPF_ADD - BPF_AND - BPF_OR - BPF_XOR -Each having equivalent semantics with the ``BPF_ADD`` example, that is: the -memory location addresed by ``dst_reg + off`` is atomically modified, with -``src_reg`` as the other operand. If the ``BPF_FETCH`` flag is set in the -immediate, then these operations also overwrite ``src_reg`` with the -value that was in memory before it was modified.
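As a minimal sketch, assuming the uapi constants from ``linux/bpf.h``, a 32-bit atomic fetch-add under this encoding could be hand-assembled like so (register choices are arbitrary):

.. code-block:: C

	#include <linux/bpf.h>

	/* BPF_ATOMIC | BPF_W | BPF_STX with imm = BPF_ADD | BPF_FETCH:
	 * atomically add src_reg to *(u32 *)(dst_reg + off) and write the
	 * old memory value back into src_reg
	 */
	struct bpf_insn fetch_add = {
		.code    = BPF_STX | BPF_ATOMIC | BPF_W,
		.dst_reg = BPF_REG_1,	/* holds the memory address */
		.src_reg = BPF_REG_2,	/* addend; receives the old value */
		.off     = 0,
		.imm     = BPF_ADD | BPF_FETCH,
	};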
+``BPF_ATOMIC | BPF_W | BPF_STX`` with imm = BPF_ADD means:: -The more special operations are:: + *(u32 *)(dst_reg + off16) += src_reg - BPF_XCHG +``BPF_ATOMIC | BPF_DW | BPF_STX`` with imm = BPF_ADD means:: -This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg + -off``. :: + *(u64 *)(dst_reg + off16) += src_reg - BPF_CMPXCHG +``BPF_XADD`` is a deprecated name for ``BPF_ATOMIC | BPF_ADD``. -This atomically compares the value addressed by ``dst_reg + off`` with -``R0``. If they match it is replaced with ``src_reg``. In either case, the -value that was there before is zero-extended and loaded back to ``R0``. +In addition to the simple atomic operations, there is also a modifier and +two complex atomic operations: -Note that 1 and 2 byte atomic operations are not supported. + =========== ================ =========================== + imm value description + =========== ================ =========================== + BPF_FETCH 0x01 modifier: return old value + BPF_XCHG 0xe0 | BPF_FETCH atomic exchange + BPF_CMPXCHG 0xf0 | BPF_FETCH atomic compare and exchange + =========== ================ =========================== + +The ``BPF_FETCH`` modifier is optional for simple atomic operations, and +always set for the complex atomic operations. If the ``BPF_FETCH`` flag +is set, then the operation also overwrites ``src_reg`` with the value that +was in memory before it was modified. + +The ``BPF_XCHG`` operation atomically exchanges ``src_reg`` with the value +addressed by ``dst_reg + off``. + +The ``BPF_CMPXCHG`` operation atomically compares the value addressed by +``dst_reg + off`` with ``R0``. If they match, the value addressed by +``dst_reg + off`` is replaced with ``src_reg``. In either case, the +value that was at ``dst_reg + off`` before the operation is zero-extended +and loaded back to ``R0``. Clang can generate atomic instructions by default when ``-mcpu=v3`` is enabled. If a lower version for ``-mcpu`` is set, the only atomic instruction @@ -240,40 +315,52 @@ Clang can generate is ``BPF_ADD`` *without* ``BPF_FETCH``. If you need to enable the atomics features, while keeping a lower ``-mcpu`` version, you can use ``-Xclang -target-feature -Xclang +alu32``. -You may encounter ``BPF_XADD`` - this is a legacy name for ``BPF_ATOMIC``, -referring to the exclusive-add operation encoded when the immediate field is -zero. +64-bit immediate instructions +----------------------------- -16-byte instructions -------------------- +Instructions with the ``BPF_IMM`` mode modifier use the wide instruction +encoding for an extra imm64 value. -eBPF has one 16-byte instruction: ``BPF_LD | BPF_DW | BPF_IMM`` which consists -of two consecutive ``struct bpf_insn`` 8-byte blocks and interpreted as single -instruction that loads 64-bit immediate value into a dst_reg. +There is currently only one such instruction. -Packet access instructions -------------------------- +``BPF_LD | BPF_DW | BPF_IMM`` means:: -eBPF has two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and -(BPF_IND | <size> | BPF_LD) which are used to access packet data. + dst_reg = imm64 -They had to be carried over from classic BPF to have strong performance of -socket filters running in eBPF interpreter. These instructions can only -be used when interpreter context is a pointer to ``struct sk_buff`` and -have seven implicit operands. Register R6 is an implicit input that must -contain pointer to sk_buff. Register R0 is an implicit output which contains -the data fetched from the packet.
Registers R1-R5 are scratch registers -and must not be used to store the data across BPF_ABS | BPF_LD or -BPF_IND | BPF_LD instructions. -These instructions have implicit program exit condition as well. When -eBPF program is trying to access the data beyond the packet boundary, -the interpreter will abort the execution of the program. JIT compilers -therefore must preserve this property. src_reg and imm32 fields are -explicit inputs to these instructions. +Legacy BPF Packet access instructions +------------------------------------- -For example, BPF_IND | BPF_W | BPF_LD means:: +eBPF has special instructions for access to packet data that have been +carried over from classic BPF to retain the performance of legacy socket +filters running in the eBPF interpreter. - R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32)) +The instructions come in two forms: ``BPF_ABS | <size> | BPF_LD`` and +``BPF_IND | <size> | BPF_LD``. -and R1 - R5 are clobbered. +These instructions are used to access packet data and can only be used when +the program context is a pointer to a networking packet. ``BPF_ABS`` +accesses packet data at an absolute offset specified by the immediate data +and ``BPF_IND`` accesses packet data at an offset that includes the value of +a register in addition to the immediate data. + +These instructions have seven implicit operands: + + * Register R6 is an implicit input that must contain a pointer to a + struct sk_buff. + * Register R0 is an implicit output which contains the data fetched from + the packet. + * Registers R1-R5 are scratch registers that are clobbered by + ``BPF_ABS | BPF_LD`` or ``BPF_IND | BPF_LD`` instructions. + +These instructions have an implicit program exit condition as well. When an +eBPF program tries to access data beyond the packet boundary, the +program execution will be aborted. + +``BPF_ABS | BPF_W | BPF_LD`` means:: + + R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + imm32)) + +``BPF_IND | BPF_W | BPF_LD`` means:: + + R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32)) diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml index bda821065a2b..ee2ccacc39ff 100644 --- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml +++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml @@ -45,8 +45,10 @@ properties: - items: - enum: + - renesas,r9a07g043-gbeth # RZ/G2UL - renesas,r9a07g044-gbeth # RZ/G2{L,LC} - - const: renesas,rzg2l-gbeth # RZ/G2L + - renesas,r9a07g054-gbeth # RZ/V2L + - const: renesas,rzg2l-gbeth # RZ/{G2L,G2UL,V2L} family reg: true diff --git a/Documentation/networking/mctp.rst b/Documentation/networking/mctp.rst index 46f74bffce0f..c628cb5406d2 100644 --- a/Documentation/networking/mctp.rst +++ b/Documentation/networking/mctp.rst @@ -212,6 +212,54 @@ remote address is already known, or the message does not require a reply. Like the send calls, sockets will only receive responses to requests they have sent (TO=1) and may only respond (TO=0) to requests they have received. +``ioctl(SIOCMCTPALLOCTAG)`` and ``ioctl(SIOCMCTPDROPTAG)`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +These ioctls give applications more control over MCTP message tags, by allocating +(and dropping) tag values explicitly, rather than the kernel automatically +allocating a per-message tag at ``sendmsg()`` time.
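A minimal usage sketch, assuming the ``struct mctp_ioc_tag_ctl`` layout and ioctl request values described below (from ``linux/mctp.h``); the ``alloc_reply_tag()`` helper is hypothetical:

.. code-block:: C

	#include <sys/ioctl.h>
	#include <linux/mctp.h>

	/* allocate a preallocated tag for 'peer' on socket 'sd'; the returned
	 * tag (including MCTP_TAG_OWNER | MCTP_TAG_PREALLOC) can be used as-is
	 * in the smctp_tag field of later sendmsg() calls
	 */
	static int alloc_reply_tag(int sd, mctp_eid_t peer, __u8 *tag)
	{
		struct mctp_ioc_tag_ctl ctl = { .peer_addr = peer };

		if (ioctl(sd, SIOCMCTPALLOCTAG, &ctl) < 0)
			return -1;
		*tag = ctl.tag;
		return 0;
	}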
+ +In general, you will only need to use these ioctls if your MCTP protocol does +not fit the usual request/response model: for example, if you need to persist +tags across multiple requests, or if a request may generate more than one response. +In these cases, the ioctls allow you to decouple the tag allocation (and +release) from individual message send and receive operations. + +Both ioctls are passed a pointer to a ``struct mctp_ioc_tag_ctl``: + +.. code-block:: C + + struct mctp_ioc_tag_ctl { + mctp_eid_t peer_addr; + __u8 tag; + __u16 flags; + }; + +``SIOCMCTPALLOCTAG`` allocates a tag for a specific peer, which an application +can use in future ``sendmsg()`` calls. The application populates the +``peer_addr`` member with the remote EID. Other fields must be zero. + +On return, the ``tag`` member will be populated with the allocated tag value. +The allocated tag will have the following tag bits set: + + - ``MCTP_TAG_OWNER``: it only makes sense to allocate tags if you're the tag + owner + + - ``MCTP_TAG_PREALLOC``: to indicate to ``sendmsg()`` that this is a + preallocated tag. + + - ... and the actual tag value, within the least-significant three bits + (``MCTP_TAG_MASK``). Note that zero is a valid tag value. + +The tag value should be used as-is for the ``smctp_tag`` member of ``struct +sockaddr_mctp``. + +``SIOCMCTPDROPTAG`` releases a tag that has been previously allocated by a +``SIOCMCTPALLOCTAG`` ioctl. The ``peer_addr`` must be the same as used for the +allocation, and the ``tag`` value must match exactly the tag returned from the +allocation (including the ``MCTP_TAG_OWNER`` and ``MCTP_TAG_PREALLOC`` bits). +The ``flags`` field must be zero. Kernel internals ================ diff --git a/MAINTAINERS b/MAINTAINERS index c7169da15ab4..e70f5b26bc50 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3523,6 +3523,8 @@ F: net/sched/act_bpf.c F: net/sched/cls_bpf.c F: samples/bpf/ F: scripts/bpf_doc.py +F: scripts/pahole-flags.sh +F: scripts/pahole-version.sh F: tools/bpf/ F: tools/lib/bpf/ F: tools/testing/selftests/bpf/ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index e96d4d87291f..74f9a9b6a053 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1143,6 +1143,11 @@ out: return prog; } +bool bpf_jit_supports_kfunc_call(void) +{ + return true; +} + u64 bpf_jit_alloc_exec_limit(void) { return VMALLOC_END - VMALLOC_START; diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 56dd1f4e3e44..a4f4d347e6bd 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -264,7 +264,7 @@ skip_codegen_passes: fp->jited = 1; fp->jited_len = proglen + FUNCTION_DESCR_SIZE; - bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); + bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + bpf_hdr->size); if (!fp->is_func || extra_pass) { bpf_jit_binary_lock_ro(bpf_hdr); bpf_prog_fill_jited_linfo(fp, addrs); diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index b1e38784eb23..fa0759bfe498 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1599,7 +1599,7 @@ skip_init_ctx: if (bpf_jit_enable > 1) bpf_jit_dump(prog->len, image_size, pass, ctx.image); - bpf_flush_icache(header, (u8 *)header + (header->pages * PAGE_SIZE)); + bpf_flush_icache(header, (u8 *)header + header->size); if (!prog->is_func || extra_pass) { bpf_jit_binary_lock_ro(header); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 
9f5bd41bf660..995f2dc28631 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -158,6 +158,7 @@ config X86 select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE + select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if X86_64 diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index b7421780e4e9..4cc18ba1b75e 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -44,6 +44,7 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len); extern void *text_poke(void *addr, const void *opcode, size_t len); extern void text_poke_sync(void); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); +extern void *text_poke_copy(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5007c3ffe96f..018b61febf0e 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1102,6 +1102,40 @@ void *text_poke_kgdb(void *addr, const void *opcode, size_t len) return __text_poke(addr, opcode, len); } +/** + * text_poke_copy - Copy instructions into (an unused part of) RX memory + * @addr: address to modify + * @opcode: source of the copy + * @len: length to copy, could be more than 2x PAGE_SIZE + * + * Not safe against concurrent execution; useful for JITs to dump + * new code blocks into unused regions of RX memory. Can be used in + * conjunction with synchronize_rcu_tasks() to wait for existing + * execution to quiesce after having made sure no existing function + * pointers are live. 
+ */ +void *text_poke_copy(void *addr, const void *opcode, size_t len) +{ + unsigned long start = (unsigned long)addr; + size_t patched = 0; + + if (WARN_ON_ONCE(core_kernel_text(start))) + return NULL; + + mutex_lock(&text_mutex); + while (patched < len) { + unsigned long ptr = start + patched; + size_t s; + + s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched); + + __text_poke((void *)ptr, opcode + patched, s); + patched += s; + } + mutex_unlock(&text_mutex); + return addr; +} + static void do_sync_core(void *info) { sync_core(); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 2b1e266ff95c..c7db0fe4de2f 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -330,8 +330,7 @@ static int emit_jump(u8 **pprog, void *func, void *ip) } static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, - void *old_addr, void *new_addr, - const bool text_live) + void *old_addr, void *new_addr) { const u8 *nop_insn = x86_nops[5]; u8 old_insn[X86_PATCH_SIZE]; @@ -365,10 +364,7 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, goto out; ret = 1; if (memcmp(ip, new_insn, X86_PATCH_SIZE)) { - if (text_live) - text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL); - else - memcpy(ip, new_insn, X86_PATCH_SIZE); + text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL); ret = 0; } out: @@ -384,7 +380,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, /* BPF poking in modules is not supported */ return -EINVAL; - return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true); + return __bpf_arch_text_poke(ip, t, old_addr, new_addr); } #define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8) @@ -558,24 +554,15 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog) mutex_lock(&array->aux->poke_mutex); target = array->ptrs[poke->tail_call.key]; if (target) { - /* Plain memcpy is used when image is not live yet - * and still not locked as read-only. Once poke - * location is active (poke->tailcall_target_stable), - * any parallel bpf_arch_text_poke() might occur - * still on the read-write image until we finally - * locked it as read-only. Both modifications on - * the given image are under text_mutex to avoid - * interference. 
- */ ret = __bpf_arch_text_poke(poke->tailcall_target, BPF_MOD_JUMP, NULL, (u8 *)target->bpf_func + - poke->adj_off, false); + poke->adj_off); BUG_ON(ret < 0); ret = __bpf_arch_text_poke(poke->tailcall_bypass, BPF_MOD_JUMP, (u8 *)poke->tailcall_target + - X86_PATCH_SIZE, NULL, false); + X86_PATCH_SIZE, NULL); BUG_ON(ret < 0); } WRITE_ONCE(poke->tailcall_target_stable, true); @@ -787,7 +774,6 @@ static int emit_atomic(u8 **pprog, u8 atomic_op, /* emit opcode */ switch (atomic_op) { case BPF_ADD: - case BPF_SUB: case BPF_AND: case BPF_OR: case BPF_XOR: @@ -867,7 +853,7 @@ static void emit_nops(u8 **pprog, int len) #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp))) -static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, +static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image, int oldproglen, struct jit_context *ctx, bool jmp_padding) { bool tail_call_reachable = bpf_prog->aux->tail_call_reachable; @@ -894,8 +880,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, push_callee_regs(&prog, callee_regs_used); ilen = prog - temp; - if (image) - memcpy(image + proglen, temp, ilen); + if (rw_image) + memcpy(rw_image + proglen, temp, ilen); proglen += ilen; addrs[0] = proglen; prog = temp; @@ -1324,6 +1310,9 @@ st: if (is_imm8(insn->off)) pr_err("extable->insn doesn't fit into 32-bit\n"); return -EFAULT; } + /* switch ex to rw buffer for writes */ + ex = (void *)rw_image + ((void *)ex - (void *)image); + ex->insn = delta; ex->data = EX_TYPE_BPF; @@ -1706,7 +1695,7 @@ emit_jmp: pr_err("bpf_jit: fatal error\n"); return -EFAULT; } - memcpy(image + proglen, temp, ilen); + memcpy(rw_image + proglen, temp, ilen); } proglen += ilen; addrs[i] = proglen; @@ -2247,6 +2236,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs) } struct x64_jit_data { + struct bpf_binary_header *rw_header; struct bpf_binary_header *header; int *addrs; u8 *image; @@ -2259,6 +2249,7 @@ struct x64_jit_data { struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { + struct bpf_binary_header *rw_header = NULL; struct bpf_binary_header *header = NULL; struct bpf_prog *tmp, *orig_prog = prog; struct x64_jit_data *jit_data; @@ -2267,6 +2258,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bool tmp_blinded = false; bool extra_pass = false; bool padding = false; + u8 *rw_image = NULL; u8 *image = NULL; int *addrs; int pass; @@ -2302,6 +2294,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) oldproglen = jit_data->proglen; image = jit_data->image; header = jit_data->header; + rw_header = jit_data->rw_header; + rw_image = (void *)rw_header + ((void *)image - (void *)header); extra_pass = true; padding = true; goto skip_init_addrs; @@ -2332,12 +2326,12 @@ skip_init_addrs: for (pass = 0; pass < MAX_PASSES || image; pass++) { if (!padding && pass >= PADDING_PASSES) padding = true; - proglen = do_jit(prog, addrs, image, oldproglen, &ctx, padding); + proglen = do_jit(prog, addrs, image, rw_image, oldproglen, &ctx, padding); if (proglen <= 0) { out_image: image = NULL; if (header) - bpf_jit_binary_free(header); + bpf_jit_binary_pack_free(header, rw_header); prog = orig_prog; goto out_addrs; } @@ -2361,8 +2355,9 @@ out_image: sizeof(struct exception_table_entry); /* allocate module memory for x86 insns and extable */ - header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size, - &image, align, jit_fill_hole); + header = bpf_jit_binary_pack_alloc(roundup(proglen, align) + extable_size, + &image, align, 
&rw_header, &rw_image, + jit_fill_hole); if (!header) { prog = orig_prog; goto out_addrs; @@ -2378,14 +2373,26 @@ out_image: if (image) { if (!prog->is_func || extra_pass) { + /* + * bpf_jit_binary_pack_finalize fails in two scenarios: + * 1) header is not pointing to proper module memory; + * 2) the arch doesn't support bpf_arch_text_copy(). + * + * Both cases are serious bugs and justify WARN_ON. + */ + if (WARN_ON(bpf_jit_binary_pack_finalize(prog, header, rw_header))) { + prog = orig_prog; + goto out_addrs; + } + bpf_tail_call_direct_fixup(prog); - bpf_jit_binary_lock_ro(header); } else { jit_data->addrs = addrs; jit_data->ctx = ctx; jit_data->proglen = proglen; jit_data->image = image; jit_data->header = header; + jit_data->rw_header = rw_header; } prog->bpf_func = (void *)image; prog->jited = 1; @@ -2413,3 +2420,10 @@ bool bpf_jit_supports_kfunc_call(void) { return true; } + +void *bpf_arch_text_copy(void *dst, void *src, size_t len) +{ + if (text_poke_copy(dst, src, len) == NULL) + return ERR_PTR(-EINVAL); + return dst; +} diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index c98a4b0a8453..303c8d32d451 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -19,6 +19,7 @@ #include <linux/in.h> #include <net/arp.h> #include <net/ipv6.h> +#include <net/ndisc.h> #include <asm/byteorder.h> #include <net/bonding.h> #include <net/bond_alb.h> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 238b56d77c36..617c2bf8c5a7 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -6048,27 +6048,38 @@ static int __net_init bond_net_init(struct net *net) return 0; } -static void __net_exit bond_net_exit(struct net *net) +static void __net_exit bond_net_exit_batch(struct list_head *net_list) { - struct bond_net *bn = net_generic(net, bond_net_id); - struct bonding *bond, *tmp_bond; + struct bond_net *bn; + struct net *net; LIST_HEAD(list); - bond_destroy_sysfs(bn); + list_for_each_entry(net, net_list, exit_list) { + bn = net_generic(net, bond_net_id); + bond_destroy_sysfs(bn); + } /* Kill off any bonds created after unregistering bond rtnl ops */ rtnl_lock(); - list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list) - unregister_netdevice_queue(bond->dev, &list); + list_for_each_entry(net, net_list, exit_list) { + struct bonding *bond, *tmp_bond; + + bn = net_generic(net, bond_net_id); + list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list) + unregister_netdevice_queue(bond->dev, &list); + } unregister_netdevice_many(&list); rtnl_unlock(); - bond_destroy_proc_dir(bn); + list_for_each_entry(net, net_list, exit_list) { + bn = net_generic(net, bond_net_id); + bond_destroy_proc_dir(bn); + } } static struct pernet_operations bond_net_ops = { .init = bond_net_init, - .exit = bond_net_exit, + .exit_batch = bond_net_exit_batch, .id = &bond_net_id, .size = sizeof(struct bond_net), }; diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index 46b150e6289e..cfe37be42be4 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -307,7 +307,6 @@ void __net_init bond_create_proc_dir(struct bond_net *bn) } /* Destroy the bonding directory under /proc/net, if empty. - * Caller must hold rtnl_lock. 
*/ void __net_exit bond_destroy_proc_dir(struct bond_net *bn) { diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 7670796c2aa1..c54649c4c3a0 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -585,7 +585,7 @@ static void mv88e6185_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, { u8 cmode = chip->ports[port].cmode; - if (cmode <= ARRAY_SIZE(mv88e6185_phy_interface_modes) && + if (cmode < ARRAY_SIZE(mv88e6185_phy_interface_modes) && mv88e6185_phy_interface_modes[cmode]) __set_bit(mv88e6185_phy_interface_modes[cmode], config->supported_interfaces); @@ -2602,7 +2602,7 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, err = mv88e6xxx_port_set_map_da(chip, port, true); if (err) - return err; + goto unlock; err = mv88e6xxx_port_commit_pvid(chip, port); if (err) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 83066af3757f..bb2fbb6826ac 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1018,7 +1018,7 @@ qca8k_phy_eth_command(struct qca8k_priv *priv, bool read, int phy, clear_skb = qca8k_alloc_mdio_header(MDIO_WRITE, QCA8K_MDIO_MASTER_CTRL, &clear_val, QCA8K_ETHERNET_PHY_PRIORITY, sizeof(clear_val)); - if (!write_skb) { + if (!clear_skb) { ret = -ENOMEM; goto err_clear_skb; } diff --git a/drivers/net/dsa/realtek/Kconfig b/drivers/net/dsa/realtek/Kconfig index 5242698143d9..b7427a8292b2 100644 --- a/drivers/net/dsa/realtek/Kconfig +++ b/drivers/net/dsa/realtek/Kconfig @@ -12,7 +12,6 @@ menuconfig NET_DSA_REALTEK config NET_DSA_REALTEK_MDIO tristate "Realtek MDIO connected switch driver" depends on NET_DSA_REALTEK - default y help Select to enable support for registering switches configured through MDIO. @@ -20,14 +19,12 @@ config NET_DSA_REALTEK_MDIO config NET_DSA_REALTEK_SMI tristate "Realtek SMI connected switch driver" depends on NET_DSA_REALTEK - default y help Select to enable support for registering switches connected through SMI. config NET_DSA_REALTEK_RTL8365MB tristate "Realtek RTL8365MB switch subdriver" - default y depends on NET_DSA_REALTEK depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO select NET_DSA_TAG_RTL8_4 @@ -36,7 +33,6 @@ config NET_DSA_REALTEK_RTL8365MB config NET_DSA_REALTEK_RTL8366RB tristate "Realtek RTL8366RB switch subdriver" - default y depends on NET_DSA_REALTEK depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO select NET_DSA_TAG_RTL4_A diff --git a/drivers/net/dsa/realtek/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c index e1c5a67a21c4..2ed592147c20 100644 --- a/drivers/net/dsa/realtek/rtl8365mb.c +++ b/drivers/net/dsa/realtek/rtl8365mb.c @@ -952,39 +952,17 @@ static bool rtl8365mb_phy_mode_supported(struct dsa_switch *ds, int port, return false; } -static void rtl8365mb_phylink_validate(struct dsa_switch *ds, int port, - unsigned long *supported, - struct phylink_link_state *state) +static void rtl8365mb_phylink_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) { - struct realtek_priv *priv = ds->priv; - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0 }; - - /* include/linux/phylink.h says: - * When @state->interface is %PHY_INTERFACE_MODE_NA, phylink - * expects the MAC driver to return all supported link modes. 
- */ - if (state->interface != PHY_INTERFACE_MODE_NA && - !rtl8365mb_phy_mode_supported(ds, port, state->interface)) { - dev_err(priv->dev, "phy mode %s is unsupported on port %d\n", - phy_modes(state->interface), port); - linkmode_zero(supported); - return; - } - - phylink_set_port_modes(mask); - - phylink_set(mask, Autoneg); - phylink_set(mask, Pause); - phylink_set(mask, Asym_Pause); - - phylink_set(mask, 10baseT_Half); - phylink_set(mask, 10baseT_Full); - phylink_set(mask, 100baseT_Half); - phylink_set(mask, 100baseT_Full); - phylink_set(mask, 1000baseT_Full); - - linkmode_and(supported, supported, mask); - linkmode_and(state->advertising, state->advertising, mask); + if (dsa_is_user_port(ds, port)) + __set_bit(PHY_INTERFACE_MODE_INTERNAL, + config->supported_interfaces); + else if (dsa_is_cpu_port(ds, port)) + phy_interface_set_rgmii(config->supported_interfaces); + + config->mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE | + MAC_10 | MAC_100 | MAC_1000FD; } static void rtl8365mb_phylink_mac_config(struct dsa_switch *ds, int port, @@ -2020,7 +1998,7 @@ static const struct dsa_switch_ops rtl8365mb_switch_ops_smi = { .get_tag_protocol = rtl8365mb_get_tag_protocol, .setup = rtl8365mb_setup, .teardown = rtl8365mb_teardown, - .phylink_validate = rtl8365mb_phylink_validate, + .phylink_get_caps = rtl8365mb_phylink_get_caps, .phylink_mac_config = rtl8365mb_phylink_mac_config, .phylink_mac_link_down = rtl8365mb_phylink_mac_link_down, .phylink_mac_link_up = rtl8365mb_phylink_mac_link_up, @@ -2038,7 +2016,7 @@ static const struct dsa_switch_ops rtl8365mb_switch_ops_mdio = { .get_tag_protocol = rtl8365mb_get_tag_protocol, .setup = rtl8365mb_setup, .teardown = rtl8365mb_teardown, - .phylink_validate = rtl8365mb_phylink_validate, + .phylink_get_caps = rtl8365mb_phylink_get_caps, .phylink_mac_config = rtl8365mb_phylink_mac_config, .phylink_mac_link_down = rtl8365mb_phylink_mac_link_down, .phylink_mac_link_up = rtl8365mb_phylink_mac_link_up, diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index 8aec5d9fbfef..ad57209007e1 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -138,11 +138,6 @@ MODULE_PARM_DESC(use_mmio, "Use MMIO (1) or PIO(0) to access the NIC. " module_param(rx_copybreak, int, 0); module_param(use_mmio, int, 0); -#if defined(NETIF_F_TSO) && MAX_SKB_FRAGS > 32 -#warning Typhoon only supports 32 entries in its SG list for TSO, disabling TSO -#undef NETIF_F_TSO -#endif - #if TXLO_ENTRIES <= (2 * MAX_SKB_FRAGS) #error TX ring too small! 
#endif @@ -2261,9 +2256,28 @@ out: return mode; } +#if MAX_SKB_FRAGS > 32 + +#include <net/vxlan.h> + +static netdev_features_t typhoon_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + if (skb_shinfo(skb)->nr_frags > 32 && skb_is_gso(skb)) + features &= ~NETIF_F_GSO_MASK; + + features = vlan_features_check(skb, features); + return vxlan_features_check(skb, features); +} +#endif + static const struct net_device_ops typhoon_netdev_ops = { .ndo_open = typhoon_open, .ndo_stop = typhoon_close, +#if MAX_SKB_FRAGS > 32 + .ndo_features_check = typhoon_features_check, +#endif .ndo_start_xmit = typhoon_start_tx, .ndo_set_rx_mode = typhoon_set_rx_mode, .ndo_tx_timeout = typhoon_tx_timeout, diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 537e6a85e18d..fbf4588994ac 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -2413,11 +2413,13 @@ static void et131x_tx_dma_memory_free(struct et131x_adapter *adapter) kfree(tx_ring->tcb_ring); } +#define MAX_TX_DESC_PER_PKT 24 + /* nic_send_packet - NIC specific send handler for version B silicon. */ static int nic_send_packet(struct et131x_adapter *adapter, struct tcb *tcb) { u32 i; - struct tx_desc desc[24]; + struct tx_desc desc[MAX_TX_DESC_PER_PKT]; u32 frag = 0; u32 thiscopy, remainder; struct sk_buff *skb = tcb->skb; @@ -2432,9 +2434,6 @@ static int nic_send_packet(struct et131x_adapter *adapter, struct tcb *tcb) * more than 5 fragments. */ - /* nr_frags should be no more than 18. */ - BUILD_BUG_ON(MAX_SKB_FRAGS + 1 > 23); - memset(desc, 0, sizeof(struct tx_desc) * (nr_frags + 1)); for (i = 0; i < nr_frags; i++) { @@ -3762,6 +3761,13 @@ static netdev_tx_t et131x_tx(struct sk_buff *skb, struct net_device *netdev) struct et131x_adapter *adapter = netdev_priv(netdev); struct tx_ring *tx_ring = &adapter->tx_ring; + /* This driver does not support TSO, it is very unlikely + * this condition is true. + */ + if (unlikely(skb_shinfo(skb)->nr_frags > MAX_TX_DESC_PER_PKT - 2)) { + if (skb_linearize(skb)) + goto drop_err; + } /* stop the queue if it's getting full */ if (tx_ring->used >= NUM_TCB - 1 && !netif_queue_stopped(netdev)) netif_stop_queue(netdev); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index a19dd6797070..447a75ea0cc1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1271,7 +1271,7 @@ struct bnx2x_fw_stats_data { struct per_port_stats port; struct per_pf_stats pf; struct fcoe_statistics_params fcoe; - struct per_queue_stats queue_stats[1]; + struct per_queue_stats queue_stats[]; }; /* Public slow path states */ diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index ba28aa444e5a..8e07192e409f 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1539,7 +1539,7 @@ static int liquidio_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) * compute the delta in terms of coprocessor clocks. 
*/ delta = (u64)ppb << 32; - do_div(delta, oct->coproc_clock_rate); + div64_u64(delta, oct->coproc_clock_rate); spin_lock_irqsave(&lio->ptp_lock, flags); comp = lio_pci_readq(oct, CN6XXX_MIO_PTP_CLOCK_COMP); @@ -1672,7 +1672,7 @@ static void liquidio_ptp_init(struct octeon_device *oct) u64 clock_comp, cfg; clock_comp = (u64)NSEC_PER_SEC << 32; - do_div(clock_comp, oct->coproc_clock_rate); + div64_u64(clock_comp, oct->coproc_clock_rate); lio_pci_writeq(oct, clock_comp, CN6XXX_MIO_PTP_CLOCK_COMP); /* Enable */ diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index c710dc17be90..8dd7bf9014ec 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -340,7 +340,7 @@ enum wake_event_bits { struct netdev_desc { __le32 next_desc; __le32 status; - struct desc_frag { __le32 addr, length; } frag[1]; + struct desc_frag { __le32 addr, length; } frag; }; /* Bits in netdev_desc.status */ @@ -980,8 +980,8 @@ static void tx_timeout(struct net_device *dev, unsigned int txqueue) le32_to_cpu(np->tx_ring[i].next_desc), le32_to_cpu(np->tx_ring[i].status), (le32_to_cpu(np->tx_ring[i].status) >> 2) & 0xff, - le32_to_cpu(np->tx_ring[i].frag[0].addr), - le32_to_cpu(np->tx_ring[i].frag[0].length)); + le32_to_cpu(np->tx_ring[i].frag.addr), + le32_to_cpu(np->tx_ring[i].frag.length)); } printk(KERN_DEBUG "TxListPtr=%08x netif_queue_stopped=%d\n", ioread32(np->base + TxListPtr), @@ -1027,7 +1027,7 @@ static void init_ring(struct net_device *dev) np->rx_ring[i].next_desc = cpu_to_le32(np->rx_ring_dma + ((i+1)%RX_RING_SIZE)*sizeof(*np->rx_ring)); np->rx_ring[i].status = 0; - np->rx_ring[i].frag[0].length = 0; + np->rx_ring[i].frag.length = 0; np->rx_skbuff[i] = NULL; } @@ -1039,16 +1039,16 @@ static void init_ring(struct net_device *dev) if (skb == NULL) break; skb_reserve(skb, 2); /* 16 byte align the IP header. */ - np->rx_ring[i].frag[0].addr = cpu_to_le32( + np->rx_ring[i].frag.addr = cpu_to_le32( dma_map_single(&np->pci_dev->dev, skb->data, np->rx_buf_sz, DMA_FROM_DEVICE)); if (dma_mapping_error(&np->pci_dev->dev, - np->rx_ring[i].frag[0].addr)) { + np->rx_ring[i].frag.addr)) { dev_kfree_skb(skb); np->rx_skbuff[i] = NULL; break; } - np->rx_ring[i].frag[0].length = cpu_to_le32(np->rx_buf_sz | LastFrag); + np->rx_ring[i].frag.length = cpu_to_le32(np->rx_buf_sz | LastFrag); } np->dirty_rx = (unsigned int)(i - RX_RING_SIZE); @@ -1097,12 +1097,12 @@ start_tx (struct sk_buff *skb, struct net_device *dev) txdesc->next_desc = 0; txdesc->status = cpu_to_le32 ((entry << 2) | DisableAlign); - txdesc->frag[0].addr = cpu_to_le32(dma_map_single(&np->pci_dev->dev, + txdesc->frag.addr = cpu_to_le32(dma_map_single(&np->pci_dev->dev, skb->data, skb->len, DMA_TO_DEVICE)); if (dma_mapping_error(&np->pci_dev->dev, - txdesc->frag[0].addr)) + txdesc->frag.addr)) goto drop_frame; - txdesc->frag[0].length = cpu_to_le32 (skb->len | LastFrag); + txdesc->frag.length = cpu_to_le32 (skb->len | LastFrag); /* Increment cur_tx before tasklet_schedule() */ np->cur_tx++; @@ -1151,7 +1151,7 @@ reset_tx (struct net_device *dev) skb = np->tx_skbuff[i]; if (skb) { dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(np->tx_ring[i].frag[0].addr), + le32_to_cpu(np->tx_ring[i].frag.addr), skb->len, DMA_TO_DEVICE); dev_kfree_skb_any(skb); np->tx_skbuff[i] = NULL; @@ -1271,12 +1271,12 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) skb = np->tx_skbuff[entry]; /* Free the original skb. 
*/ dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(np->tx_ring[entry].frag[0].addr), + le32_to_cpu(np->tx_ring[entry].frag.addr), skb->len, DMA_TO_DEVICE); dev_consume_skb_irq(np->tx_skbuff[entry]); np->tx_skbuff[entry] = NULL; - np->tx_ring[entry].frag[0].addr = 0; - np->tx_ring[entry].frag[0].length = 0; + np->tx_ring[entry].frag.addr = 0; + np->tx_ring[entry].frag.length = 0; } spin_unlock(&np->lock); } else { @@ -1290,12 +1290,12 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) skb = np->tx_skbuff[entry]; /* Free the original skb. */ dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(np->tx_ring[entry].frag[0].addr), + le32_to_cpu(np->tx_ring[entry].frag.addr), skb->len, DMA_TO_DEVICE); dev_consume_skb_irq(np->tx_skbuff[entry]); np->tx_skbuff[entry] = NULL; - np->tx_ring[entry].frag[0].addr = 0; - np->tx_ring[entry].frag[0].length = 0; + np->tx_ring[entry].frag.addr = 0; + np->tx_ring[entry].frag.length = 0; } spin_unlock(&np->lock); } @@ -1372,16 +1372,16 @@ static void rx_poll(struct tasklet_struct *t) (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) { skb_reserve(skb, 2); /* 16 byte align the IP header */ dma_sync_single_for_cpu(&np->pci_dev->dev, - le32_to_cpu(desc->frag[0].addr), + le32_to_cpu(desc->frag.addr), np->rx_buf_sz, DMA_FROM_DEVICE); skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len); dma_sync_single_for_device(&np->pci_dev->dev, - le32_to_cpu(desc->frag[0].addr), + le32_to_cpu(desc->frag.addr), np->rx_buf_sz, DMA_FROM_DEVICE); skb_put(skb, pkt_len); } else { dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(desc->frag[0].addr), + le32_to_cpu(desc->frag.addr), np->rx_buf_sz, DMA_FROM_DEVICE); skb_put(skb = np->rx_skbuff[entry], pkt_len); np->rx_skbuff[entry] = NULL; @@ -1427,18 +1427,18 @@ static void refill_rx (struct net_device *dev) if (skb == NULL) break; /* Better luck next round. */ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ - np->rx_ring[entry].frag[0].addr = cpu_to_le32( + np->rx_ring[entry].frag.addr = cpu_to_le32( dma_map_single(&np->pci_dev->dev, skb->data, np->rx_buf_sz, DMA_FROM_DEVICE)); if (dma_mapping_error(&np->pci_dev->dev, - np->rx_ring[entry].frag[0].addr)) { + np->rx_ring[entry].frag.addr)) { dev_kfree_skb_irq(skb); np->rx_skbuff[entry] = NULL; break; } } /* Perhaps we need not reset this field. */ - np->rx_ring[entry].frag[0].length = + np->rx_ring[entry].frag.length = cpu_to_le32(np->rx_buf_sz | LastFrag); np->rx_ring[entry].status = 0; cnt++; @@ -1870,14 +1870,14 @@ static int netdev_close(struct net_device *dev) (int)(np->tx_ring_dma)); for (i = 0; i < TX_RING_SIZE; i++) printk(KERN_DEBUG " #%d desc. %4.4x %8.8x %8.8x.\n", - i, np->tx_ring[i].status, np->tx_ring[i].frag[0].addr, - np->tx_ring[i].frag[0].length); + i, np->tx_ring[i].status, np->tx_ring[i].frag.addr, + np->tx_ring[i].frag.length); printk(KERN_DEBUG " Rx ring %8.8x:\n", (int)(np->rx_ring_dma)); for (i = 0; i < /*RX_RING_SIZE*/4 ; i++) { printk(KERN_DEBUG " #%d desc. 
%4.4x %4.4x %8.8x\n", - i, np->rx_ring[i].status, np->rx_ring[i].frag[0].addr, - np->rx_ring[i].frag[0].length); + i, np->rx_ring[i].status, np->rx_ring[i].frag.addr, + np->rx_ring[i].frag.length); } } #endif /* __i386__ debugging only */ @@ -1892,19 +1892,19 @@ static int netdev_close(struct net_device *dev) skb = np->rx_skbuff[i]; if (skb) { dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(np->rx_ring[i].frag[0].addr), + le32_to_cpu(np->rx_ring[i].frag.addr), np->rx_buf_sz, DMA_FROM_DEVICE); dev_kfree_skb(skb); np->rx_skbuff[i] = NULL; } - np->rx_ring[i].frag[0].addr = cpu_to_le32(0xBADF00D0); /* poison */ + np->rx_ring[i].frag.addr = cpu_to_le32(0xBADF00D0); /* poison */ } for (i = 0; i < TX_RING_SIZE; i++) { np->tx_ring[i].next_desc = 0; skb = np->tx_skbuff[i]; if (skb) { dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(np->tx_ring[i].frag[0].addr), + le32_to_cpu(np->tx_ring[i].frag.addr), skb->len, DMA_TO_DEVICE); dev_kfree_skb(skb); np->tx_skbuff[i] = NULL; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index e985ae008a97..88534aa29af2 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -18,6 +18,7 @@ #include <linux/ptp_classify.h> #include <net/pkt_cls.h> #include <net/sock.h> +#include <net/tso.h> #include "dpaa2-eth.h" @@ -760,6 +761,39 @@ static void dpaa2_eth_enable_tx_tstamp(struct dpaa2_eth_priv *priv, } } +static void *dpaa2_eth_sgt_get(struct dpaa2_eth_priv *priv) +{ + struct dpaa2_eth_sgt_cache *sgt_cache; + void *sgt_buf = NULL; + int sgt_buf_size; + + sgt_cache = this_cpu_ptr(priv->sgt_cache); + sgt_buf_size = priv->tx_data_offset + + DPAA2_ETH_SG_ENTRIES_MAX * sizeof(struct dpaa2_sg_entry); + + if (sgt_cache->count == 0) + sgt_buf = napi_alloc_frag_align(sgt_buf_size, DPAA2_ETH_TX_BUF_ALIGN); + else + sgt_buf = sgt_cache->buf[--sgt_cache->count]; + if (!sgt_buf) + return NULL; + + memset(sgt_buf, 0, sgt_buf_size); + + return sgt_buf; +} + +static void dpaa2_eth_sgt_recycle(struct dpaa2_eth_priv *priv, void *sgt_buf) +{ + struct dpaa2_eth_sgt_cache *sgt_cache; + + sgt_cache = this_cpu_ptr(priv->sgt_cache); + if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE) + skb_free_frag(sgt_buf); + else + sgt_cache->buf[sgt_cache->count++] = sgt_buf; +} + /* Create a frame descriptor based on a fragmented skb */ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, struct sk_buff *skb, @@ -805,12 +839,11 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, /* Prepare the HW SGT structure */ sgt_buf_size = priv->tx_data_offset + sizeof(struct dpaa2_sg_entry) * num_dma_bufs; - sgt_buf = napi_alloc_frag_align(sgt_buf_size, DPAA2_ETH_TX_BUF_ALIGN); + sgt_buf = dpaa2_eth_sgt_get(priv); if (unlikely(!sgt_buf)) { err = -ENOMEM; goto sgt_buf_alloc_failed; } - memset(sgt_buf, 0, sgt_buf_size); sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); @@ -846,6 +879,7 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, err = -ENOMEM; goto dma_map_single_failed; } + memset(fd, 0, sizeof(struct dpaa2_fd)); dpaa2_fd_set_offset(fd, priv->tx_data_offset); dpaa2_fd_set_format(fd, dpaa2_fd_sg); dpaa2_fd_set_addr(fd, addr); @@ -855,7 +889,7 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, return 0; dma_map_single_failed: - skb_free_frag(sgt_buf); + dpaa2_eth_sgt_recycle(priv, sgt_buf); sgt_buf_alloc_failed: dma_unmap_sg(dev, scl, num_sg, DMA_BIDIRECTIONAL); dma_map_sg_failed: @@ -875,7 +909,6 @@ static int 
dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, void **swa_addr) { struct device *dev = priv->net_dev->dev.parent; - struct dpaa2_eth_sgt_cache *sgt_cache; struct dpaa2_sg_entry *sgt; struct dpaa2_eth_swa *swa; dma_addr_t addr, sgt_addr; @@ -884,18 +917,10 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, int err; /* Prepare the HW SGT structure */ - sgt_cache = this_cpu_ptr(priv->sgt_cache); sgt_buf_size = priv->tx_data_offset + sizeof(struct dpaa2_sg_entry); - - if (sgt_cache->count == 0) - sgt_buf = kzalloc(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN, - GFP_ATOMIC); - else - sgt_buf = sgt_cache->buf[--sgt_cache->count]; + sgt_buf = dpaa2_eth_sgt_get(priv); if (unlikely(!sgt_buf)) return -ENOMEM; - - sgt_buf = PTR_ALIGN(sgt_buf, DPAA2_ETH_TX_BUF_ALIGN); sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); addr = dma_map_single(dev, skb->data, skb->len, DMA_BIDIRECTIONAL); @@ -923,6 +948,7 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, goto sgt_map_failed; } + memset(fd, 0, sizeof(struct dpaa2_fd)); dpaa2_fd_set_offset(fd, priv->tx_data_offset); dpaa2_fd_set_format(fd, dpaa2_fd_sg); dpaa2_fd_set_addr(fd, sgt_addr); @@ -934,10 +960,7 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, sgt_map_failed: dma_unmap_single(dev, addr, skb->len, DMA_BIDIRECTIONAL); data_map_failed: - if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE) - kfree(sgt_buf); - else - sgt_cache->buf[sgt_cache->count++] = sgt_buf; + dpaa2_eth_sgt_recycle(priv, sgt_buf); return err; } @@ -978,6 +1001,7 @@ static int dpaa2_eth_build_single_fd(struct dpaa2_eth_priv *priv, if (unlikely(dma_mapping_error(dev, addr))) return -ENOMEM; + memset(fd, 0, sizeof(struct dpaa2_fd)); dpaa2_fd_set_addr(fd, addr); dpaa2_fd_set_offset(fd, (u16)(skb->data - buffer_start)); dpaa2_fd_set_len(fd, skb->len); @@ -1005,9 +1029,9 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv, struct dpaa2_eth_swa *swa; u8 fd_format = dpaa2_fd_get_format(fd); u32 fd_len = dpaa2_fd_get_len(fd); - - struct dpaa2_eth_sgt_cache *sgt_cache; struct dpaa2_sg_entry *sgt; + int should_free_skb = 1; + int i; fd_addr = dpaa2_fd_get_addr(fd); buffer_start = dpaa2_iova_to_virt(priv->iommu_domain, fd_addr); @@ -1039,6 +1063,28 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv, /* Unmap the SGT buffer */ dma_unmap_single(dev, fd_addr, swa->sg.sgt_size, DMA_BIDIRECTIONAL); + } else if (swa->type == DPAA2_ETH_SWA_SW_TSO) { + skb = swa->tso.skb; + + sgt = (struct dpaa2_sg_entry *)(buffer_start + + priv->tx_data_offset); + + /* Unmap and free the header */ + dma_unmap_single(dev, dpaa2_sg_get_addr(sgt), TSO_HEADER_SIZE, + DMA_TO_DEVICE); + kfree(dpaa2_iova_to_virt(priv->iommu_domain, dpaa2_sg_get_addr(sgt))); + + /* Unmap the other SG entries for the data */ + for (i = 1; i < swa->tso.num_sg; i++) + dma_unmap_single(dev, dpaa2_sg_get_addr(&sgt[i]), + dpaa2_sg_get_len(&sgt[i]), DMA_TO_DEVICE); + + /* Unmap the SGT buffer */ + dma_unmap_single(dev, fd_addr, swa->sg.sgt_size, + DMA_BIDIRECTIONAL); + + if (!swa->tso.is_last_fd) + should_free_skb = 0; } else { skb = swa->single.skb; @@ -1067,55 +1113,195 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv, } /* Get the timestamp value */ - if (skb->cb[0] == TX_TSTAMP) { - struct skb_shared_hwtstamps shhwtstamps; - __le64 *ts = dpaa2_get_ts(buffer_start, true); - u64 ns; - - memset(&shhwtstamps, 0, sizeof(shhwtstamps)); - - ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts); - shhwtstamps.hwtstamp = 
ns_to_ktime(ns); - skb_tstamp_tx(skb, &shhwtstamps); - } else if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) { - mutex_unlock(&priv->onestep_tstamp_lock); + if (swa->type != DPAA2_ETH_SWA_SW_TSO) { + if (skb->cb[0] == TX_TSTAMP) { + struct skb_shared_hwtstamps shhwtstamps; + __le64 *ts = dpaa2_get_ts(buffer_start, true); + u64 ns; + + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + + ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts); + shhwtstamps.hwtstamp = ns_to_ktime(ns); + skb_tstamp_tx(skb, &shhwtstamps); + } else if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) { + mutex_unlock(&priv->onestep_tstamp_lock); + } } /* Free SGT buffer allocated on tx */ - if (fd_format != dpaa2_fd_single) { - sgt_cache = this_cpu_ptr(priv->sgt_cache); - if (swa->type == DPAA2_ETH_SWA_SG) { - skb_free_frag(buffer_start); - } else { - if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE) - kfree(buffer_start); - else - sgt_cache->buf[sgt_cache->count++] = buffer_start; + if (fd_format != dpaa2_fd_single) + dpaa2_eth_sgt_recycle(priv, buffer_start); + + /* Move on with skb release. If we are just confirming multiple FDs + * from the same TSO skb then only the last one will need to free the + * skb. + */ + if (should_free_skb) + napi_consume_skb(skb, in_napi); +} + +static int dpaa2_eth_build_gso_fd(struct dpaa2_eth_priv *priv, + struct sk_buff *skb, struct dpaa2_fd *fd, + int *num_fds, u32 *total_fds_len) +{ + struct device *dev = priv->net_dev->dev.parent; + int hdr_len, total_len, data_left, fd_len; + int num_sge, err, i, sgt_buf_size; + struct dpaa2_fd *fd_start = fd; + struct dpaa2_sg_entry *sgt; + struct dpaa2_eth_swa *swa; + dma_addr_t sgt_addr, addr; + dma_addr_t tso_hdr_dma; + unsigned int index = 0; + struct tso_t tso; + char *tso_hdr; + void *sgt_buf; + + /* Initialize the TSO handler, and prepare the first payload */ + hdr_len = tso_start(skb, &tso); + *total_fds_len = 0; + + total_len = skb->len - hdr_len; + while (total_len > 0) { + /* Prepare the HW SGT structure for this frame */ + sgt_buf = dpaa2_eth_sgt_get(priv); + if (unlikely(!sgt_buf)) { + netdev_err(priv->net_dev, "dpaa2_eth_sgt_get() failed\n"); + err = -ENOMEM; + goto err_sgt_get; } + sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); + + /* Determine the data length of this frame */ + data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len); + total_len -= data_left; + fd_len = data_left + hdr_len; + + /* Prepare packet headers: MAC + IP + TCP */ + tso_hdr = kmalloc(TSO_HEADER_SIZE, GFP_ATOMIC); + if (!tso_hdr) { + err = -ENOMEM; + goto err_alloc_tso_hdr; + } + + tso_build_hdr(skb, tso_hdr, &tso, data_left, total_len == 0); + tso_hdr_dma = dma_map_single(dev, tso_hdr, TSO_HEADER_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(dev, tso_hdr_dma)) { + netdev_err(priv->net_dev, "dma_map_single(tso_hdr) failed\n"); + err = -ENOMEM; + goto err_map_tso_hdr; + } + + /* Setup the SG entry for the header */ + dpaa2_sg_set_addr(sgt, tso_hdr_dma); + dpaa2_sg_set_len(sgt, hdr_len); + dpaa2_sg_set_final(sgt, data_left > 0 ? false : true); + + /* Compose the SG entries for each fragment of data */ + num_sge = 1; + while (data_left > 0) { + int size; + + /* Move to the next SG entry */ + sgt++; + size = min_t(int, tso.size, data_left); + + addr = dma_map_single(dev, tso.data, size, DMA_TO_DEVICE); + if (dma_mapping_error(dev, addr)) { + netdev_err(priv->net_dev, "dma_map_single(tso.data) failed\n"); + err = -ENOMEM; + goto err_map_data; + } + dpaa2_sg_set_addr(sgt, addr); + dpaa2_sg_set_len(sgt, size); + dpaa2_sg_set_final(sgt, size == data_left ? 
true : false); + + num_sge++; + + /* Build the data for the __next__ fragment */ + data_left -= size; + tso_build_data(skb, &tso, size); + } + + /* Store the skb backpointer in the SGT buffer */ + sgt_buf_size = priv->tx_data_offset + num_sge * sizeof(struct dpaa2_sg_entry); + swa = (struct dpaa2_eth_swa *)sgt_buf; + swa->type = DPAA2_ETH_SWA_SW_TSO; + swa->tso.skb = skb; + swa->tso.num_sg = num_sge; + swa->tso.sgt_size = sgt_buf_size; + swa->tso.is_last_fd = total_len == 0 ? 1 : 0; + + /* Separately map the SGT buffer */ + sgt_addr = dma_map_single(dev, sgt_buf, sgt_buf_size, DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(dev, sgt_addr))) { + netdev_err(priv->net_dev, "dma_map_single(sgt_buf) failed\n"); + err = -ENOMEM; + goto err_map_sgt; + } + + /* Setup the frame descriptor */ + memset(fd, 0, sizeof(struct dpaa2_fd)); + dpaa2_fd_set_offset(fd, priv->tx_data_offset); + dpaa2_fd_set_format(fd, dpaa2_fd_sg); + dpaa2_fd_set_addr(fd, sgt_addr); + dpaa2_fd_set_len(fd, fd_len); + dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA); + + *total_fds_len += fd_len; + /* Advance to the next frame descriptor */ + fd++; + index++; } - /* Move on with skb release */ - napi_consume_skb(skb, in_napi); + *num_fds = index; + + return 0; + +err_map_sgt: +err_map_data: + /* Unmap all the data S/G entries for the current FD */ + sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); + for (i = 1; i < num_sge; i++) + dma_unmap_single(dev, dpaa2_sg_get_addr(&sgt[i]), + dpaa2_sg_get_len(&sgt[i]), DMA_TO_DEVICE); + + /* Unmap the header entry */ + dma_unmap_single(dev, tso_hdr_dma, TSO_HEADER_SIZE, DMA_TO_DEVICE); +err_map_tso_hdr: + kfree(tso_hdr); +err_alloc_tso_hdr: + dpaa2_eth_sgt_recycle(priv, sgt_buf); +err_sgt_get: + /* Free all the other FDs that were already fully created */ + for (i = 0; i < index; i++) + dpaa2_eth_free_tx_fd(priv, NULL, &fd_start[i], false); + + return err; } static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) { struct dpaa2_eth_priv *priv = netdev_priv(net_dev); - struct dpaa2_fd fd; - struct rtnl_link_stats64 *percpu_stats; + int total_enqueued = 0, retries = 0, enqueued; struct dpaa2_eth_drv_stats *percpu_extras; + struct rtnl_link_stats64 *percpu_stats; + unsigned int needed_headroom; + int num_fds = 1, max_retries; struct dpaa2_eth_fq *fq; struct netdev_queue *nq; + struct dpaa2_fd *fd; u16 queue_mapping; - unsigned int needed_headroom; - u32 fd_len; + void *swa = NULL; u8 prio = 0; int err, i; - void *swa; + u32 fd_len; percpu_stats = this_cpu_ptr(priv->percpu_stats); percpu_extras = this_cpu_ptr(priv->percpu_extras); + fd = (this_cpu_ptr(priv->fd))->array; needed_headroom = dpaa2_eth_needed_headroom(skb); @@ -1130,20 +1316,28 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, } /* Setup the FD fields */ - memset(&fd, 0, sizeof(fd)); - if (skb_is_nonlinear(skb)) { - err = dpaa2_eth_build_sg_fd(priv, skb, &fd, &swa); + if (skb_is_gso(skb)) { + err = dpaa2_eth_build_gso_fd(priv, skb, fd, &num_fds, &fd_len); + percpu_extras->tx_sg_frames += num_fds; + percpu_extras->tx_sg_bytes += fd_len; + percpu_extras->tx_tso_frames += num_fds; + percpu_extras->tx_tso_bytes += fd_len; + } else if (skb_is_nonlinear(skb)) { + err = dpaa2_eth_build_sg_fd(priv, skb, fd, &swa); percpu_extras->tx_sg_frames++; percpu_extras->tx_sg_bytes += skb->len; + fd_len = dpaa2_fd_get_len(fd); } else if (skb_headroom(skb) < needed_headroom) { - err = dpaa2_eth_build_sg_fd_single_buf(priv, skb, &fd, &swa); + err = dpaa2_eth_build_sg_fd_single_buf(priv, skb, fd, &swa); 
percpu_extras->tx_sg_frames++; percpu_extras->tx_sg_bytes += skb->len; percpu_extras->tx_converted_sg_frames++; percpu_extras->tx_converted_sg_bytes += skb->len; + fd_len = dpaa2_fd_get_len(fd); } else { - err = dpaa2_eth_build_single_fd(priv, skb, &fd, &swa); + err = dpaa2_eth_build_single_fd(priv, skb, fd, &swa); + fd_len = dpaa2_fd_get_len(fd); } if (unlikely(err)) { @@ -1151,11 +1345,12 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, goto err_build_fd; } - if (skb->cb[0]) - dpaa2_eth_enable_tx_tstamp(priv, &fd, swa, skb); + if (swa && skb->cb[0]) + dpaa2_eth_enable_tx_tstamp(priv, fd, swa, skb); /* Tracing point */ - trace_dpaa2_tx_fd(net_dev, &fd); + for (i = 0; i < num_fds; i++) + trace_dpaa2_tx_fd(net_dev, &fd[i]); /* TxConf FQ selection relies on queue id from the stack. * In case of a forwarded frame from another DPNI interface, we choose @@ -1175,27 +1370,32 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, queue_mapping %= dpaa2_eth_queue_count(priv); } fq = &priv->fq[queue_mapping]; - - fd_len = dpaa2_fd_get_len(&fd); nq = netdev_get_tx_queue(net_dev, queue_mapping); netdev_tx_sent_queue(nq, fd_len); /* Everything that happens after this enqueues might race with * the Tx confirmation callback for this frame */ - for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) { - err = priv->enqueue(priv, fq, &fd, prio, 1, NULL); - if (err != -EBUSY) - break; + max_retries = num_fds * DPAA2_ETH_ENQUEUE_RETRIES; + while (total_enqueued < num_fds && retries < max_retries) { + err = priv->enqueue(priv, fq, &fd[total_enqueued], + prio, num_fds - total_enqueued, &enqueued); + if (err == -EBUSY) { + retries++; + continue; + } + + total_enqueued += enqueued; } - percpu_extras->tx_portal_busy += i; + percpu_extras->tx_portal_busy += retries; + if (unlikely(err < 0)) { percpu_stats->tx_errors++; /* Clean up everything, including freeing the skb */ - dpaa2_eth_free_tx_fd(priv, fq, &fd, false); + dpaa2_eth_free_tx_fd(priv, fq, fd, false); netdev_tx_completed_queue(nq, 1, fd_len); } else { - percpu_stats->tx_packets++; + percpu_stats->tx_packets += total_enqueued; percpu_stats->tx_bytes += fd_len; } @@ -1523,7 +1723,7 @@ static void dpaa2_eth_sgt_cache_drain(struct dpaa2_eth_priv *priv) count = sgt_cache->count; for (i = 0; i < count; i++) - kfree(sgt_cache->buf[i]); + skb_free_frag(sgt_cache->buf[i]); sgt_cache->count = 0; } } @@ -4115,7 +4315,8 @@ static int dpaa2_eth_netdev_init(struct net_device *net_dev) net_dev->features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | - NETIF_F_LLTX | NETIF_F_HW_TC; + NETIF_F_LLTX | NETIF_F_HW_TC | NETIF_F_TSO; + net_dev->gso_max_segs = DPAA2_ETH_ENQUEUE_MAX_FDS; net_dev->hw_features = net_dev->features; if (priv->dpni_attrs.vlan_filter_entries) @@ -4397,6 +4598,13 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) goto err_alloc_sgt_cache; } + priv->fd = alloc_percpu(*priv->fd); + if (!priv->fd) { + dev_err(dev, "alloc_percpu(fds) failed\n"); + err = -ENOMEM; + goto err_alloc_fds; + } + err = dpaa2_eth_netdev_init(net_dev); if (err) goto err_netdev_init; @@ -4484,6 +4692,8 @@ err_poll_thread: err_alloc_rings: err_csum: err_netdev_init: + free_percpu(priv->fd); +err_alloc_fds: free_percpu(priv->sgt_cache); err_alloc_sgt_cache: free_percpu(priv->percpu_extras); @@ -4539,6 +4749,7 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) fsl_mc_free_irqs(ls_dev); dpaa2_eth_free_rings(priv); + free_percpu(priv->fd); free_percpu(priv->sgt_cache); free_percpu(priv->percpu_stats); 
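
Worth calling out in the probe/remove hunks: one TSO skb can now expand into up to DPAA2_ETH_ENQUEUE_MAX_FDS frame descriptors, too large for an on-stack array, so the driver parks them in a per-CPU scratch area. A minimal sketch of that pattern (names and the stand-in struct are illustrative, not the driver's)::

    #include <linux/percpu.h>

    #define MAX_FDS_PER_SKB 200        /* mirrors DPAA2_ETH_ENQUEUE_MAX_FDS */

    struct fd_scratch {                /* stand-in for dpaa2_eth_fds */
        u64 fd[MAX_FDS_PER_SKB][4];    /* a dpaa2_fd is 32 bytes */
    };

    static struct fd_scratch __percpu *fd_scratch;

    static int scratch_init(void)
    {
        fd_scratch = alloc_percpu(struct fd_scratch);
        return fd_scratch ? 0 : -ENOMEM;
    }

    /* On the xmit path: ndo_start_xmit() runs with bottom halves disabled,
     * so this_cpu_ptr() is stable here and no extra locking is needed.
     */
    static void scratch_use(void)
    {
        struct fd_scratch *s = this_cpu_ptr(fd_scratch);

        /* build up to MAX_FDS_PER_SKB descriptors in s->fd[] ... */
        (void)s;
    }

    static void scratch_exit(void)
    {
        free_percpu(fd_scratch);
    }
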
free_percpu(priv->percpu_extras); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index e54e70ebdd05..b79831cd1a94 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -122,6 +122,7 @@ enum dpaa2_eth_swa_type { DPAA2_ETH_SWA_SINGLE, DPAA2_ETH_SWA_SG, DPAA2_ETH_SWA_XDP, + DPAA2_ETH_SWA_SW_TSO, }; /* Must keep this struct smaller than DPAA2_ETH_SWA_SIZE */ @@ -142,6 +143,12 @@ struct dpaa2_eth_swa { int dma_size; struct xdp_frame *xdpf; } xdp; + struct { + struct sk_buff *skb; + int num_sg; + int sgt_size; + int is_last_fd; + } tso; }; }; @@ -354,6 +361,8 @@ struct dpaa2_eth_drv_stats { __u64 tx_conf_bytes; __u64 tx_sg_frames; __u64 tx_sg_bytes; + __u64 tx_tso_frames; + __u64 tx_tso_bytes; __u64 rx_sg_frames; __u64 rx_sg_bytes; /* Linear skbs sent as a S/G FD due to insufficient headroom */ @@ -493,8 +502,15 @@ struct dpaa2_eth_trap_data { struct dpaa2_eth_priv *priv; }; +#define DPAA2_ETH_SG_ENTRIES_MAX (PAGE_SIZE / sizeof(struct scatterlist)) + #define DPAA2_ETH_DEFAULT_COPYBREAK 512 +#define DPAA2_ETH_ENQUEUE_MAX_FDS 200 +struct dpaa2_eth_fds { + struct dpaa2_fd array[DPAA2_ETH_ENQUEUE_MAX_FDS]; +}; + /* Driver private data */ struct dpaa2_eth_priv { struct net_device *net_dev; @@ -577,6 +593,8 @@ struct dpaa2_eth_priv { struct devlink_port devlink_port; u32 rx_copybreak; + + struct dpaa2_eth_fds __percpu *fd; }; struct dpaa2_eth_devlink_priv { diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index 3fdbf87dccb1..eea7d7a07c00 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -44,6 +44,8 @@ static char dpaa2_ethtool_extras[][ETH_GSTRING_LEN] = { "[drv] tx conf bytes", "[drv] tx sg frames", "[drv] tx sg bytes", + "[drv] tx tso frames", + "[drv] tx tso bytes", "[drv] rx sg frames", "[drv] rx sg bytes", "[drv] tx converted sg frames", diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index fb39e406b7fc..68d806dc3701 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -18,6 +18,8 @@ #define ENETC_MAX_MTU (ENETC_MAC_MAXFRM_SIZE - \ (ETH_FCS_LEN + ETH_HLEN + VLAN_HLEN)) +#define ENETC_CBD_DATA_MEM_ALIGN 64 + struct enetc_tx_swbd { union { struct sk_buff *skb; @@ -415,6 +417,42 @@ int enetc_get_rss_table(struct enetc_si *si, u32 *table, int count); int enetc_set_rss_table(struct enetc_si *si, const u32 *table, int count); int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd); +static inline void *enetc_cbd_alloc_data_mem(struct enetc_si *si, + struct enetc_cbd *cbd, + int size, dma_addr_t *dma, + void **data_align) +{ + struct enetc_cbdr *ring = &si->cbd_ring; + dma_addr_t dma_align; + void *data; + + data = dma_alloc_coherent(ring->dma_dev, + size + ENETC_CBD_DATA_MEM_ALIGN, + dma, GFP_KERNEL); + if (!data) { + dev_err(ring->dma_dev, "CBD alloc data memory failed!\n"); + return NULL; + } + + dma_align = ALIGN(*dma, ENETC_CBD_DATA_MEM_ALIGN); + *data_align = PTR_ALIGN(data, ENETC_CBD_DATA_MEM_ALIGN); + + cbd->addr[0] = cpu_to_le32(lower_32_bits(dma_align)); + cbd->addr[1] = cpu_to_le32(upper_32_bits(dma_align)); + cbd->length = cpu_to_le16(size); + + return data; +} + +static inline void enetc_cbd_free_data_mem(struct enetc_si *si, int size, + void *data, dma_addr_t *dma) +{ + struct enetc_cbdr 
*ring = &si->cbd_ring; + + dma_free_coherent(ring->dma_dev, size + ENETC_CBD_DATA_MEM_ALIGN, + data, *dma); +} + #ifdef CONFIG_FSL_ENETC_QOS int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data); void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c index 073e56dcca4e..af68dc46a795 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c @@ -166,70 +166,55 @@ int enetc_set_mac_flt_entry(struct enetc_si *si, int index, return enetc_send_cmd(si, &cbd); } -#define RFSE_ALIGN 64 /* Set entry in RFS table */ int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse, int index) { struct enetc_cbdr *ring = &si->cbd_ring; struct enetc_cbd cbd = {.cmd = 0}; - dma_addr_t dma, dma_align; void *tmp, *tmp_align; + dma_addr_t dma; int err; /* fill up the "set" descriptor */ cbd.cmd = 0; cbd.cls = 4; cbd.index = cpu_to_le16(index); - cbd.length = cpu_to_le16(sizeof(*rfse)); cbd.opt[3] = cpu_to_le32(0); /* SI */ - tmp = dma_alloc_coherent(ring->dma_dev, sizeof(*rfse) + RFSE_ALIGN, - &dma, GFP_KERNEL); - if (!tmp) { - dev_err(ring->dma_dev, "DMA mapping of RFS entry failed!\n"); + tmp = enetc_cbd_alloc_data_mem(si, &cbd, sizeof(*rfse), + &dma, &tmp_align); + if (!tmp) return -ENOMEM; - } - dma_align = ALIGN(dma, RFSE_ALIGN); - tmp_align = PTR_ALIGN(tmp, RFSE_ALIGN); memcpy(tmp_align, rfse, sizeof(*rfse)); - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); - err = enetc_send_cmd(si, &cbd); if (err) dev_err(ring->dma_dev, "FS entry add failed (%d)!", err); - dma_free_coherent(ring->dma_dev, sizeof(*rfse) + RFSE_ALIGN, - tmp, dma); + enetc_cbd_free_data_mem(si, sizeof(*rfse), tmp, &dma); return err; } -#define RSSE_ALIGN 64 static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count, bool read) { struct enetc_cbdr *ring = &si->cbd_ring; struct enetc_cbd cbd = {.cmd = 0}; - dma_addr_t dma, dma_align; u8 *tmp, *tmp_align; + dma_addr_t dma; int err, i; - if (count < RSSE_ALIGN) + if (count < ENETC_CBD_DATA_MEM_ALIGN) /* HW only takes in a full 64 entry table */ return -EINVAL; - tmp = dma_alloc_coherent(ring->dma_dev, count + RSSE_ALIGN, - &dma, GFP_KERNEL); - if (!tmp) { - dev_err(ring->dma_dev, "DMA mapping of RSS table failed!\n"); + tmp = enetc_cbd_alloc_data_mem(si, &cbd, count, + &dma, (void *)&tmp_align); + if (!tmp) return -ENOMEM; - } - dma_align = ALIGN(dma, RSSE_ALIGN); - tmp_align = PTR_ALIGN(tmp, RSSE_ALIGN); if (!read) for (i = 0; i < count; i++) @@ -238,10 +223,6 @@ static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count, /* fill up the descriptor */ cbd.cmd = read ? 
2 : 1; cbd.cls = 3; - cbd.length = cpu_to_le16(count); - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); err = enetc_send_cmd(si, &cbd); if (err) @@ -251,7 +232,7 @@ static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count, for (i = 0; i < count; i++) table[i] = tmp_align[i]; - dma_free_coherent(ring->dma_dev, count + RSSE_ALIGN, tmp, dma); + enetc_cbd_free_data_mem(si, count, tmp, &dma); return err; } diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 3555c12edb45..5a3eea1a718b 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -52,10 +52,11 @@ static int enetc_setup_taprio(struct net_device *ndev, struct enetc_cbd cbd = {.cmd = 0}; struct tgs_gcl_conf *gcl_config; struct tgs_gcl_data *gcl_data; - struct gce *gce; dma_addr_t dma; + struct gce *gce; u16 data_size; u16 gcl_len; + void *tmp; u32 tge; int err; int i; @@ -82,8 +83,9 @@ static int enetc_setup_taprio(struct net_device *ndev, gcl_config = &cbd.gcl_conf; data_size = struct_size(gcl_data, entry, gcl_len); - gcl_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!gcl_data) + tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size, + &dma, (void *)&gcl_data); + if (!tmp) return -ENOMEM; gce = (struct gce *)(gcl_data + 1); @@ -107,19 +109,8 @@ static int enetc_setup_taprio(struct net_device *ndev, temp_gce->period = cpu_to_le32(temp_entry->interval); } - cbd.length = cpu_to_le16(data_size); cbd.status_flags = 0; - dma = dma_map_single(&priv->si->pdev->dev, gcl_data, - data_size, DMA_TO_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - kfree(gcl_data); - return -ENOMEM; - } - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); cbd.cls = BDCR_CMD_PORT_GCL; cbd.status_flags = 0; @@ -132,8 +123,7 @@ static int enetc_setup_taprio(struct net_device *ndev, ENETC_QBV_PTGCR_OFFSET, tge & (~ENETC_QBV_TGE)); - dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_TO_DEVICE); - kfree(gcl_data); + enetc_cbd_free_data_mem(priv->si, data_size, tmp, &dma); return err; } @@ -450,6 +440,7 @@ static struct actions_fwd enetc_act_fwd[] = { }; static struct enetc_psfp epsfp = { + .dev_bitmap = 0, .psfp_sfi_bitmap = NULL, }; @@ -463,8 +454,9 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, struct enetc_cbd cbd = {.cmd = 0}; struct streamid_data *si_data; struct streamid_conf *si_conf; - u16 data_size; dma_addr_t dma; + u16 data_size; + void *tmp; int port; int err; @@ -485,21 +477,11 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, cbd.status_flags = 0; data_size = sizeof(struct streamid_data); - si_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!si_data) + tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size, + &dma, (void *)&si_data); + if (!tmp) return -ENOMEM; - cbd.length = cpu_to_le16(data_size); - dma = dma_map_single(&priv->si->pdev->dev, si_data, - data_size, DMA_FROM_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - err = -ENOMEM; - goto out; - } - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); eth_broadcast_addr(si_data->dmac); si_data->vid_vidm_tg = (ENETC_CBDR_SID_VID_MASK + ((0x3 << 14) | ENETC_CBDR_SID_VIDM)); @@ -520,11 +502,6 @@ static 
int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, goto out; /* Enable the entry overwrite again in case space was flushed by hardware */ - memset(&cbd, 0, sizeof(cbd)); - - cbd.index = cpu_to_le16((u16)sid->index); - cbd.cmd = 0; - cbd.cls = BDCR_CMD_STREAM_IDENTIFY; cbd.status_flags = 0; si_conf->en = 0x80; @@ -537,11 +514,6 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, memset(si_data, 0, data_size); - cbd.length = cpu_to_le16(data_size); - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); - /* VIDM default to be 1. * VID Match. If set (b1) then the VID must match, otherwise * any VID is considered a match. VIDM setting is only used @@ -561,10 +533,7 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, err = enetc_send_cmd(priv->si, &cbd); out: - if (!dma_mapping_error(&priv->si->pdev->dev, dma)) - dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_FROM_DEVICE); - - kfree(si_data); + enetc_cbd_free_data_mem(priv->si, data_size, tmp, &dma); return err; } @@ -635,6 +604,7 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, struct sfi_counter_data *data_buf; dma_addr_t dma; u16 data_size; + void *tmp; int err; cbd.index = cpu_to_le16((u16)index); @@ -643,21 +613,11 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, cbd.status_flags = 0; data_size = sizeof(struct sfi_counter_data); - data_buf = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!data_buf) - return -ENOMEM; - - dma = dma_map_single(&priv->si->pdev->dev, data_buf, - data_size, DMA_FROM_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - err = -ENOMEM; - goto exit; - } - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); - cbd.length = cpu_to_le16(data_size); + tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size, + &dma, (void *)&data_buf); + if (!tmp) + return -ENOMEM; err = enetc_send_cmd(priv->si, &cbd); if (err) @@ -684,7 +644,8 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, data_buf->flow_meter_dropl; exit: - kfree(data_buf); + enetc_cbd_free_data_mem(priv->si, data_size, tmp, &dma); + return err; } @@ -726,6 +687,7 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, dma_addr_t dma; u16 data_size; int err, i; + void *tmp; u64 now; cbd.index = cpu_to_le16(sgi->index); @@ -772,24 +734,10 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, sgcl_config->acl_len = (sgi->num_entries - 1) & 0x3; data_size = struct_size(sgcl_data, sgcl, sgi->num_entries); - - sgcl_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!sgcl_data) - return -ENOMEM; - - cbd.length = cpu_to_le16(data_size); - - dma = dma_map_single(&priv->si->pdev->dev, - sgcl_data, data_size, - DMA_FROM_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - kfree(sgcl_data); + tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size, + &dma, (void *)&sgcl_data); + if (!tmp) return -ENOMEM; - } - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); sgce = &sgcl_data->sgcl[0]; @@ -844,8 +792,7 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, err = enetc_send_cmd(priv->si, &cbd); exit: - kfree(sgcl_data); - + enetc_cbd_free_data_mem(priv->si, data_size, tmp, &dma); return err; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 9298fbecb31a..6f18c9a03231 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -167,6 +167,7 @@ struct hnae3_handle; struct hnae3_queue { void __iomem *io_base; + void __iomem *mem_base; struct hnae3_ae_algo *ae_algo; struct hnae3_handle *handle; int tqp_index; /* index in a handle */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index babc5d7a3b52..0b8a73c40b12 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2028,9 +2028,73 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring, return bd_num; } +static void hns3_tx_push_bd(struct hns3_enet_ring *ring, int num) +{ +#define HNS3_BYTES_PER_64BIT 8 + + struct hns3_desc desc[HNS3_MAX_PUSH_BD_NUM] = {}; + int offset = 0; + + /* make sure everything is visible to device before + * executing tx push or updating doorbell + */ + dma_wmb(); + + do { + int idx = (ring->next_to_use - num + ring->desc_num) % + ring->desc_num; + + u64_stats_update_begin(&ring->syncp); + ring->stats.tx_push++; + u64_stats_update_end(&ring->syncp); + memcpy(&desc[offset], &ring->desc[idx], + sizeof(struct hns3_desc)); + offset++; + } while (--num); + + __iowrite64_copy(ring->tqp->mem_base, desc, + (sizeof(struct hns3_desc) * HNS3_MAX_PUSH_BD_NUM) / + HNS3_BYTES_PER_64BIT); + + io_stop_wc(); +} + +static void hns3_tx_mem_doorbell(struct hns3_enet_ring *ring) +{ +#define HNS3_MEM_DOORBELL_OFFSET 64 + + __le64 bd_num = cpu_to_le64((u64)ring->pending_buf); + + /* make sure everything is visible to device before + * executing tx push or updating doorbell + */ + dma_wmb(); + + __iowrite64_copy(ring->tqp->mem_base + HNS3_MEM_DOORBELL_OFFSET, + &bd_num, 1); + u64_stats_update_begin(&ring->syncp); + ring->stats.tx_mem_doorbell += ring->pending_buf; + u64_stats_update_end(&ring->syncp); + + io_stop_wc(); +} + static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num, bool doorbell) { + struct net_device *netdev = ring_to_netdev(ring); + struct hns3_nic_priv *priv = netdev_priv(netdev); + + /* when tx push is enabled, a packet whose BD count is below + * HNS3_MAX_PUSH_BD_NUM can be pushed directly.
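
The ordering calls around the copy are the subtle part of the push path. A condensed sketch of the write-combining push sequence, assuming mem_base is a write-combining device-memory mapping::

    #include <linux/io.h>
    #include <asm/barrier.h>

    /* Sketch: descriptor push over a write-combining BAR mapping.
     * dma_wmb() orders the descriptor/ring writes in host memory before
     * the MMIO copy; __iowrite64_copy() moves the descriptors in 64-bit
     * chunks (its count is in 8-byte words, not bytes); io_stop_wc()
     * keeps the CPU from merging these WC writes with later MMIO.
     */
    static void push_descs(void __iomem *mem_base, const void *descs,
                           size_t bytes)
    {
        dma_wmb();
        __iowrite64_copy(mem_base, descs, bytes / 8);
        io_stop_wc();
    }
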
+ */ + if (test_bit(HNS3_NIC_STATE_TX_PUSH_ENABLE, &priv->state) && num && + !ring->pending_buf && num <= HNS3_MAX_PUSH_BD_NUM && doorbell) { + hns3_tx_push_bd(ring, num); + WRITE_ONCE(ring->last_to_use, ring->next_to_use); + return; + } + ring->pending_buf += num; if (!doorbell) { @@ -2038,11 +2102,12 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num, return; } - if (!ring->pending_buf) - return; + if (ring->tqp->mem_base) + hns3_tx_mem_doorbell(ring); + else + writel(ring->pending_buf, + ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG); - writel(ring->pending_buf, - ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG); ring->pending_buf = 0; WRITE_ONCE(ring->last_to_use, ring->next_to_use); } @@ -2732,6 +2797,9 @@ static void hns3_dump_queue_stats(struct net_device *ndev, "seg_pkt_cnt: %llu, tx_more: %llu, restart_queue: %llu, tx_busy: %llu\n", tx_ring->stats.seg_pkt_cnt, tx_ring->stats.tx_more, tx_ring->stats.restart_queue, tx_ring->stats.tx_busy); + + netdev_info(ndev, "tx_push: %llu, tx_mem_doorbell: %llu\n", + tx_ring->stats.tx_push, tx_ring->stats.tx_mem_doorbell); } static void hns3_dump_queue_reg(struct net_device *ndev, @@ -5094,6 +5162,9 @@ static void hns3_state_init(struct hnae3_handle *handle) set_bit(HNS3_NIC_STATE_INITED, &priv->state); + if (test_bit(HNAE3_DEV_SUPPORT_TX_PUSH_B, ae_dev->caps)) + set_bit(HNS3_NIC_STATE_TX_PUSH_ENABLE, &priv->state); + if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) set_bit(HNAE3_PFLAG_LIMIT_PROMISC, &handle->supported_pflags); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index a05a0c7423ce..4a3253692dcc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -7,6 +7,7 @@ #include <linux/dim.h> #include <linux/if_vlan.h> #include <net/page_pool.h> +#include <asm/barrier.h> #include "hnae3.h" @@ -25,9 +26,12 @@ enum hns3_nic_state { HNS3_NIC_STATE2_RESET_REQUESTED, HNS3_NIC_STATE_HW_TX_CSUM_ENABLE, HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, + HNS3_NIC_STATE_TX_PUSH_ENABLE, HNS3_NIC_STATE_MAX }; +#define HNS3_MAX_PUSH_BD_NUM 2 + #define HNS3_RING_RX_RING_BASEADDR_L_REG 0x00000 #define HNS3_RING_RX_RING_BASEADDR_H_REG 0x00004 #define HNS3_RING_RX_RING_BD_NUM_REG 0x00008 @@ -410,6 +414,8 @@ struct ring_stats { u64 tx_pkts; u64 tx_bytes; u64 tx_more; + u64 tx_push; + u64 tx_mem_doorbell; u64 restart_queue; u64 tx_busy; u64 tx_copy; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index c06c39ece80d..6469238ae090 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -23,6 +23,8 @@ static const struct hns3_stats hns3_txq_stats[] = { HNS3_TQP_STAT("packets", tx_pkts), HNS3_TQP_STAT("bytes", tx_bytes), HNS3_TQP_STAT("more", tx_more), + HNS3_TQP_STAT("push", tx_push), + HNS3_TQP_STAT("mem_doorbell", tx_mem_doorbell), HNS3_TQP_STAT("wake", restart_queue), HNS3_TQP_STAT("busy", tx_busy), HNS3_TQP_STAT("copy", tx_copy), diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 24f7afacae02..78d0498bdabc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1643,6 +1643,7 @@ static int hclge_config_gro(struct hclge_dev *hdev) static int hclge_alloc_tqps(struct hclge_dev *hdev) { + struct hnae3_ae_dev *ae_dev = 
pci_get_drvdata(hdev->pdev); struct hclge_comm_tqp *tqp; int i; @@ -1676,6 +1677,14 @@ static int hclge_alloc_tqps(struct hclge_dev *hdev) (i - HCLGE_TQP_MAX_SIZE_DEV_V2) * HCLGE_TQP_REG_SIZE; + /* when device supports tx push and has device memory, + * the queue can execute push mode or doorbell mode on + * device memory. + */ + if (test_bit(HNAE3_DEV_SUPPORT_TX_PUSH_B, ae_dev->caps)) + tqp->q.mem_base = hdev->hw.hw.mem_base + + HCLGE_TQP_MEM_OFFSET(hdev, i); + tqp++; } @@ -11008,8 +11017,6 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, static int hclge_dev_mem_map(struct hclge_dev *hdev) { -#define HCLGE_MEM_BAR 4 - struct pci_dev *pdev = hdev->pdev; struct hclge_hw *hw = &hdev->hw; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index adfb26e79262..f7f5a4b09068 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -169,6 +169,14 @@ enum HLCGE_PORT_TYPE { #define HCLGE_VECTOR0_ALL_MSIX_ERR_B 6U #define HCLGE_TRIGGER_IMP_RESET_B 7U +#define HCLGE_TQP_MEM_SIZE 0x10000 +#define HCLGE_MEM_BAR 4 +/* in the bar4, the first half is for roce, and the second half is for nic */ +#define HCLGE_NIC_MEM_OFFSET(hdev) \ + (pci_resource_len((hdev)->pdev, HCLGE_MEM_BAR) >> 1) +#define HCLGE_TQP_MEM_OFFSET(hdev, i) \ + (HCLGE_NIC_MEM_OFFSET(hdev) + HCLGE_TQP_MEM_SIZE * (i)) + #define HCLGE_MAC_DEFAULT_FRAME \ (ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN + ETH_DATA_LEN) #define HCLGE_MAC_MIN_FRAME 64 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 21442a9bb996..93389bec8d89 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -321,6 +321,7 @@ static int hclgevf_get_pf_media_type(struct hclgevf_dev *hdev) static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev) { + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); struct hclge_comm_tqp *tqp; int i; @@ -354,6 +355,14 @@ static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev) (i - HCLGEVF_TQP_MAX_SIZE_DEV_V2) * HCLGEVF_TQP_REG_SIZE; + /* when device supports tx push and has device memory, + * the queue can execute push mode or doorbell mode on + * device memory. 
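
The PF and VF variants share the same BAR 4 layout: the first half belongs to RoCE, the second to the NIC, and each queue then owns a fixed 64 KiB (0x10000) slice of the NIC half. A small sketch of the address arithmetic, with a hypothetical helper name::

    #include <linux/pci.h>

    #define TQP_MEM_SIZE 0x10000    /* mirrors HCLGE(VF)_TQP_MEM_SIZE */

    /* Sketch: per-queue device-memory window inside BAR 4.  For a
     * hypothetical 4 MiB BAR, the NIC half starts at 2 MiB and queue i
     * gets [2 MiB + i * 64 KiB, 2 MiB + (i + 1) * 64 KiB).
     */
    static void __iomem *tqp_mem_base(struct pci_dev *pdev,
                                      void __iomem *bar4, int qid)
    {
        resource_size_t nic_off = pci_resource_len(pdev, 4) >> 1;

        return bar4 + nic_off + (resource_size_t)qid * TQP_MEM_SIZE;
    }
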
+ */ + if (test_bit(HNAE3_DEV_SUPPORT_TX_PUSH_B, ae_dev->caps)) + tqp->q.mem_base = hdev->hw.hw.mem_base + + HCLGEVF_TQP_MEM_OFFSET(hdev, i); + tqp++; } @@ -2546,8 +2555,6 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client, static int hclgevf_dev_mem_map(struct hclgevf_dev *hdev) { -#define HCLGEVF_MEM_BAR 4 - struct pci_dev *pdev = hdev->pdev; struct hclgevf_hw *hw = &hdev->hw; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 502ca1ce1a90..4b00fd44f118 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -96,6 +96,14 @@ #define HCLGEVF_RSS_IND_TBL_SIZE 512 +#define HCLGEVF_TQP_MEM_SIZE 0x10000 +#define HCLGEVF_MEM_BAR 4 +/* in the bar4, the first half is for roce, and the second half is for nic */ +#define HCLGEVF_NIC_MEM_OFFSET(hdev) \ + (pci_resource_len((hdev)->pdev, HCLGEVF_MEM_BAR) >> 1) +#define HCLGEVF_TQP_MEM_OFFSET(hdev, i) \ + (HCLGEVF_NIC_MEM_OFFSET(hdev) + HCLGEVF_TQP_MEM_SIZE * (i)) + #define HCLGEVF_MAC_MAX_FRAME 9728 #define HCLGEVF_STATS_TIMER_INTERVAL 36U diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 80c5cecaf2b5..55c6bce5da61 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -854,6 +854,10 @@ struct i40e_vsi { u64 tx_force_wb; u64 rx_buf_failed; u64 rx_page_failed; + u64 rx_page_reuse; + u64 rx_page_alloc; + u64 rx_page_waive; + u64 rx_page_busy; /* These are containers of ring pointers, allocated at run-time */ struct i40e_ring **rx_rings; diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 7abef88801fb..42439f725aa4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -769,7 +769,7 @@ static bool i40e_asq_done(struct i40e_hw *hw) } /** - * i40e_asq_send_command_atomic - send command to Admin Queue + * i40e_asq_send_command_atomic_exec - send command to Admin Queue * @hw: pointer to the hw struct * @desc: prefilled descriptor describing the command (non DMA mem) * @buff: buffer to use for indirect commands @@ -780,11 +780,13 @@ static bool i40e_asq_done(struct i40e_hw *hw) * This is the main send command driver routine for the Admin Queue send * queue. 
It runs the queue, cleans the queue, etc **/ -i40e_status -i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, - void *buff, /* can be NULL */ u16 buff_size, - struct i40e_asq_cmd_details *cmd_details, - bool is_atomic_context) +static i40e_status +i40e_asq_send_command_atomic_exec(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + bool is_atomic_context) { i40e_status status = 0; struct i40e_dma_mem *dma_buff = NULL; @@ -794,8 +796,6 @@ i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, u16 retval = 0; u32 val = 0; - mutex_lock(&hw->aq.asq_mutex); - if (hw->aq.asq.count == 0) { i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQTX: Admin queue not initialized.\n"); @@ -969,6 +969,36 @@ i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, } asq_send_command_error: + return status; +} + +/** + * i40e_asq_send_command_atomic - send command to Admin Queue + * @hw: pointer to the hw struct + * @desc: prefilled descriptor describing the command (non DMA mem) + * @buff: buffer to use for indirect commands + * @buff_size: size of buffer for indirect commands + * @cmd_details: pointer to command details structure + * @is_atomic_context: is the function called in an atomic context? + * + * Acquires the lock and calls the main send command execution + * routine. + **/ +i40e_status +i40e_asq_send_command_atomic(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + bool is_atomic_context) +{ + i40e_status status; + + mutex_lock(&hw->aq.asq_mutex); + status = i40e_asq_send_command_atomic_exec(hw, desc, buff, buff_size, + cmd_details, + is_atomic_context); + mutex_unlock(&hw->aq.asq_mutex); return status; } @@ -983,6 +1013,52 @@ i40e_asq_send_command(struct i40e_hw *hw, struct i40e_aq_desc *desc, } /** + * i40e_asq_send_command_atomic_v2 - send command to Admin Queue + * @hw: pointer to the hw struct + * @desc: prefilled descriptor describing the command (non DMA mem) + * @buff: buffer to use for indirect commands + * @buff_size: size of buffer for indirect commands + * @cmd_details: pointer to command details structure + * @is_atomic_context: is the function called in an atomic context? + * @aq_status: pointer to Admin Queue status return value + * + * Acquires the lock and calls the main send command execution + * routine. Returns the last Admin Queue status in aq_status + * to avoid race conditions in access to hw->aq.asq_last_status. 
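
The race these _v2 variants close is easy to reconstruct: asq_mutex used to be dropped inside the sender, while callers read hw->aq.asq_last_status afterwards, unlocked. A compressed before/after illustration (a hypothetical caller, assuming the driver's i40e_prototype.h declarations)::

    static void aq_status_example(struct i40e_hw *hw, struct i40e_aq_desc *desc)
    {
        enum i40e_admin_queue_err aq_status;
        i40e_status ret;

        /* Racy pattern being removed: asq_last_status is read after
         * asq_mutex was released inside the sender, so a concurrent
         * command may already have overwritten it.
         */
        ret = i40e_asq_send_command(hw, desc, NULL, 0, NULL);
        if (ret && hw->aq.asq_last_status == I40E_AQ_RC_ENOENT) {
            /* might be somebody else's ENOENT */
        }

        /* Fixed pattern: the _v2 sender snapshots the status while
         * still holding asq_mutex and returns it by out parameter.
         */
        ret = i40e_asq_send_command_v2(hw, desc, NULL, 0, NULL, &aq_status);
        if (ret && aq_status == I40E_AQ_RC_ENOENT) {
            /* guaranteed to be this command's status */
        }
    }
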
+ **/ +i40e_status +i40e_asq_send_command_atomic_v2(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + bool is_atomic_context, + enum i40e_admin_queue_err *aq_status) +{ + i40e_status status; + + mutex_lock(&hw->aq.asq_mutex); + status = i40e_asq_send_command_atomic_exec(hw, desc, buff, + buff_size, + cmd_details, + is_atomic_context); + if (aq_status) + *aq_status = hw->aq.asq_last_status; + mutex_unlock(&hw->aq.asq_mutex); + return status; +} + +i40e_status +i40e_asq_send_command_v2(struct i40e_hw *hw, struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status) +{ + return i40e_asq_send_command_atomic_v2(hw, desc, buff, buff_size, + cmd_details, true, aq_status); +} + +/** * i40e_fill_default_direct_cmd_desc - AQ descriptor helper function * @desc: pointer to the temp descriptor (non DMA mem) * @opcode: the opcode can be used to decide which flags to turn off or on diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 9ddeb015eb7e..6aefffd83615 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1899,8 +1899,9 @@ i40e_status i40e_aq_add_vsi(struct i40e_hw *hw, desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); - status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info, - sizeof(vsi_ctx->info), cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, &vsi_ctx->info, + sizeof(vsi_ctx->info), + cmd_details, true); if (status) goto aq_add_vsi_exit; @@ -2287,8 +2288,9 @@ i40e_status i40e_aq_update_vsi_params(struct i40e_hw *hw, desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); - status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info, - sizeof(vsi_ctx->info), cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, &vsi_ctx->info, + sizeof(vsi_ctx->info), + cmd_details, true); vsi_ctx->vsis_allocated = le16_to_cpu(resp->vsi_used); vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free); @@ -2632,33 +2634,28 @@ get_veb_exit: } /** - * i40e_aq_add_macvlan - * @hw: pointer to the hw struct - * @seid: VSI for the mac address + * i40e_prepare_add_macvlan * @mv_list: list of macvlans to be added + * @desc: pointer to AQ descriptor structure * @count: length of the list - * @cmd_details: pointer to command details structure or NULL + * @seid: VSI for the mac address * - * Add MAC/VLAN addresses to the HW filtering + * Internal helper function that prepares the add macvlan request + * and returns the buffer size. 
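
One detail the prepare helper hides: indirect AQ commands must advertise their buffer in the descriptor flags, and buffers larger than I40E_AQ_LARGE_BUF (512 bytes) additionally need the large-buffer flag. Isolated as a sketch (hypothetical helper, same flag logic as the hunk below)::

    /* Sketch: indirect AQ commands carry their payload in a separate
     * buffer; the descriptor records the buffer and, above 512 bytes,
     * must also set the "large buffer" flag.
     */
    static u16 prep_indirect_buf(struct i40e_aq_desc *desc, u16 count,
                                 u16 elem_size)
    {
        u16 buf_size = count * elem_size;

        desc->flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
        if (buf_size > I40E_AQ_LARGE_BUF)    /* 512 bytes */
            desc->flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);

        return buf_size;
    }
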
**/ -i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, - struct i40e_aqc_add_macvlan_element_data *mv_list, - u16 count, struct i40e_asq_cmd_details *cmd_details) +static u16 +i40e_prepare_add_macvlan(struct i40e_aqc_add_macvlan_element_data *mv_list, + struct i40e_aq_desc *desc, u16 count, u16 seid) { - struct i40e_aq_desc desc; struct i40e_aqc_macvlan *cmd = - (struct i40e_aqc_macvlan *)&desc.params.raw; - i40e_status status; + (struct i40e_aqc_macvlan *)&desc->params.raw; u16 buf_size; int i; - if (count == 0 || !mv_list || !hw) - return I40E_ERR_PARAM; - buf_size = count * sizeof(*mv_list); /* prep the rest of the request */ - i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_macvlan); + i40e_fill_default_direct_cmd_desc(desc, i40e_aqc_opc_add_macvlan); cmd->num_addresses = cpu_to_le16(count); cmd->seid[0] = cpu_to_le16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid); cmd->seid[1] = 0; @@ -2669,14 +2666,71 @@ i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, mv_list[i].flags |= cpu_to_le16(I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC); - desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + desc->flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); if (buf_size > I40E_AQ_LARGE_BUF) - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + desc->flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); - status = i40e_asq_send_command(hw, &desc, mv_list, buf_size, - cmd_details); + return buf_size; +} - return status; +/** + * i40e_aq_add_macvlan + * @hw: pointer to the hw struct + * @seid: VSI for the mac address + * @mv_list: list of macvlans to be added + * @count: length of the list + * @cmd_details: pointer to command details structure or NULL + * + * Add MAC/VLAN addresses to the HW filtering + **/ +i40e_status +i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_add_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + u16 buf_size; + + if (count == 0 || !mv_list || !hw) + return I40E_ERR_PARAM; + + buf_size = i40e_prepare_add_macvlan(mv_list, &desc, count, seid); + + return i40e_asq_send_command_atomic(hw, &desc, mv_list, buf_size, + cmd_details, true); +} + +/** + * i40e_aq_add_macvlan_v2 + * @hw: pointer to the hw struct + * @seid: VSI for the mac address + * @mv_list: list of macvlans to be added + * @count: length of the list + * @cmd_details: pointer to command details structure or NULL + * @aq_status: pointer to Admin Queue status return value + * + * Add MAC/VLAN addresses to the HW filtering. + * The _v2 version returns the last Admin Queue status in aq_status + * to avoid race conditions in access to hw->aq.asq_last_status. + * It also calls _v2 versions of asq_send_command functions to + * get the aq_status on the stack. 
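
The filter add/remove callers converted further down consume the out parameter directly; as a usage sketch (a hypothetical wrapper mirroring the i40e_aqc_del_filters() conversion below)::

    /* Sketch: deleting filters with the _v2 API.  A firmware ENOENT
     * (filter already gone) is not treated as an error.
     */
    static int del_filters(struct i40e_hw *hw, u16 seid,
                           struct i40e_aqc_remove_macvlan_element_data *list,
                           u16 num_del)
    {
        enum i40e_admin_queue_err aq_status;
        i40e_status aq_ret;

        aq_ret = i40e_aq_remove_macvlan_v2(hw, seid, list, num_del, NULL,
                                           &aq_status);
        if (aq_ret && aq_status != I40E_AQ_RC_ENOENT)
            return -EIO;
        return 0;
    }
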
+ **/ +i40e_status +i40e_aq_add_macvlan_v2(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_add_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status) +{ + struct i40e_aq_desc desc; + u16 buf_size; + + if (count == 0 || !mv_list || !hw) + return I40E_ERR_PARAM; + + buf_size = i40e_prepare_add_macvlan(mv_list, &desc, count, seid); + + return i40e_asq_send_command_atomic_v2(hw, &desc, mv_list, buf_size, + cmd_details, true, aq_status); } /** @@ -2715,13 +2769,59 @@ i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid, if (buf_size > I40E_AQ_LARGE_BUF) desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); - status = i40e_asq_send_command(hw, &desc, mv_list, buf_size, - cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, mv_list, buf_size, + cmd_details, true); return status; } /** + * i40e_aq_remove_macvlan_v2 + * @hw: pointer to the hw struct + * @seid: VSI for the mac address + * @mv_list: list of macvlans to be removed + * @count: length of the list + * @cmd_details: pointer to command details structure or NULL + * @aq_status: pointer to Admin Queue status return value + * + * Remove MAC/VLAN addresses from the HW filtering. + * The _v2 version returns the last Admin Queue status in aq_status + * to avoid race conditions in access to hw->aq.asq_last_status. + * It also calls _v2 versions of asq_send_command functions to + * get the aq_status on the stack. + **/ +i40e_status +i40e_aq_remove_macvlan_v2(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_remove_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status) +{ + struct i40e_aqc_macvlan *cmd; + struct i40e_aq_desc desc; + u16 buf_size; + + if (count == 0 || !mv_list || !hw) + return I40E_ERR_PARAM; + + buf_size = count * sizeof(*mv_list); + + /* prep the rest of the request */ + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_remove_macvlan); + cmd = (struct i40e_aqc_macvlan *)&desc.params.raw; + cmd->num_addresses = cpu_to_le16(count); + cmd->seid[0] = cpu_to_le16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid); + cmd->seid[1] = 0; + cmd->seid[2] = 0; + + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + if (buf_size > I40E_AQ_LARGE_BUF) + desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + + return i40e_asq_send_command_atomic_v2(hw, &desc, mv_list, buf_size, + cmd_details, true, aq_status); +} + +/** * i40e_mirrorrule_op - Internal helper function to add/delete mirror rule * @hw: pointer to the hw struct * @opcode: AQ opcode for add or delete mirror rule @@ -3868,7 +3968,8 @@ i40e_status i40e_aq_delete_element(struct i40e_hw *hw, u16 seid, cmd->seid = cpu_to_le16(seid); - status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, NULL, 0, + cmd_details, true); return status; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 1e57cc8c47d7..90fff05fbd2b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -275,9 +275,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) rx_ring->rx_stats.alloc_page_failed, rx_ring->rx_stats.alloc_buff_failed); dev_info(&pf->pdev->dev, - " rx_rings[%i]: rx_stats: realloc_count = %lld, page_reuse_count = %lld\n", + " rx_rings[%i]: rx_stats: realloc_count = 0, page_reuse_count = %lld\n", i, - rx_ring->rx_stats.realloc_count, 
rx_ring->rx_stats.page_reuse_count); dev_info(&pf->pdev->dev, " rx_rings[%i]: size = %i\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 091f36adbbe1..e48499624d22 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -295,6 +295,10 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = { I40E_VSI_STAT("tx_busy", tx_busy), I40E_VSI_STAT("rx_alloc_fail", rx_buf_failed), I40E_VSI_STAT("rx_pg_alloc_fail", rx_page_failed), + I40E_VSI_STAT("rx_cache_reuse", rx_page_reuse), + I40E_VSI_STAT("rx_cache_alloc", rx_page_alloc), + I40E_VSI_STAT("rx_cache_waive", rx_page_waive), + I40E_VSI_STAT("rx_cache_busy", rx_page_busy), }; /* These PF_STATs might look like duplicates of some NETDEV_STATs, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f531bc1df338..9b7ce6d9a92b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -773,6 +773,7 @@ void i40e_update_veb_stats(struct i40e_veb *veb) **/ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) { + u64 rx_page, rx_buf, rx_reuse, rx_alloc, rx_waive, rx_busy; struct i40e_pf *pf = vsi->back; struct rtnl_link_stats64 *ons; struct rtnl_link_stats64 *ns; /* netdev stats */ @@ -780,7 +781,6 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) struct i40e_eth_stats *es; /* device's eth stats */ u64 tx_restart, tx_busy; struct i40e_ring *p; - u64 rx_page, rx_buf; u64 bytes, packets; unsigned int start; u64 tx_linearize; @@ -806,6 +806,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) tx_restart = tx_busy = tx_linearize = tx_force_wb = 0; rx_page = 0; rx_buf = 0; + rx_reuse = 0; + rx_alloc = 0; + rx_waive = 0; + rx_busy = 0; rcu_read_lock(); for (q = 0; q < vsi->num_queue_pairs; q++) { /* locate Tx ring */ @@ -839,6 +843,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) rx_p += packets; rx_buf += p->rx_stats.alloc_buff_failed; rx_page += p->rx_stats.alloc_page_failed; + rx_reuse += p->rx_stats.page_reuse_count; + rx_alloc += p->rx_stats.page_alloc_count; + rx_waive += p->rx_stats.page_waive_count; + rx_busy += p->rx_stats.page_busy_count; if (i40e_enabled_xdp_vsi(vsi)) { /* locate XDP ring */ @@ -866,6 +874,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) vsi->tx_force_wb = tx_force_wb; vsi->rx_page_failed = rx_page; vsi->rx_buf_failed = rx_buf; + vsi->rx_page_reuse = rx_reuse; + vsi->rx_page_alloc = rx_alloc; + vsi->rx_page_waive = rx_waive; + vsi->rx_page_busy = rx_busy; ns->rx_packets = rx_p; ns->rx_bytes = rx_b; @@ -2143,19 +2155,19 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name, int num_del, int *retval) { struct i40e_hw *hw = &vsi->back->hw; + enum i40e_admin_queue_err aq_status; i40e_status aq_ret; - int aq_err; - aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid, list, num_del, NULL); - aq_err = hw->aq.asq_last_status; + aq_ret = i40e_aq_remove_macvlan_v2(hw, vsi->seid, list, num_del, NULL, + &aq_status); /* Explicitly ignore and do not report when firmware returns ENOENT */ - if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) { + if (aq_ret && !(aq_status == I40E_AQ_RC_ENOENT)) { *retval = -EIO; dev_info(&vsi->back->pdev->dev, "ignoring delete macvlan error on %s, err %s, aq_err %s\n", vsi_name, i40e_stat_str(hw, aq_ret), - i40e_aq_str(hw, aq_err)); + i40e_aq_str(hw, aq_status)); } } @@ -2178,10 +2190,10 @@ void i40e_aqc_add_filters(struct 
i40e_vsi *vsi, const char *vsi_name, int num_add) { struct i40e_hw *hw = &vsi->back->hw; - int aq_err, fcnt; + enum i40e_admin_queue_err aq_status; + int fcnt; - i40e_aq_add_macvlan(hw, vsi->seid, list, num_add, NULL); - aq_err = hw->aq.asq_last_status; + i40e_aq_add_macvlan_v2(hw, vsi->seid, list, num_add, NULL, &aq_status); fcnt = i40e_update_filter_state(num_add, list, add_head); if (fcnt != num_add) { @@ -2189,17 +2201,19 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, promiscuous mode forced on\n", - i40e_aq_str(hw, aq_err), vsi_name); + i40e_aq_str(hw, aq_status), vsi_name); } else if (vsi->type == I40E_VSI_SRIOV || vsi->type == I40E_VSI_VMDQ1 || vsi->type == I40E_VSI_VMDQ2) { dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, please set promiscuous on manually for %s\n", - i40e_aq_str(hw, aq_err), vsi_name, vsi_name); + i40e_aq_str(hw, aq_status), vsi_name, + vsi_name); } else { dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, incorrect VSI type: %i.\n", - i40e_aq_str(hw, aq_err), vsi_name, vsi->type); + i40e_aq_str(hw, aq_status), vsi_name, + vsi->type); } } } @@ -12722,7 +12736,8 @@ static int i40e_set_features(struct net_device *netdev, else i40e_vlan_stripping_disable(vsi); - if (!(features & NETIF_F_HW_TC) && pf->num_cloud_filters) { + if (!(features & NETIF_F_HW_TC) && + (netdev->features & NETIF_F_HW_TC) && pf->num_cloud_filters) { dev_err(&pf->pdev->dev, "Offloaded tc filters active, can't turn hw_tc_offload off"); return -EINVAL; @@ -13478,6 +13493,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) netdev->features |= hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID; + netdev->features &= ~NETIF_F_HW_TC; + if (vsi->type == I40E_VSI_MAIN) { SET_NETDEV_DEV(netdev, &pf->pdev->dev); ether_addr_copy(mac_addr, hw->mac.perm_addr); diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 9241b6005ad3..ebdcde6f1aeb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -27,10 +27,25 @@ i40e_asq_send_command(struct i40e_hw *hw, struct i40e_aq_desc *desc, void *buff, /* can be NULL */ u16 buff_size, struct i40e_asq_cmd_details *cmd_details); i40e_status +i40e_asq_send_command_v2(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status); +i40e_status i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, void *buff, /* can be NULL */ u16 buff_size, struct i40e_asq_cmd_details *cmd_details, bool is_atomic_context); +i40e_status +i40e_asq_send_command_atomic_v2(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + bool is_atomic_context, + enum i40e_admin_queue_err *aq_status); /* debug function for adminq */ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, @@ -150,9 +165,19 @@ i40e_status i40e_aq_get_veb_parameters(struct i40e_hw *hw, i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 vsi_id, struct i40e_aqc_add_macvlan_element_data *mv_list, u16 count, struct i40e_asq_cmd_details *cmd_details); +i40e_status +i40e_aq_add_macvlan_v2(struct i40e_hw *hw, u16 seid, + struct 
i40e_aqc_add_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status); i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 vsi_id, struct i40e_aqc_remove_macvlan_element_data *mv_list, u16 count, struct i40e_asq_cmd_details *cmd_details); +i40e_status +i40e_aq_remove_macvlan_v2(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_remove_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status); i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid, u16 rule_type, u16 dest_vsi, u16 count, __le16 *mr_list, struct i40e_asq_cmd_details *cmd_details, diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 66cc79500c10..0eae5858f2fe 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -830,8 +830,6 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) i40e_clean_tx_ring(tx_ring); kfree(tx_ring->tx_bi); tx_ring->tx_bi = NULL; - kfree(tx_ring->xsk_descs); - tx_ring->xsk_descs = NULL; if (tx_ring->desc) { dma_free_coherent(tx_ring->dev, tx_ring->size, @@ -1382,8 +1380,6 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, new_buff->page_offset = old_buff->page_offset; new_buff->pagecnt_bias = old_buff->pagecnt_bias; - rx_ring->rx_stats.page_reuse_count++; - /* clear contents of buffer_info */ old_buff->page = NULL; } @@ -1433,13 +1429,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) if (!tx_ring->tx_bi) goto err; - if (ring_is_xdp(tx_ring)) { - tx_ring->xsk_descs = kcalloc(I40E_MAX_NUM_DESCRIPTORS, sizeof(*tx_ring->xsk_descs), - GFP_KERNEL); - if (!tx_ring->xsk_descs) - goto err; - } - u64_stats_init(&tx_ring->syncp); /* round up to nearest 4K */ @@ -1463,8 +1452,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) return 0; err: - kfree(tx_ring->xsk_descs); - tx_ring->xsk_descs = NULL; kfree(tx_ring->tx_bi); tx_ring->tx_bi = NULL; return -ENOMEM; @@ -1675,6 +1662,8 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring, return false; } + rx_ring->rx_stats.page_alloc_count++; + /* map page for use */ dma = dma_map_page_attrs(rx_ring->dev, page, 0, i40e_rx_pg_size(rx_ring), @@ -1982,32 +1971,43 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb, /** * i40e_can_reuse_rx_page - Determine if page can be reused for another Rx * @rx_buffer: buffer containing the page + * @rx_stats: rx stats structure for the rx ring * @rx_buffer_pgcnt: buffer page refcount pre xdp_do_redirect() call * * If page is reusable, we have a green light for calling i40e_reuse_rx_page, * which will assign the current buffer to the buffer that next_to_alloc is * pointing to; otherwise, the DMA mapping needs to be destroyed and - * page freed + * page freed. + * + * rx_stats will be updated to indicate whether the page was waived + * or busy if it could not be reused. */ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, + struct i40e_rx_queue_stats *rx_stats, int rx_buffer_pgcnt) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; /* Is any reuse possible? 
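
The waive/busy accounting below slots into an existing refcount trick that the diff takes for granted: the driver pre-charges the page refcount and remembers its own share in pagecnt_bias, so comparing the two reveals whether the network stack still holds the other half-page. A simplified sketch, with the stats passed as bare counters::

    #include <linux/skbuff.h>

    /* Sketch: half-page recycling accounting.  The stack owns
     * (page refcount - pagecnt_bias) references; if that difference
     * exceeds 1, the other half of the page is still in flight and
     * the buffer is "busy".  Pages that left the reusable state
     * entirely (wrong NUMA node, pfmemalloc) are "waived".
     */
    static bool can_recycle(struct page *page, unsigned int pagecnt_bias,
                            int pgcnt, u64 *waive, u64 *busy)
    {
        if (!dev_page_is_reusable(page)) {
            (*waive)++;
            return false;
        }
        if (pgcnt - (int)pagecnt_bias > 1) {
            (*busy)++;
            return false;
        }
        return true;
    }
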
*/ - if (!dev_page_is_reusable(page)) + if (!dev_page_is_reusable(page)) { + rx_stats->page_waive_count++; return false; + } #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) + if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) { + rx_stats->page_busy_count++; return false; + } #else #define I40E_LAST_OFFSET \ (SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048) - if (rx_buffer->page_offset > I40E_LAST_OFFSET) + if (rx_buffer->page_offset > I40E_LAST_OFFSET) { + rx_stats->page_busy_count++; return false; + } #endif /* If we have drained the page fragment pool we need to update @@ -2237,7 +2237,7 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, int rx_buffer_pgcnt) { - if (i40e_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { + if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats, rx_buffer_pgcnt)) { /* hand second half of page back to the ring */ i40e_reuse_rx_page(rx_ring, rx_buffer); } else { diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index bfc2845c99d1..c471c2da313c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -298,7 +298,9 @@ struct i40e_rx_queue_stats { u64 alloc_page_failed; u64 alloc_buff_failed; u64 page_reuse_count; - u64 realloc_count; + u64 page_alloc_count; + u64 page_waive_count; + u64 page_busy_count; }; enum i40e_ring_state_t { @@ -390,7 +392,6 @@ struct i40e_ring { u16 rx_offset; struct xdp_rxq_info xdp_rxq; struct xsk_buff_pool *xsk_pool; - struct xdp_desc *xsk_descs; /* For storing descriptors in the AF_XDP ZC path */ } ____cacheline_internodealigned_in_smp; static inline bool ring_uses_build_skb(struct i40e_ring *ring) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 67e9844e2076..5a997b0d07d8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -471,11 +471,11 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring) **/ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) { - struct xdp_desc *descs = xdp_ring->xsk_descs; + struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; u32 nb_pkts, nb_processed = 0; unsigned int total_bytes = 0; - nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, descs, budget); + nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); if (!nb_pkts) return true; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index e2e3ef7fba7f..e3df0134dc77 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -2803,6 +2803,8 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, /* clone ring and setup updated count */ xdp_rings[i] = *vsi->xdp_rings[i]; xdp_rings[i].count = new_tx_cnt; + xdp_rings[i].next_dd = ICE_RING_QUARTER(&xdp_rings[i]) - 1; + xdp_rings[i].next_rs = ICE_RING_QUARTER(&xdp_rings[i]) - 1; xdp_rings[i].desc = NULL; xdp_rings[i].tx_buf = NULL; err = ice_setup_tx_ring(&xdp_rings[i]); diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index fc3580167e7b..263a2e7577a2 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -227,6 +227,11 @@ void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos) 
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) qos->tc_info[i].rel_bw = dcbx_cfg->etscfg.tcbwtable[i]; + + qos->pfc_mode = dcbx_cfg->pfc_mode; + if (qos->pfc_mode == IIDC_DSCP_PFC_MODE) + for (i = 0; i < IIDC_MAX_DSCP_MAPPING; i++) + qos->dscp_map[i] = dcbx_cfg->dscp_map[i]; } EXPORT_SYMBOL_GPL(ice_get_qos_params); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f46af3b34074..63f43400a146 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2495,10 +2495,10 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx]; xdp_ring->vsi = vsi; xdp_ring->netdev = NULL; - xdp_ring->next_dd = ICE_TX_THRESH - 1; - xdp_ring->next_rs = ICE_TX_THRESH - 1; xdp_ring->dev = dev; xdp_ring->count = vsi->num_tx_desc; + xdp_ring->next_dd = ICE_RING_QUARTER(xdp_ring) - 1; + xdp_ring->next_rs = ICE_RING_QUARTER(xdp_ring) - 1; WRITE_ONCE(vsi->xdp_rings[i], xdp_ring); if (ice_setup_tx_ring(xdp_ring)) goto free_xdp_rings; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index c2258bee8ecb..7b9b3b750bf0 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -173,6 +173,8 @@ tx_skip_free: tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; + tx_ring->next_dd = ICE_RING_QUARTER(tx_ring) - 1; + tx_ring->next_rs = ICE_RING_QUARTER(tx_ring) - 1; if (!tx_ring->netdev) return; @@ -1467,7 +1469,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) bool wd; if (tx_ring->xsk_pool) - wd = ice_clean_tx_irq_zc(tx_ring, budget); + wd = ice_xmit_zc(tx_ring, ICE_DESC_UNUSED(tx_ring), budget); else if (ice_ring_is_xdp(tx_ring)) wd = true; else @@ -1520,7 +1522,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) /* Exit the polling mode, but don't re-enable interrupts if stack might * poll us due to busy-polling */ - if (likely(napi_complete_done(napi, work_done))) { + if (napi_complete_done(napi, work_done)) { ice_net_dim(q_vector); ice_enable_interrupt(q_vector); } else { diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index b7b3bd4816f0..466253ac2ee1 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -13,7 +13,6 @@ #define ICE_MAX_CHAINED_RX_BUFS 5 #define ICE_MAX_BUF_TXD 8 #define ICE_MIN_TX_LEN 17 -#define ICE_TX_THRESH 32 /* The size limit for a transmit buffer in a descriptor is (16K - 1). * In order to align with the read requests we will align the value to @@ -111,6 +110,8 @@ static inline int ice_skb_pad(void) (u16)((((R)->next_to_clean > (R)->next_to_use) ? 
0 : (R)->count) + \ (R)->next_to_clean - (R)->next_to_use - 1) +#define ICE_RING_QUARTER(R) ((R)->count >> 2) + #define ICE_TX_FLAGS_TSO BIT(0) #define ICE_TX_FLAGS_HW_VLAN BIT(1) #define ICE_TX_FLAGS_SW_VLAN BIT(2) @@ -321,17 +322,18 @@ struct ice_tx_ring { u16 count; /* Number of descriptors */ u16 q_index; /* Queue number of ring */ /* stats structs */ - struct ice_q_stats stats; - struct u64_stats_sync syncp; struct ice_txq_stats tx_stats; - /* CL3 - 3rd cacheline starts here */ + struct ice_q_stats stats; + struct u64_stats_sync syncp; struct rcu_head rcu; /* to avoid race on free */ DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ struct ice_channel *ch; struct ice_ptp_tx *tx_tstamps; spinlock_t tx_lock; u32 txq_teid; /* Added Tx queue TEID */ + /* CL4 - 4th cacheline starts here */ + u16 xdp_tx_active; #define ICE_TX_FLAGS_RING_XDP BIT(0) u8 flags; u8 dcb_tc; /* Traffic class of ring */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 0e87b98e0966..eb21cec1d772 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -222,6 +222,7 @@ ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) { unsigned int total_bytes = 0, total_pkts = 0; + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); u16 ntc = xdp_ring->next_to_clean; struct ice_tx_desc *next_dd_desc; u16 next_dd = xdp_ring->next_dd; @@ -233,7 +234,7 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) return; - for (i = 0; i < ICE_TX_THRESH; i++) { + for (i = 0; i < tx_thresh; i++) { tx_buf = &xdp_ring->tx_buf[ntc]; total_bytes += tx_buf->bytecount; @@ -254,9 +255,9 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) } next_dd_desc->cmd_type_offset_bsz = 0; - xdp_ring->next_dd = xdp_ring->next_dd + ICE_TX_THRESH; + xdp_ring->next_dd = xdp_ring->next_dd + tx_thresh; if (xdp_ring->next_dd > xdp_ring->count) - xdp_ring->next_dd = ICE_TX_THRESH - 1; + xdp_ring->next_dd = tx_thresh - 1; xdp_ring->next_to_clean = ntc; ice_update_tx_ring_stats(xdp_ring, total_pkts, total_bytes); } @@ -269,12 +270,13 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) */ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) { + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); u16 i = xdp_ring->next_to_use; struct ice_tx_desc *tx_desc; struct ice_tx_buf *tx_buf; dma_addr_t dma; - if (ICE_DESC_UNUSED(xdp_ring) < ICE_TX_THRESH) + if (ICE_DESC_UNUSED(xdp_ring) < tx_thresh) ice_clean_xdp_irq(xdp_ring); if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) { @@ -300,13 +302,14 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 0, size, 0); + xdp_ring->xdp_tx_active++; i++; if (i == xdp_ring->count) { i = 0; tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); tx_desc->cmd_type_offset_bsz |= cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); - xdp_ring->next_rs = ICE_TX_THRESH - 1; + xdp_ring->next_rs = tx_thresh - 1; } xdp_ring->next_to_use = i; @@ -314,7 +317,7 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); tx_desc->cmd_type_offset_bsz |= cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); - xdp_ring->next_rs += ICE_TX_THRESH; + xdp_ring->next_rs += tx_thresh; } return ICE_XDP_TX; diff --git 
a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index feb874bde171..ed430d566274 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -327,6 +327,13 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) bool if_running, pool_present = !!pool; int ret = 0, pool_failure = 0; + if (!is_power_of_2(vsi->rx_rings[qid]->count) || + !is_power_of_2(vsi->tx_rings[qid]->count)) { + netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n"); + pool_failure = -EINVAL; + goto failure; + } + if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi); if (if_running) { @@ -349,6 +356,7 @@ xsk_pool_if_up: netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret); } +failure: if (pool_failure) { netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n", pool_present ? "en" : "dis", pool_failure); @@ -359,33 +367,28 @@ xsk_pool_if_up: } /** - * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers - * @rx_ring: Rx ring + * ice_fill_rx_descs - pick buffers from XSK buffer pool and use it + * @pool: XSK Buffer pool to pull the buffers from + * @xdp: SW ring of xdp_buff that will hold the buffers + * @rx_desc: Pointer to Rx descriptors that will be filled * @count: The number of buffers to allocate * * This function allocates a number of Rx buffers from the fill ring * or the internal recycle mechanism and places them on the Rx ring. * - * Returns true if all allocations were successful, false if any fail. + * Note that ring wrap should be handled by caller of this function. + * + * Returns the amount of allocated Rx descriptors */ -bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, + union ice_32b_rx_flex_desc *rx_desc, u16 count) { - union ice_32b_rx_flex_desc *rx_desc; - u16 ntu = rx_ring->next_to_use; - struct xdp_buff **xdp; - u32 nb_buffs, i; dma_addr_t dma; + u16 buffs; + int i; - rx_desc = ICE_RX_DESC(rx_ring, ntu); - xdp = ice_xdp_buf(rx_ring, ntu); - - nb_buffs = min_t(u16, count, rx_ring->count - ntu); - nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs); - if (!nb_buffs) - return false; - - i = nb_buffs; - while (i--) { + buffs = xsk_buff_alloc_batch(pool, xdp, count); + for (i = 0; i < buffs; i++) { dma = xsk_buff_xdp_get_dma(*xdp); rx_desc->read.pkt_addr = cpu_to_le64(dma); rx_desc->wb.status_error0 = 0; @@ -394,13 +397,77 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) xdp++; } + return buffs; +} + +/** + * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers + * @rx_ring: Rx ring + * @count: The number of buffers to allocate + * + * Place the @count of descriptors onto Rx ring. Handle the ring wrap + * for case where space from next_to_use up to the end of ring is less + * than @count. Finally do a tail bump. + * + * Returns true if all allocations were successful, false if any fail. 
+ */ +static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +{ + union ice_32b_rx_flex_desc *rx_desc; + u32 nb_buffs_extra = 0, nb_buffs; + u16 ntu = rx_ring->next_to_use; + u16 total_count = count; + struct xdp_buff **xdp; + + rx_desc = ICE_RX_DESC(rx_ring, ntu); + xdp = ice_xdp_buf(rx_ring, ntu); + + if (ntu + count >= rx_ring->count) { + nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, + rx_desc, + rx_ring->count - ntu); + rx_desc = ICE_RX_DESC(rx_ring, 0); + xdp = ice_xdp_buf(rx_ring, 0); + ntu = 0; + count -= nb_buffs_extra; + ice_release_rx_desc(rx_ring, 0); + } + + nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count); + ntu += nb_buffs; if (ntu == rx_ring->count) ntu = 0; - ice_release_rx_desc(rx_ring, ntu); + if (rx_ring->next_to_use != ntu) + ice_release_rx_desc(rx_ring, ntu); - return count == nb_buffs; + return total_count == (nb_buffs_extra + nb_buffs); +} + +/** + * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers + * @rx_ring: Rx ring + * @count: The number of buffers to allocate + * + * Wrapper for internal allocation routine; figure out how many tail + * bumps should take place based on the given threshold + * + * Returns true if all calls to internal alloc routine succeeded + */ +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +{ + u16 rx_thresh = ICE_RING_QUARTER(rx_ring); + u16 batched, leftover, i, tail_bumps; + + batched = ALIGN_DOWN(count, rx_thresh); + tail_bumps = batched / rx_thresh; + leftover = count & (rx_thresh - 1); + + for (i = 0; i < tail_bumps; i++) + if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh)) + return false; + return __ice_alloc_rx_bufs_zc(rx_ring, leftover); } /** @@ -616,134 +683,221 @@ construct_skb: } /** - * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries + * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer * @xdp_ring: XDP Tx ring - * @budget: max number of frames to xmit + * @tx_buf: Tx buffer to clean + */ +static void +ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) +{ + xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf); + xdp_ring->xdp_tx_active--; + dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); +} + +/** + * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring + * @xdp_ring: XDP ring to clean + * @napi_budget: amount of descriptors that NAPI allows us to clean * - * Returns true if cleanup/transmission is done. 
+ * Returns count of cleaned descriptors */ -static bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, int budget) +static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget) { - struct ice_tx_desc *tx_desc = NULL; - bool work_done = true; - struct xdp_desc desc; - dma_addr_t dma; + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); + int budget = napi_budget / tx_thresh; + u16 next_dd = xdp_ring->next_dd; + u16 ntc, cleared_dds = 0; - while (likely(budget-- > 0)) { + do { + struct ice_tx_desc *next_dd_desc; + u16 desc_cnt = xdp_ring->count; struct ice_tx_buf *tx_buf; + u32 xsk_frames; + u16 i; - if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) { - xdp_ring->tx_stats.tx_busy++; - work_done = false; + next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd); + if (!(next_dd_desc->cmd_type_offset_bsz & + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) break; - } - - tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use]; - if (!xsk_tx_peek_desc(xdp_ring->xsk_pool, &desc)) - break; - - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc.addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, - desc.len); + cleared_dds++; + xsk_frames = 0; + if (likely(!xdp_ring->xdp_tx_active)) { + xsk_frames = tx_thresh; + goto skip; + } - tx_buf->bytecount = desc.len; + ntc = xdp_ring->next_to_clean; - tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use); - tx_desc->buf_addr = cpu_to_le64(dma); - tx_desc->cmd_type_offset_bsz = - ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0, desc.len, 0); + for (i = 0; i < tx_thresh; i++) { + tx_buf = &xdp_ring->tx_buf[ntc]; - xdp_ring->next_to_use++; - if (xdp_ring->next_to_use == xdp_ring->count) - xdp_ring->next_to_use = 0; - } - - if (tx_desc) { - ice_xdp_ring_update_tail(xdp_ring); - xsk_tx_release(xdp_ring->xsk_pool); - } + if (tx_buf->raw_buf) { + ice_clean_xdp_tx_buf(xdp_ring, tx_buf); + tx_buf->raw_buf = NULL; + } else { + xsk_frames++; + } - return budget > 0 && work_done; + ntc++; + if (ntc >= xdp_ring->count) + ntc = 0; + } +skip: + xdp_ring->next_to_clean += tx_thresh; + if (xdp_ring->next_to_clean >= desc_cnt) + xdp_ring->next_to_clean -= desc_cnt; + if (xsk_frames) + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); + next_dd_desc->cmd_type_offset_bsz = 0; + next_dd = next_dd + tx_thresh; + if (next_dd >= desc_cnt) + next_dd = tx_thresh - 1; + } while (budget--); + + xdp_ring->next_dd = next_dd; + + return cleared_dds * tx_thresh; } /** - * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer - * @xdp_ring: XDP Tx ring - * @tx_buf: Tx buffer to clean + * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor + * @xdp_ring: XDP ring to produce the HW Tx descriptor on + * @desc: AF_XDP descriptor to pull the DMA address and length from + * @total_bytes: bytes accumulator that will be used for stats update */ -static void -ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) +static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc, + unsigned int *total_bytes) { - xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf); - dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); - dma_unmap_len_set(tx_buf, len, 0); + struct ice_tx_desc *tx_desc; + dma_addr_t dma; + + dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len); + + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++); + tx_desc->buf_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 
+ 0, desc->len, 0); + + *total_bytes += desc->len; } /** - * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries - * @xdp_ring: XDP Tx ring - * @budget: NAPI budget - * - * Returns true if cleanup/tranmission is done. + * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from + * @total_bytes: bytes accumulator that will be used for stats update */ -bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget) +static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, + unsigned int *total_bytes) { - int total_packets = 0, total_bytes = 0; - s16 ntc = xdp_ring->next_to_clean; + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); + u16 ntu = xdp_ring->next_to_use; struct ice_tx_desc *tx_desc; - struct ice_tx_buf *tx_buf; - u32 xsk_frames = 0; - bool xmit_done; + u32 i; - tx_desc = ICE_TX_DESC(xdp_ring, ntc); - tx_buf = &xdp_ring->tx_buf[ntc]; - ntc -= xdp_ring->count; + loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { + dma_addr_t dma; - do { - if (!(tx_desc->cmd_type_offset_bsz & - cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) - break; + dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len); - total_bytes += tx_buf->bytecount; - total_packets++; + tx_desc = ICE_TX_DESC(xdp_ring, ntu++); + tx_desc->buf_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, + 0, descs[i].len, 0); - if (tx_buf->raw_buf) { - ice_clean_xdp_tx_buf(xdp_ring, tx_buf); - tx_buf->raw_buf = NULL; - } else { - xsk_frames++; - } + *total_bytes += descs[i].len; + } - tx_desc->cmd_type_offset_bsz = 0; - tx_buf++; - tx_desc++; - ntc++; + xdp_ring->next_to_use = ntu; - if (unlikely(!ntc)) { - ntc -= xdp_ring->count; - tx_buf = xdp_ring->tx_buf; - tx_desc = ICE_TX_DESC(xdp_ring, 0); - } + if (xdp_ring->next_to_use > xdp_ring->next_rs) { + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + xdp_ring->next_rs += tx_thresh; + } +} - prefetch(tx_desc); +/** + * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from + * @nb_pkts: count of packets to be send + * @total_bytes: bytes accumulator that will be used for stats update + */ +static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, + u32 nb_pkts, unsigned int *total_bytes) +{ + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); + u32 batched, leftover, i; + + batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH); + leftover = nb_pkts & (PKTS_PER_BATCH - 1); + for (i = 0; i < batched; i += PKTS_PER_BATCH) + ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes); + for (; i < batched + leftover; i++) + ice_xmit_pkt(xdp_ring, &descs[i], total_bytes); + + if (xdp_ring->next_to_use > xdp_ring->next_rs) { + struct ice_tx_desc *tx_desc; + + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + xdp_ring->next_rs += tx_thresh; + } +} - } while (likely(--budget)); +/** + * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @budget: number 
of free descriptors on HW Tx ring that can be used + * @napi_budget: amount of descriptors that NAPI allows us to clean + * + * Returns true if there is no more work that needs to be done, false otherwise + */ +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget) +{ + struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); + u32 nb_pkts, nb_processed = 0; + unsigned int total_bytes = 0; + + if (budget < tx_thresh) + budget += ice_clean_xdp_irq_zc(xdp_ring, napi_budget); + + nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); + if (!nb_pkts) + return true; + + if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { + struct ice_tx_desc *tx_desc; + + nb_processed = xdp_ring->count - xdp_ring->next_to_use; + ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes); + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + xdp_ring->next_rs = tx_thresh - 1; + xdp_ring->next_to_use = 0; + } - ntc += xdp_ring->count; - xdp_ring->next_to_clean = ntc; + ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed, + &total_bytes); - if (xsk_frames) - xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); + ice_xdp_ring_update_tail(xdp_ring); + ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes); if (xsk_uses_need_wakeup(xdp_ring->xsk_pool)) xsk_set_tx_need_wakeup(xdp_ring->xsk_pool); - ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes); - xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK); - - return budget > 0 && xmit_done; + return nb_pkts < budget; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 4c7bd8e9dfc4..0cbb5793b5b8 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -6,19 +6,37 @@ #include "ice_txrx.h" #include "ice.h" +#define PKTS_PER_BATCH 8 + +#ifdef __clang__ +#define loop_unrolled_for _Pragma("clang loop unroll_count(8)") for +#elif __GNUC__ >= 4 +#define loop_unrolled_for _Pragma("GCC unroll 8") for +#else +#define loop_unrolled_for for +#endif + struct ice_vsi; #ifdef CONFIG_XDP_SOCKETS int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid); int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget); -bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget); int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count); bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget); #else +static inline bool +ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring, + u32 __always_unused budget, + int __always_unused napi_budget) +{ + return false; +} + static inline int ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi, struct xsk_buff_pool __always_unused *pool, @@ -35,13 +53,6 @@ ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, } static inline bool -ice_clean_tx_irq_zc(struct ice_tx_ring __always_unused *xdp_ring, - int __always_unused budget) -{ - return false; -} - -static inline bool ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring, u16 __always_unused count) { diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c 
b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 51a2dcaf553d..2a5782063f4c 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -965,10 +965,6 @@ static int igb_set_ringparam(struct net_device *netdev, memcpy(&temp_ring[i], adapter->rx_ring[i], sizeof(struct igb_ring)); - /* Clear copied XDP RX-queue info */ - memset(&temp_ring[i].xdp_rxq, 0, - sizeof(temp_ring[i].xdp_rxq)); - temp_ring[i].count = new_rx_count; err = igb_setup_rx_resources(&temp_ring[i]); if (err) { diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index bfa321e4003f..34b33b21e0dc 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4345,7 +4345,18 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) { struct igb_adapter *adapter = netdev_priv(rx_ring->netdev); struct device *dev = rx_ring->dev; - int size; + int size, res; + + /* XDP RX-queue info */ + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->queue_index, 0); + if (res < 0) { + dev_err(dev, "Failed to register xdp_rxq index %u\n", + rx_ring->queue_index); + return res; + } size = sizeof(struct igb_rx_buffer) * rx_ring->count; @@ -4368,14 +4379,10 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) rx_ring->xdp_prog = adapter->xdp_prog; - /* XDP RX-queue info */ - if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, - rx_ring->queue_index, 0) < 0) - goto err; - return 0; err: + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index b965fb809d84..74b2c590ed5d 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -505,6 +505,9 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring) u8 index = rx_ring->queue_index; int size, desc_len, res; + /* XDP RX-queue info */ + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index, rx_ring->q_vector->napi.napi_id); if (res < 0) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 3631d612aaca..25491edc35ce 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -578,31 +578,78 @@ void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable) } } +static int cgx_lmac_get_pause_frm_status(void *cgxd, int lmac_id, + u8 *tx_pause, u8 *rx_pause) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (is_dev_rpm(cgx)) + return 0; + + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); + *rx_pause = !!(cfg & CGX_SMUX_RX_FRM_CTL_CTL_BCK); + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL); + *tx_pause = !!(cfg & CGX_SMUX_TX_CTL_L2P_BP_CONV); + return 0; +} + /* Enable or disable forwarding received pause frames to Tx block */ void cgx_lmac_enadis_rx_pause_fwding(void *cgxd, int lmac_id, bool enable) { struct cgx *cgx = cgxd; + u8 rx_pause, tx_pause; + bool is_pfc_enabled; + struct lmac *lmac; u64 cfg; if (!cgx) return; - if (enable) { - cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); - cfg |= 
CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + lmac = lmac_pdata(lmac_id, cgx); + if (!lmac) + return; - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); - cfg |= CGX_SMUX_RX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + /* Pause frames are not enabled just return */ + if (!bitmap_weight(lmac->rx_fc_pfvf_bmap.bmap, lmac->rx_fc_pfvf_bmap.max)) + return; + + cgx_lmac_get_pause_frm_status(cgx, lmac_id, &rx_pause, &tx_pause); + is_pfc_enabled = rx_pause ? false : true; + + if (enable) { + if (!is_pfc_enabled) { + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg |= CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); + cfg |= CGX_SMUX_RX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + } else { + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_CBFC_CTL); + cfg |= CGXX_SMUX_CBFC_CTL_BCK_EN; + cgx_write(cgx, lmac_id, CGXX_SMUX_CBFC_CTL, cfg); + } } else { - cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); - cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); - cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + if (!is_pfc_enabled) { + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); + cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + } else { + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_CBFC_CTL); + cfg &= ~CGXX_SMUX_CBFC_CTL_BCK_EN; + cgx_write(cgx, lmac_id, CGXX_SMUX_CBFC_CTL, cfg); + } } } @@ -722,26 +769,6 @@ int cgx_lmac_tx_enable(void *cgxd, int lmac_id, bool enable) return !!(last & DATA_PKT_TX_EN); } -static int cgx_lmac_get_pause_frm_status(void *cgxd, int lmac_id, - u8 *tx_pause, u8 *rx_pause) -{ - struct cgx *cgx = cgxd; - u64 cfg; - - if (is_dev_rpm(cgx)) - return 0; - - if (!is_lmac_valid(cgx, lmac_id)) - return -ENODEV; - - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); - *rx_pause = !!(cfg & CGX_SMUX_RX_FRM_CTL_CTL_BCK); - - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL); - *tx_pause = !!(cfg & CGX_SMUX_TX_CTL_L2P_BP_CONV); - return 0; -} - static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause) { @@ -782,21 +809,8 @@ static void cgx_lmac_pause_frm_config(void *cgxd, int lmac_id, bool enable) if (!is_lmac_valid(cgx, lmac_id)) return; - if (enable) { - /* Enable receive pause frames */ - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); - cfg |= CGX_SMUX_RX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); - - cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); - cfg |= CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); - - /* Enable pause frames transmission */ - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL); - cfg |= CGX_SMUX_TX_CTL_L2P_BP_CONV; - cgx_write(cgx, lmac_id, CGXX_SMUX_TX_CTL, cfg); + if (enable) { /* Set pause time and interval */ cgx_write(cgx, lmac_id, CGXX_SMUX_TX_PAUSE_PKT_TIME, DEFAULT_PAUSE_TIME); @@ -813,21 +827,120 @@ static void cgx_lmac_pause_frm_config(void *cgxd, int lmac_id, bool enable) cfg &= ~0xFFFFULL; cgx_write(cgx, lmac_id, CGXX_GMP_GMI_TX_PAUSE_PKT_INTERVAL, cfg | (DEFAULT_PAUSE_TIME / 2)); - } else { - 
/* ALL pause frames received are completely ignored */ - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); - cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + } - cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); - cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + /* ALL pause frames received are completely ignored */ + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); + cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + + /* Disable pause frames transmission */ + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL); + cfg &= ~CGX_SMUX_TX_CTL_L2P_BP_CONV; + cgx_write(cgx, lmac_id, CGXX_SMUX_TX_CTL, cfg); + + cfg = cgx_read(cgx, 0, CGXX_CMR_RX_OVR_BP); + cfg |= CGX_CMR_RX_OVR_BP_EN(lmac_id); + cfg &= ~CGX_CMR_RX_OVR_BP_BP(lmac_id); + cgx_write(cgx, 0, CGXX_CMR_RX_OVR_BP, cfg); +} + +int verify_lmac_fc_cfg(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause, + int pfvf_idx) +{ + struct cgx *cgx = cgxd; + struct lmac *lmac; + + lmac = lmac_pdata(lmac_id, cgx); + if (!lmac) + return -ENODEV; + + if (!rx_pause) + clear_bit(pfvf_idx, lmac->rx_fc_pfvf_bmap.bmap); + else + set_bit(pfvf_idx, lmac->rx_fc_pfvf_bmap.bmap); + + if (!tx_pause) + clear_bit(pfvf_idx, lmac->tx_fc_pfvf_bmap.bmap); + else + set_bit(pfvf_idx, lmac->tx_fc_pfvf_bmap.bmap); + + /* check if other pfvfs are using flow control */ + if (!rx_pause && bitmap_weight(lmac->rx_fc_pfvf_bmap.bmap, lmac->rx_fc_pfvf_bmap.max)) { + dev_warn(&cgx->pdev->dev, + "Receive Flow control disable not permitted as its used by other PFVFs\n"); + return -EPERM; + } + + if (!tx_pause && bitmap_weight(lmac->tx_fc_pfvf_bmap.bmap, lmac->tx_fc_pfvf_bmap.max)) { + dev_warn(&cgx->pdev->dev, + "Transmit Flow control disable not permitted as its used by other PFVFs\n"); + return -EPERM; + } + + return 0; +} + +int cgx_lmac_pfc_config(void *cgxd, int lmac_id, u8 tx_pause, + u8 rx_pause, u16 pfc_en) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + + /* Return as no traffic classes are requested */ + if (tx_pause && !pfc_en) + return 0; + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_CBFC_CTL); - /* Disable pause frames transmission */ - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL); - cfg &= ~CGX_SMUX_TX_CTL_L2P_BP_CONV; - cgx_write(cgx, lmac_id, CGXX_SMUX_TX_CTL, cfg); + if (rx_pause) { + cfg |= (CGXX_SMUX_CBFC_CTL_RX_EN | + CGXX_SMUX_CBFC_CTL_BCK_EN | + CGXX_SMUX_CBFC_CTL_DRP_EN); + } else { + cfg &= ~(CGXX_SMUX_CBFC_CTL_RX_EN | + CGXX_SMUX_CBFC_CTL_BCK_EN | + CGXX_SMUX_CBFC_CTL_DRP_EN); } + + if (tx_pause) + cfg |= CGXX_SMUX_CBFC_CTL_TX_EN; + else + cfg &= ~CGXX_SMUX_CBFC_CTL_TX_EN; + + cfg = FIELD_SET(CGX_PFC_CLASS_MASK, pfc_en, cfg); + + cgx_write(cgx, lmac_id, CGXX_SMUX_CBFC_CTL, cfg); + + /* Write source MAC address which will be filled into PFC packet */ + cfg = cgx_lmac_addr_get(cgx->cgx_id, lmac_id); + cgx_write(cgx, lmac_id, CGXX_SMUX_SMAC, cfg); + + return 0; +} + +int cgx_lmac_get_pfc_frm_cfg(void *cgxd, int lmac_id, u8 *tx_pause, + u8 *rx_pause) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_CBFC_CTL); + + *rx_pause = !!(cfg & CGXX_SMUX_CBFC_CTL_RX_EN); + *tx_pause = !!(cfg & CGXX_SMUX_CBFC_CTL_TX_EN); + + return 0; } void 
cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable) @@ -1489,6 +1602,16 @@ static int cgx_lmac_init(struct cgx *cgx) /* Reserve first entry for default MAC address */ set_bit(0, lmac->mac_to_index_bmap.bmap); + lmac->rx_fc_pfvf_bmap.max = 128; + err = rvu_alloc_bitmap(&lmac->rx_fc_pfvf_bmap); + if (err) + goto err_dmac_bmap_free; + + lmac->tx_fc_pfvf_bmap.max = 128; + err = rvu_alloc_bitmap(&lmac->tx_fc_pfvf_bmap); + if (err) + goto err_rx_fc_bmap_free; + init_waitqueue_head(&lmac->wq_cmd_cmplt); mutex_init(&lmac->cmd_lock); spin_lock_init(&lmac->event_cb_lock); @@ -1505,6 +1628,10 @@ static int cgx_lmac_init(struct cgx *cgx) return cgx_lmac_verify_fwi_version(cgx); err_bitmap_free: + rvu_free_bitmap(&lmac->tx_fc_pfvf_bmap); +err_rx_fc_bmap_free: + rvu_free_bitmap(&lmac->rx_fc_pfvf_bmap); +err_dmac_bmap_free: rvu_free_bitmap(&lmac->mac_to_index_bmap); err_name_free: kfree(lmac->name); @@ -1572,6 +1699,8 @@ static struct mac_ops cgx_mac_ops = { .mac_enadis_ptp_config = cgx_lmac_ptp_config, .mac_rx_tx_enable = cgx_lmac_rx_tx_enable, .mac_tx_enable = cgx_lmac_tx_enable, + .pfc_config = cgx_lmac_pfc_config, + .mac_get_pfc_frm_cfg = cgx_lmac_get_pfc_frm_cfg, }; static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index ab1e4abdea38..bd2f33a26eee 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -76,6 +76,13 @@ #define CGXX_SMUX_TX_CTL 0x20178 #define CGXX_SMUX_TX_PAUSE_PKT_TIME 0x20110 #define CGXX_SMUX_TX_PAUSE_PKT_INTERVAL 0x20120 +#define CGXX_SMUX_SMAC 0x20108 +#define CGXX_SMUX_CBFC_CTL 0x20218 +#define CGXX_SMUX_CBFC_CTL_RX_EN BIT_ULL(0) +#define CGXX_SMUX_CBFC_CTL_TX_EN BIT_ULL(1) +#define CGXX_SMUX_CBFC_CTL_DRP_EN BIT_ULL(2) +#define CGXX_SMUX_CBFC_CTL_BCK_EN BIT_ULL(3) +#define CGX_PFC_CLASS_MASK GENMASK_ULL(47, 32) #define CGXX_GMP_GMI_TX_PAUSE_PKT_TIME 0x38230 #define CGXX_GMP_GMI_TX_PAUSE_PKT_INTERVAL 0x38248 #define CGX_SMUX_TX_CTL_L2P_BP_CONV BIT_ULL(7) @@ -172,4 +179,10 @@ u64 cgx_lmac_read(int cgx_id, int lmac_id, u64 offset); int cgx_lmac_addr_update(u8 cgx_id, u8 lmac_id, u8 *mac_addr, u8 index); u64 cgx_read_dmac_ctrl(void *cgxd, int lmac_id); u64 cgx_read_dmac_entry(void *cgxd, int index); +int cgx_lmac_pfc_config(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause, + u16 pfc_en); +int cgx_lmac_get_pfc_frm_cfg(void *cgxd, int lmac_id, u8 *tx_pause, + u8 *rx_pause); +int verify_lmac_fc_cfg(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause, + int pfvf_idx); #endif /* CGX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h index b33e7d1d0851..f30581bf0688 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -17,6 +17,8 @@ * @resp: command response * @link_info: link related information * @mac_to_index_bmap: Mac address to CGX table index mapping + * @rx_fc_pfvf_bmap: Receive flow control enabled netdev mapping + * @tx_fc_pfvf_bmap: Transmit flow control enabled netdev mapping * @event_cb: callback for linkchange events * @event_cb_lock: lock for serializing callback with unregister * @cgx: parent cgx port @@ -33,6 +35,8 @@ struct lmac { u64 resp; struct cgx_link_user_info link_info; struct rsrc_bmap mac_to_index_bmap; + struct rsrc_bmap rx_fc_pfvf_bmap; + struct rsrc_bmap tx_fc_pfvf_bmap; struct cgx_event_cb event_cb; 
/* lock for serializing callback with unregister */ spinlock_t event_cb_lock; @@ -110,6 +114,12 @@ struct mac_ops { int (*mac_rx_tx_enable)(void *cgxd, int lmac_id, bool enable); int (*mac_tx_enable)(void *cgxd, int lmac_id, bool enable); + int (*pfc_config)(void *cgxd, int lmac_id, + u8 tx_pause, u8 rx_pause, u16 pfc_en); + + int (*mac_get_pfc_frm_cfg)(void *cgxd, int lmac_id, + u8 *tx_pause, u8 *rx_pause); + }; struct cgx { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 58e2aeebc14f..550cb11197bf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -172,6 +172,8 @@ M(RPM_STATS, 0x21C, rpm_stats, msg_req, rpm_stats_rsp) \ M(CGX_MAC_ADDR_RESET, 0x21D, cgx_mac_addr_reset, msg_req, msg_rsp) \ M(CGX_MAC_ADDR_UPDATE, 0x21E, cgx_mac_addr_update, cgx_mac_addr_update_req, \ msg_rsp) \ +M(CGX_PRIO_FLOW_CTRL_CFG, 0x21F, cgx_prio_flow_ctrl_cfg, cgx_pfc_cfg, \ + cgx_pfc_rsp) \ /* NPA mbox IDs (range 0x400 - 0x5FF) */ \ M(NPA_LF_ALLOC, 0x400, npa_lf_alloc, \ npa_lf_alloc_req, npa_lf_alloc_rsp) \ @@ -609,6 +611,21 @@ struct rpm_stats_rsp { u64 tx_stats[RPM_TX_STATS_COUNT]; }; +struct cgx_pfc_cfg { + struct mbox_msghdr hdr; + u8 rx_pause; + u8 tx_pause; + u16 pfc_en; /* bitmap indicating pfc enabled traffic classes */ +}; + +struct cgx_pfc_rsp { + struct mbox_msghdr hdr; + u8 rx_pause; + u8 tx_pause; +}; + + /* NPA mbox message formats */ + struct npc_set_pkind { struct mbox_msghdr hdr; #define OTX2_PRIV_FLAGS_DEFAULT BIT_ULL(0) @@ -1603,6 +1620,8 @@ enum cgx_af_status { LMAC_AF_ERR_INVALID_PARAM = -1101, LMAC_AF_ERR_PF_NOT_MAPPED = -1102, LMAC_AF_ERR_PERM_DENIED = -1103, + LMAC_AF_ERR_PFC_ENADIS_PERM_DENIED = -1104, + LMAC_AF_ERR_8023PAUSE_ENADIS_PERM_DENIED = -1105, }; #endif /* MBOX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index 9ea2f6ac38ec..d7a8aad46e12 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -32,6 +32,8 @@ static struct mac_ops rpm_mac_ops = { .mac_enadis_ptp_config = rpm_lmac_ptp_config, .mac_rx_tx_enable = rpm_lmac_rx_tx_enable, .mac_tx_enable = rpm_lmac_tx_enable, + .pfc_config = rpm_lmac_pfc_config, + .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, }; struct mac_ops *rpm_get_mac_ops(void) @@ -96,11 +98,20 @@ int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable) void rpm_lmac_enadis_rx_pause_fwding(void *rpmd, int lmac_id, bool enable) { rpm_t *rpm = rpmd; + struct lmac *lmac; u64 cfg; if (!rpm) return; + lmac = lmac_pdata(lmac_id, rpm); + if (!lmac) + return; + + /* Pause frames are not enabled just return */ + if (!bitmap_weight(lmac->rx_fc_pfvf_bmap.bmap, lmac->rx_fc_pfvf_bmap.max)) + return; + if (enable) { cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; @@ -122,13 +133,93 @@ int rpm_lmac_get_pause_frm_status(void *rpmd, int lmac_id, return -ENODEV; cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - *rx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE); + if (!(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE)) { + *rx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE); + *tx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE); + } - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - *tx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE); return 0; } 
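The RPM hunk above ends with rpm_lmac_get_pause_frm_status() reporting 802.3x pause state only while RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE is clear; once the MAC is switched into PFC mode, the same RX/TX pause-disable bits describe per-class (802.1Qbb) behaviour instead, so the legacy status is left at the caller's defaults. The standalone sketch below is a minimal illustration of that decode, not driver code: cmd_cfg stands in for an rpm_read() of RPMX_MTI_MAC100X_COMMAND_CONFIG, PFC_MODE at bit 19 is quoted from the rpm.h hunk in this series, while the RX_P_DISABLE/TX_P_DISABLE positions (bits 29 and 28) are assumptions based on rpm.h rather than anything quoted in this patch.

    /* Illustrative sketch only: decode 802.3x pause status from a
     * COMMAND_CONFIG snapshot the way rpm_lmac_get_pause_frm_status()
     * does after this series. Bits 28/29 are assumed from rpm.h.
     */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CMD_CFG_PFC_MODE     (1ULL << 19) /* quoted from rpm.h above */
    #define CMD_CFG_TX_P_DISABLE (1ULL << 28) /* assumed bit position */
    #define CMD_CFG_RX_P_DISABLE (1ULL << 29) /* assumed bit position */

    static void decode_pause_status(uint64_t cmd_cfg, bool *tx_pause,
                                    bool *rx_pause)
    {
            /* In PFC mode the disable bits act as per-class controls, so
             * 802.3x status is only meaningful when PFC_MODE is clear --
             * the same gate this patch adds to the status readout.
             */
            if (cmd_cfg & CMD_CFG_PFC_MODE)
                    return; /* keep the caller's zero-initialized defaults */

            /* The hardware bits are *disable* bits, hence the inversion */
            *rx_pause = !(cmd_cfg & CMD_CFG_RX_P_DISABLE);
            *tx_pause = !(cmd_cfg & CMD_CFG_TX_P_DISABLE);
    }

    int main(void)
    {
            bool tx = false, rx = false;

            /* PFC off, both disable bits clear: pause enabled both ways */
            decode_pause_status(0, &tx, &rx);
            printf("802.3x pause: tx=%d rx=%d\n", tx, rx);

            /* PFC on: status stays at the defaults, as in the driver */
            tx = false;
            rx = false;
            decode_pause_status(CMD_CFG_PFC_MODE, &tx, &rx);
            printf("PFC mode:     tx=%d rx=%d\n", tx, rx);
            return 0;
    }

The shared-register design is also why, further down in rvu_cgx.c, rvu_cgx_cfg_pause_frm() refuses 802.3x configuration whenever mac_get_pfc_frm_cfg() reports PFC active in either direction, and rvu_cgx_prio_flow_ctrl_cfg() does the reverse check: both modes are driven from COMMAND_CONFIG, so the AF treats them as mutually exclusive.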
+static void rpm_cfg_pfc_quanta_thresh(rpm_t *rpm, int lmac_id, u16 pfc_en, + bool enable) +{ + u64 quanta_offset = 0, quanta_thresh = 0, cfg; + int i, shift; + + /* Set pause time and interval */ + for_each_set_bit(i, (unsigned long *)&pfc_en, 16) { + switch (i) { + case 0: + case 1: + quanta_offset = RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL01_QUANTA_THRESH; + break; + case 2: + case 3: + quanta_offset = RPMX_MTI_MAC100X_CL23_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL23_QUANTA_THRESH; + break; + case 4: + case 5: + quanta_offset = RPMX_MTI_MAC100X_CL45_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL45_QUANTA_THRESH; + break; + case 6: + case 7: + quanta_offset = RPMX_MTI_MAC100X_CL67_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL67_QUANTA_THRESH; + break; + case 8: + case 9: + quanta_offset = RPMX_MTI_MAC100X_CL89_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL89_QUANTA_THRESH; + break; + case 10: + case 11: + quanta_offset = RPMX_MTI_MAC100X_CL1011_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL1011_QUANTA_THRESH; + break; + case 12: + case 13: + quanta_offset = RPMX_MTI_MAC100X_CL1213_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL1213_QUANTA_THRESH; + break; + case 14: + case 15: + quanta_offset = RPMX_MTI_MAC100X_CL1415_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL1415_QUANTA_THRESH; + break; + } + + if (!quanta_offset || !quanta_thresh) + continue; + + shift = (i % 2) ? 1 : 0; + cfg = rpm_read(rpm, lmac_id, quanta_offset); + if (enable) { + cfg |= ((u64)RPM_DEFAULT_PAUSE_TIME << shift * 16); + } else { + if (!shift) + cfg &= ~GENMASK_ULL(15, 0); + else + cfg &= ~GENMASK_ULL(31, 16); + } + rpm_write(rpm, lmac_id, quanta_offset, cfg); + + cfg = rpm_read(rpm, lmac_id, quanta_thresh); + if (enable) { + cfg |= ((u64)(RPM_DEFAULT_PAUSE_TIME / 2) << shift * 16); + } else { + if (!shift) + cfg &= ~GENMASK_ULL(15, 0); + else + cfg &= ~GENMASK_ULL(31, 16); + } + rpm_write(rpm, lmac_id, quanta_thresh, cfg); + } +} + int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause) { @@ -152,8 +243,12 @@ int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause, cfg = rpm_read(rpm, 0, RPMX_CMR_RX_OVR_BP); if (tx_pause) { + /* Configure CL0 Pause Quanta & threshold for 802.3X frames */ + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 1, true); cfg &= ~RPMX_CMR_RX_OVR_BP_EN(lmac_id); } else { + /* Disable all Pause Quanta & threshold values */ + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xffff, false); cfg |= RPMX_CMR_RX_OVR_BP_EN(lmac_id); cfg &= ~RPMX_CMR_RX_OVR_BP_BP(lmac_id); } @@ -166,56 +261,20 @@ void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable) rpm_t *rpm = rpmd; u64 cfg; - if (enable) { - /* Enable 802.3 pause frame mode */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - - /* Enable receive pause frames */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - - /* Enable forward pause to TX block */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - - /* Enable pause frames transmission */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg &= 
~RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - - /* Set pause time and interval */ - cfg = rpm_read(rpm, lmac_id, - RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA); - cfg &= ~0xFFFFULL; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA, - cfg | RPM_DEFAULT_PAUSE_TIME); - /* Set pause interval as the hardware default is too short */ - cfg = rpm_read(rpm, lmac_id, - RPMX_MTI_MAC100X_CL01_QUANTA_THRESH); - cfg &= ~0xFFFFULL; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_CL01_QUANTA_THRESH, - cfg | (RPM_DEFAULT_PAUSE_TIME / 2)); - - } else { - /* ALL pause frames received are completely ignored */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + /* ALL pause frames received are completely ignored */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - /* Disable forward pause to TX block */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + /* Disable forward pause to TX block */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - /* Disable pause frames transmission */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - } + /* Disable pause frames transmission */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); } int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat) @@ -323,3 +382,65 @@ void rpm_lmac_ptp_config(void *rpmd, int lmac_id, bool enable) cfg &= ~RPMX_RX_TS_PREPEND; rpm_write(rpm, lmac_id, RPMX_CMRX_CFG, cfg); } + +int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 pfc_en) +{ + rpm_t *rpm = rpmd; + u64 cfg; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + /* reset PFC class quanta and threshold */ + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xffff, false); + + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + + if (rx_pause) { + cfg &= ~(RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE | + RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE | + RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD); + } else { + cfg |= (RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE | + RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE | + RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD); + } + + if (tx_pause) { + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, pfc_en, true); + cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; + } else { + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xfff, false); + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; + } + + if (!rx_pause && !tx_pause) + cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE; + else + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE; + + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + + cfg = rpm_read(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL); + cfg = FIELD_SET(RPM_PFC_CLASS_MASK, pfc_en, cfg); + rpm_write(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL, 
cfg); + + return 0; +} + +int rpm_lmac_get_pfc_frm_cfg(void *rpmd, int lmac_id, u8 *tx_pause, u8 *rx_pause) +{ + rpm_t *rpm = rpmd; + u64 cfg; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + if (cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE) { + *rx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE); + *tx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE); + } + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index ff580311edd0..9ab8d49dd180 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -33,7 +33,21 @@ #define RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE BIT_ULL(8) #define RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE BIT_ULL(19) #define RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA 0x80A8 +#define RPMX_MTI_MAC100X_CL23_PAUSE_QUANTA 0x80B0 +#define RPMX_MTI_MAC100X_CL45_PAUSE_QUANTA 0x80B8 +#define RPMX_MTI_MAC100X_CL67_PAUSE_QUANTA 0x80C0 #define RPMX_MTI_MAC100X_CL01_QUANTA_THRESH 0x80C8 +#define RPMX_MTI_MAC100X_CL23_QUANTA_THRESH 0x80D0 +#define RPMX_MTI_MAC100X_CL45_QUANTA_THRESH 0x80D8 +#define RPMX_MTI_MAC100X_CL67_QUANTA_THRESH 0x80E0 +#define RPMX_MTI_MAC100X_CL89_PAUSE_QUANTA 0x8108 +#define RPMX_MTI_MAC100X_CL1011_PAUSE_QUANTA 0x8110 +#define RPMX_MTI_MAC100X_CL1213_PAUSE_QUANTA 0x8118 +#define RPMX_MTI_MAC100X_CL1415_PAUSE_QUANTA 0x8120 +#define RPMX_MTI_MAC100X_CL89_QUANTA_THRESH 0x8128 +#define RPMX_MTI_MAC100X_CL1011_QUANTA_THRESH 0x8130 +#define RPMX_MTI_MAC100X_CL1213_QUANTA_THRESH 0x8138 +#define RPMX_MTI_MAC100X_CL1415_QUANTA_THRESH 0x8140 #define RPM_DEFAULT_PAUSE_TIME 0xFFFF #define RPMX_CMR_RX_OVR_BP 0x4120 #define RPMX_CMR_RX_OVR_BP_EN(x) BIT_ULL((x) + 8) @@ -45,6 +59,18 @@ #define RPM_LMAC_FWI 0xa #define RPM_TX_EN BIT_ULL(0) #define RPM_RX_EN BIT_ULL(1) +#define RPMX_CMRX_PRT_CBFC_CTL 0x5B08 +#define RPMX_CMRX_PRT_CBFC_CTL_LOGL_EN_RX_SHIFT 33 +#define RPMX_CMRX_PRT_CBFC_CTL_PHYS_BP_SHIFT 16 +#define RPMX_CMRX_PRT_CBFC_CTL_LOGL_EN_TX_SHIFT 0 +#define RPM_PFC_CLASS_MASK GENMASK_ULL(48, 33) +#define RPMX_MTI_MAC100X_CL89_QUANTA_THRESH 0x8128 +#define RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_PAD_EN BIT_ULL(11) +#define RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE BIT_ULL(8) +#define RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD BIT_ULL(7) +#define RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA 0x80A8 +#define RPMX_MTI_MAC100X_CL89_PAUSE_QUANTA 0x8108 +#define RPM_DEFAULT_PAUSE_TIME 0xFFFF /* Function Declarations */ int rpm_get_nr_lmacs(void *rpmd); @@ -61,4 +87,8 @@ int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat); void rpm_lmac_ptp_config(void *rpmd, int lmac_id, bool enable); int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable); int rpm_lmac_tx_enable(void *rpmd, int lmac_id, bool enable); +int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, + u16 pfc_en); +int rpm_lmac_get_pfc_frm_cfg(void *rpmd, int lmac_id, u8 *tx_pause, + u8 *rx_pause); #endif /* RPM_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 5ed94cfb47d2..513b43ecd5be 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -807,6 +807,9 @@ u32 rvu_cgx_get_fifolen(struct rvu *rvu); void *rvu_first_cgx_pdata(struct rvu *rvu); int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id); int rvu_cgx_config_tx(void 
*cgxd, int lmac_id, bool enable); +int rvu_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause, + u16 pfc_en); +int rvu_cgx_cfg_pause_frm(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause); int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, int type); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 8a7ac5a8b821..9ffe99830e34 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -863,6 +863,45 @@ int rvu_mbox_handler_cgx_intlbk_disable(struct rvu *rvu, struct msg_req *req, return 0; } +int rvu_cgx_cfg_pause_frm(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause) +{ + int pf = rvu_get_pf(pcifunc); + u8 rx_pfc = 0, tx_pfc = 0; + struct mac_ops *mac_ops; + u8 cgx_id, lmac_id; + void *cgxd; + + if (!is_mac_feature_supported(rvu, pf, RVU_LMAC_FEAT_FC)) + return 0; + + /* This msg is expected only from PF/VFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. + */ + if (!is_pf_cgxmapped(rvu, pf)) + return LMAC_AF_ERR_PF_NOT_MAPPED; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + cgxd = rvu_cgx_pdata(cgx_id, rvu); + mac_ops = get_mac_ops(cgxd); + + mac_ops->mac_get_pfc_frm_cfg(cgxd, lmac_id, &tx_pfc, &rx_pfc); + if (tx_pfc || rx_pfc) { + dev_warn(rvu->dev, + "Can not configure 802.3X flow control as PFC frames are enabled"); + return LMAC_AF_ERR_8023PAUSE_ENADIS_PERM_DENIED; + } + + mutex_lock(&rvu->rsrc_lock); + if (verify_lmac_fc_cfg(cgxd, lmac_id, tx_pause, rx_pause, + pcifunc & RVU_PFVF_FUNC_MASK)) { + mutex_unlock(&rvu->rsrc_lock); + return LMAC_AF_ERR_PERM_DENIED; + } + mutex_unlock(&rvu->rsrc_lock); + + return mac_ops->mac_enadis_pause_frm(cgxd, lmac_id, tx_pause, rx_pause); +} + int rvu_mbox_handler_cgx_cfg_pause_frm(struct rvu *rvu, struct cgx_pause_frm_cfg *req, struct cgx_pause_frm_cfg *rsp) @@ -870,11 +909,9 @@ int rvu_mbox_handler_cgx_cfg_pause_frm(struct rvu *rvu, int pf = rvu_get_pf(req->hdr.pcifunc); struct mac_ops *mac_ops; u8 cgx_id, lmac_id; + int err = 0; void *cgxd; - if (!is_mac_feature_supported(rvu, pf, RVU_LMAC_FEAT_FC)) - return 0; - /* This msg is expected only from PF/VFs that are mapped to CGX LMACs, * if received from other PF/VF simply ACK, nothing to do. */ @@ -886,13 +923,11 @@ int rvu_mbox_handler_cgx_cfg_pause_frm(struct rvu *rvu, mac_ops = get_mac_ops(cgxd); if (req->set) - mac_ops->mac_enadis_pause_frm(cgxd, lmac_id, - req->tx_pause, req->rx_pause); + err = rvu_cgx_cfg_pause_frm(rvu, req->hdr.pcifunc, req->tx_pause, req->rx_pause); else - mac_ops->mac_get_pause_frm_status(cgxd, lmac_id, - &rsp->tx_pause, - &rsp->rx_pause); - return 0; + mac_ops->mac_get_pause_frm_status(cgxd, lmac_id, &rsp->tx_pause, &rsp->rx_pause); + + return err; } int rvu_mbox_handler_cgx_get_phy_fec_stats(struct rvu *rvu, struct msg_req *req, @@ -1079,3 +1114,67 @@ int rvu_mbox_handler_cgx_mac_addr_update(struct rvu *rvu, rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); return cgx_lmac_addr_update(cgx_id, lmac_id, req->mac_addr, req->index); } + +int rvu_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, u16 pcifunc, u8 tx_pause, + u8 rx_pause, u16 pfc_en) +{ + int pf = rvu_get_pf(pcifunc); + u8 rx_8023 = 0, tx_8023 = 0; + struct mac_ops *mac_ops; + u8 cgx_id, lmac_id; + void *cgxd; + + /* This msg is expected only from PF/VFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. 
+ */ + if (!is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + cgxd = rvu_cgx_pdata(cgx_id, rvu); + mac_ops = get_mac_ops(cgxd); + + mac_ops->mac_get_pause_frm_status(cgxd, lmac_id, &tx_8023, &rx_8023); + if (tx_8023 || rx_8023) { + dev_warn(rvu->dev, + "Can not configure PFC as 802.3X pause frames are enabled"); + return LMAC_AF_ERR_PFC_ENADIS_PERM_DENIED; + } + + mutex_lock(&rvu->rsrc_lock); + if (verify_lmac_fc_cfg(cgxd, lmac_id, tx_pause, rx_pause, + pcifunc & RVU_PFVF_FUNC_MASK)) { + mutex_unlock(&rvu->rsrc_lock); + return LMAC_AF_ERR_PERM_DENIED; + } + mutex_unlock(&rvu->rsrc_lock); + + return mac_ops->pfc_config(cgxd, lmac_id, tx_pause, rx_pause, pfc_en); +} + +int rvu_mbox_handler_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, + struct cgx_pfc_cfg *req, + struct cgx_pfc_rsp *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + struct mac_ops *mac_ops; + u8 cgx_id, lmac_id; + void *cgxd; + int err; + + /* This msg is expected only from PF/VFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. + */ + if (!is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + cgxd = rvu_cgx_pdata(cgx_id, rvu); + mac_ops = get_mac_ops(cgxd); + + err = rvu_cgx_prio_flow_ctrl_cfg(rvu, req->hdr.pcifunc, req->tx_pause, + req->rx_pause, req->pfc_en); + + mac_ops->mac_get_pfc_frm_cfg(cgxd, lmac_id, &rsp->tx_pause, &rsp->rx_pause); + return err; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 97fb61915379..73cd39af0090 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -296,7 +296,6 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf, struct rvu_hwinfo *hw = rvu->hw; struct sdp_node_info *sdp_info; int pkind, pf, vf, lbkid, vfid; - struct mac_ops *mac_ops; u8 cgx_id, lmac_id; bool from_vf; int err; @@ -326,13 +325,6 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf, cgx_set_pkind(rvu_cgx_pdata(cgx_id, rvu), lmac_id, pkind); rvu_npc_set_pkind(rvu, pkind, pfvf); - mac_ops = get_mac_ops(rvu_cgx_pdata(cgx_id, rvu)); - - /* By default we enable pause frames */ - if ((pcifunc & RVU_PFVF_FUNC_MASK) == 0) - mac_ops->mac_enadis_pause_frm(rvu_cgx_pdata(cgx_id, - rvu), - lmac_id, true, true); break; case NIX_INTF_TYPE_LBK: vf = (pcifunc & RVU_PFVF_FUNC_MASK) - 1; @@ -533,7 +525,7 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req, */ switch (type) { case NIX_INTF_TYPE_CGX: - if ((req->chan_base + req->chan_cnt) > 15) + if ((req->chan_base + req->chan_cnt) > 16) return -EINVAL; rvu_get_cgx_lmac_id(pfvf->cgx_lmac, &cgx_id, &lmac_id); /* Assign bpid based on cgx, lmac and chan id */ @@ -4578,6 +4570,12 @@ void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf) pfvf->hw_rx_tstamp_en = false; } + /* reset priority flow control config */ + rvu_cgx_prio_flow_ctrl_cfg(rvu, pcifunc, 0, 0, 0); + + /* reset 802.3x flow control config */ + rvu_cgx_cfg_pause_frm(rvu, pcifunc, 0, 0); + nix_ctx_free(rvu, pfvf); nix_free_all_bandprof(rvu, pcifunc); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index 0048b5946712..d463dc72d80a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ 
-11,4 +11,7 @@ rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ otx2_devlink.o rvu_nicvf-y := otx2_vf.o otx2_devlink.o +rvu_nicpf-$(CONFIG_DCB) += otx2_dcbnl.o +rvu_nicvf-$(CONFIG_DCB) += otx2_dcbnl.o + ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 92c0ddb602ca..358e5b216698 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -269,6 +269,7 @@ unlock: mutex_unlock(&pfvf->mbox.lock); return err; } +EXPORT_SYMBOL(otx2_config_pause_frm); int otx2_set_flowkey_cfg(struct otx2_nic *pfvf) { @@ -938,7 +939,11 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) if (!is_otx2_lbkvf(pfvf->pdev)) { /* Enable receive CQ backpressure */ aq->cq.bp_ena = 1; +#ifdef CONFIG_DCB + aq->cq.bpid = pfvf->bpid[pfvf->queue_to_pfc_map[qidx]]; +#else aq->cq.bpid = pfvf->bpid[0]; +#endif /* Set backpressure level is same as cq pass level */ aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt); @@ -1218,7 +1223,11 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, */ if (pfvf->nix_blkaddr == BLKADDR_NIX1) aq->aura.bp_ena = 1; +#ifdef CONFIG_DCB + aq->aura.nix0_bpid = pfvf->bpid[pfvf->queue_to_pfc_map[aura_id]]; +#else aq->aura.nix0_bpid = pfvf->bpid[0]; +#endif /* Set backpressure level for RQ's Aura */ aq->aura.bp = RQ_BP_LVL_AURA; @@ -1545,11 +1554,18 @@ int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable) return -ENOMEM; req->chan_base = 0; - req->chan_cnt = 1; +#ifdef CONFIG_DCB + req->chan_cnt = pfvf->pfc_en ? IEEE_8021QAZ_MAX_TCS : 1; + req->bpid_per_chan = pfvf->pfc_en ? 1 : 0; +#else + req->chan_cnt = 1; req->bpid_per_chan = 0; +#endif + return otx2_sync_mbox_msg(&pfvf->mbox); } +EXPORT_SYMBOL(otx2_nix_config_bp); /* Mbox message handlers */ void mbox_handler_cgx_stats(struct otx2_nic *pfvf, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 56be20053931..b6be9784ea36 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -399,6 +399,11 @@ struct otx2_nic { /* Devlink */ struct otx2_devlink *dl; +#ifdef CONFIG_DCB + /* PFC */ + u8 pfc_en; + u8 *queue_to_pfc_map; +#endif }; static inline bool is_otx2_lbkvf(struct pci_dev *pdev) @@ -879,4 +884,11 @@ int otx2_dmacflt_remove(struct otx2_nic *pf, const u8 *mac, u8 bit_pos); int otx2_dmacflt_update(struct otx2_nic *pf, u8 *mac, u8 bit_pos); void otx2_dmacflt_reinstall_flows(struct otx2_nic *pf); void otx2_dmacflt_update_pfmac_flow(struct otx2_nic *pfvf); + +#ifdef CONFIG_DCB +/* DCB support*/ +void otx2_update_bpid_in_rqctx(struct otx2_nic *pfvf, int vlan_prio, int qidx, bool pfc_enable); +int otx2_config_priority_flow_ctrl(struct otx2_nic *pfvf); +int otx2_dcbnl_set_ops(struct net_device *dev); +#endif #endif /* OTX2_COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c new file mode 100644 index 000000000000..723d2506d309 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RVU Ethernet driver + * + * Copyright (C) 2021 Marvell. 
+ * + */ + +#include "otx2_common.h" + +int otx2_config_priority_flow_ctrl(struct otx2_nic *pfvf) +{ + struct cgx_pfc_cfg *req; + struct cgx_pfc_rsp *rsp; + int err = 0; + + if (is_otx2_lbkvf(pfvf->pdev)) + return 0; + + mutex_lock(&pfvf->mbox.lock); + req = otx2_mbox_alloc_msg_cgx_prio_flow_ctrl_cfg(&pfvf->mbox); + if (!req) { + err = -ENOMEM; + goto unlock; + } + + if (pfvf->pfc_en) { + req->rx_pause = true; + req->tx_pause = true; + } else { + req->rx_pause = false; + req->tx_pause = false; + } + req->pfc_en = pfvf->pfc_en; + + if (!otx2_sync_mbox_msg(&pfvf->mbox)) { + rsp = (struct cgx_pfc_rsp *) + otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr); + if (req->rx_pause != rsp->rx_pause || req->tx_pause != rsp->tx_pause) { + dev_warn(pfvf->dev, + "Failed to config PFC\n"); + err = -EPERM; + } + } +unlock: + mutex_unlock(&pfvf->mbox.lock); + return err; +} + +void otx2_update_bpid_in_rqctx(struct otx2_nic *pfvf, int vlan_prio, int qidx, + bool pfc_enable) +{ + bool if_up = netif_running(pfvf->netdev); + struct npa_aq_enq_req *npa_aq; + struct nix_aq_enq_req *aq; + int err = 0; + + if (pfvf->queue_to_pfc_map[qidx] && pfc_enable) { + dev_warn(pfvf->dev, + "PFC enable not permitted as Priority %d already mapped to Queue %d\n", + pfvf->queue_to_pfc_map[qidx], qidx); + return; + } + + if (if_up) { + netif_tx_stop_all_queues(pfvf->netdev); + netif_carrier_off(pfvf->netdev); + } + + pfvf->queue_to_pfc_map[qidx] = vlan_prio; + + aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox); + if (!aq) { + err = -ENOMEM; + goto out; + } + + aq->cq.bpid = pfvf->bpid[vlan_prio]; + aq->cq_mask.bpid = GENMASK(8, 0); + + /* Fill AQ info */ + aq->qidx = qidx; + aq->ctype = NIX_AQ_CTYPE_CQ; + aq->op = NIX_AQ_INSTOP_WRITE; + + otx2_sync_mbox_msg(&pfvf->mbox); + + npa_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); + if (!npa_aq) { + err = -ENOMEM; + goto out; + } + npa_aq->aura.nix0_bpid = pfvf->bpid[vlan_prio]; + npa_aq->aura_mask.nix0_bpid = GENMASK(8, 0); + + /* Fill NPA AQ info */ + npa_aq->aura_id = qidx; + npa_aq->ctype = NPA_AQ_CTYPE_AURA; + npa_aq->op = NPA_AQ_INSTOP_WRITE; + otx2_sync_mbox_msg(&pfvf->mbox); + +out: + if (if_up) { + netif_carrier_on(pfvf->netdev); + netif_tx_start_all_queues(pfvf->netdev); + } + + if (err) + dev_warn(pfvf->dev, + "Updating BPIDs in CQ and Aura contexts of RQ%d failed with err %d\n", + qidx, err); +} + +static int otx2_dcbnl_ieee_getpfc(struct net_device *dev, struct ieee_pfc *pfc) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + + pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS; + pfc->pfc_en = pfvf->pfc_en; + + return 0; +} + +static int otx2_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + int err; + + /* Save PFC configuration to interface */ + pfvf->pfc_en = pfc->pfc_en; + + err = otx2_config_priority_flow_ctrl(pfvf); + if (err) + return err; + + /* Request Per channel Bpids */ + if (pfc->pfc_en) + otx2_nix_config_bp(pfvf, true); + + return 0; +} + +static u8 otx2_dcbnl_getdcbx(struct net_device __always_unused *dev) +{ + return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; +} + +static u8 otx2_dcbnl_setdcbx(struct net_device __always_unused *dev, u8 mode) +{ + return (mode != (DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE)) ? 
1 : 0; +} + +static const struct dcbnl_rtnl_ops otx2_dcbnl_ops = { + .ieee_getpfc = otx2_dcbnl_ieee_getpfc, + .ieee_setpfc = otx2_dcbnl_ieee_setpfc, + .getdcbx = otx2_dcbnl_getdcbx, + .setdcbx = otx2_dcbnl_setdcbx, +}; + +int otx2_dcbnl_set_ops(struct net_device *dev) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + + pfvf->queue_to_pfc_map = devm_kzalloc(pfvf->dev, pfvf->hw.rx_queues, + GFP_KERNEL); + if (!pfvf->queue_to_pfc_map) + return -ENOMEM; + dev->dcbnl_ops = &otx2_dcbnl_ops; + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 77a13fb555fb..54f235c216a9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -21,8 +21,10 @@ struct otx2_flow { u16 entry; bool is_vf; u8 rss_ctx_id; +#define DMAC_FILTER_RULE BIT(0) +#define PFC_FLOWCTRL_RULE BIT(1) + u16 rule_type; int vf; - bool dmac_filter; }; enum dmac_req { @@ -899,6 +901,9 @@ static int otx2_is_flow_rule_dmacfilter(struct otx2_nic *pfvf, static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow) { u64 ring_cookie = flow->flow_spec.ring_cookie; +#ifdef CONFIG_DCB + int vlan_prio, qidx, pfc_rule = 0; +#endif struct npc_install_flow_req *req; int err, vf = 0; @@ -940,6 +945,24 @@ static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow) mutex_unlock(&pfvf->mbox.lock); return -EINVAL; } + +#ifdef CONFIG_DCB + /* Identify PFC rule if PFC enabled and ntuple rule is vlan */ + if (!vf && (req->features & BIT_ULL(NPC_OUTER_VID)) && + pfvf->pfc_en && req->op != NIX_RX_ACTIONOP_RSS) { + vlan_prio = ntohs(req->packet.vlan_tci) & + ntohs(req->mask.vlan_tci); + + /* Get the priority */ + vlan_prio >>= 13; + flow->rule_type |= PFC_FLOWCTRL_RULE; + /* Check if PFC enabled for this priority */ + if (pfvf->pfc_en & BIT(vlan_prio)) { + pfc_rule = true; + qidx = req->index; + } + } +#endif } /* ethtool ring_cookie has (VF + 1) for VF */ @@ -951,6 +974,12 @@ static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow) /* Send message to AF */ err = otx2_sync_mbox_msg(&pfvf->mbox); + +#ifdef CONFIG_DCB + if (!err && pfc_rule) + otx2_update_bpid_in_rqctx(pfvf, vlan_prio, qidx, true); +#endif + mutex_unlock(&pfvf->mbox.lock); return err; } @@ -966,7 +995,7 @@ static int otx2_add_flow_with_pfmac(struct otx2_nic *pfvf, return -ENOMEM; pf_mac->entry = 0; - pf_mac->dmac_filter = true; + pf_mac->rule_type |= DMAC_FILTER_RULE; pf_mac->location = pfvf->flow_cfg->max_flows; memcpy(&pf_mac->flow_spec, &flow->flow_spec, sizeof(struct ethtool_rx_flow_spec)); @@ -1031,7 +1060,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) eth_hdr = &flow->flow_spec.h_u.ether_spec; /* Sync dmac filter table with updated fields */ - if (flow->dmac_filter) + if (flow->rule_type & DMAC_FILTER_RULE) return otx2_dmacflt_update(pfvf, eth_hdr->h_dest, flow->entry); @@ -1052,7 +1081,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) if (!test_bit(0, &flow_cfg->dmacflt_bmap)) otx2_add_flow_with_pfmac(pfvf, flow); - flow->dmac_filter = true; + flow->rule_type |= DMAC_FILTER_RULE; flow->entry = find_first_zero_bit(&flow_cfg->dmacflt_bmap, flow_cfg->dmacflt_max_flows); fsp->location = flow_cfg->max_flows + flow->entry; @@ -1120,7 +1149,7 @@ static void otx2_update_rem_pfmac(struct otx2_nic *pfvf, int req) bool found = false; list_for_each_entry(iter, &pfvf->flow_cfg->flow_list, list) { - if (iter->dmac_filter && iter->entry 
== 0) { + if ((iter->rule_type & DMAC_FILTER_RULE) && iter->entry == 0) { eth_hdr = &iter->flow_spec.h_u.ether_spec; if (req == DMAC_ADDR_DEL) { otx2_dmacflt_remove(pfvf, eth_hdr->h_dest, @@ -1156,7 +1185,7 @@ int otx2_remove_flow(struct otx2_nic *pfvf, u32 location) if (!flow) return -ENOENT; - if (flow->dmac_filter) { + if (flow->rule_type & DMAC_FILTER_RULE) { struct ethhdr *eth_hdr = &flow->flow_spec.h_u.ether_spec; /* user not allowed to remove dmac filter with interface mac */ @@ -1174,6 +1203,13 @@ int otx2_remove_flow(struct otx2_nic *pfvf, u32 location) flow_cfg->dmacflt_max_flows) == 1) otx2_update_rem_pfmac(pfvf, DMAC_ADDR_DEL); } else { +#ifdef CONFIG_DCB + if (flow->rule_type & PFC_FLOWCTRL_RULE) + otx2_update_bpid_in_rqctx(pfvf, 0, + flow->flow_spec.ring_cookie, + false); +#endif + err = otx2_remove_flow_msg(pfvf, flow->entry, false); } @@ -1383,7 +1419,7 @@ void otx2_dmacflt_reinstall_flows(struct otx2_nic *pf) struct ethhdr *eth_hdr; list_for_each_entry(iter, &pf->flow_cfg->flow_list, list) { - if (iter->dmac_filter) { + if (iter->rule_type & DMAC_FILTER_RULE) { eth_hdr = &iter->flow_spec.h_u.ether_spec; otx2_dmacflt_add(pf, eth_hdr->h_dest, iter->entry); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 86c1c2f77bd7..ede4df51648b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1697,9 +1697,6 @@ int otx2_open(struct net_device *netdev) if (pf->linfo.link_up && !(pf->pcifunc & RVU_PFVF_FUNC_MASK)) otx2_handle_link_event(pf); - /* Restore pause frame settings */ - otx2_config_pause_frm(pf); - /* Install DMAC Filters */ if (pf->flags & OTX2_FLAG_DMACFLTR_SUPPORT) otx2_dmacflt_reinstall_flows(pf); @@ -2782,9 +2779,11 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Enable link notifications */ otx2_cgx_config_linkevents(pf, true); - /* Enable pause frames by default */ - pf->flags |= OTX2_FLAG_RX_PAUSE_ENABLED; - pf->flags |= OTX2_FLAG_TX_PAUSE_ENABLED; +#ifdef CONFIG_DCB + err = otx2_dcbnl_set_ops(netdev); + if (err) + goto err_pf_sriov_init; +#endif return 0; @@ -2929,6 +2928,21 @@ static void otx2_remove(struct pci_dev *pdev) if (pf->flags & OTX2_FLAG_RX_TSTAMP_ENABLED) otx2_config_hw_rx_tstamp(pf, false); + /* Disable 802.3x pause frames */ + if (pf->flags & OTX2_FLAG_RX_PAUSE_ENABLED || + (pf->flags & OTX2_FLAG_TX_PAUSE_ENABLED)) { + pf->flags &= ~OTX2_FLAG_RX_PAUSE_ENABLED; + pf->flags &= ~OTX2_FLAG_TX_PAUSE_ENABLED; + otx2_config_pause_frm(pf); + } + +#ifdef CONFIG_DCB + /* Disable PFC config */ + if (pf->pfc_en) { + pf->pfc_en = 0; + otx2_config_priority_flow_ctrl(pf); + } +#endif cancel_work_sync(&pf->reset_task); /* Disable link notifications */ otx2_cgx_config_linkevents(pf, false); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index d96c8903c67e..78142498d046 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -702,9 +702,11 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_unreg_netdev; - /* Enable pause frames by default */ - vf->flags |= OTX2_FLAG_RX_PAUSE_ENABLED; - vf->flags |= OTX2_FLAG_TX_PAUSE_ENABLED; +#ifdef CONFIG_DCB + err = otx2_dcbnl_set_ops(netdev); + if (err) + goto err_unreg_netdev; +#endif return 0; @@ -740,6 +742,22 @@ static void otx2vf_remove(struct pci_dev 
*pdev) vf = netdev_priv(netdev); + /* Disable 802.3x pause frames */ + if (vf->flags & OTX2_FLAG_RX_PAUSE_ENABLED || + (vf->flags & OTX2_FLAG_TX_PAUSE_ENABLED)) { + vf->flags &= ~OTX2_FLAG_RX_PAUSE_ENABLED; + vf->flags &= ~OTX2_FLAG_TX_PAUSE_ENABLED; + otx2_config_pause_frm(vf); + } + +#ifdef CONFIG_DCB + /* Disable PFC config */ + if (vf->pfc_en) { + vf->pfc_en = 0; + otx2_config_priority_flow_ctrl(vf); + } +#endif + cancel_work_sync(&vf->reset_task); otx2_unregister_dl(vf); unregister_netdev(netdev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 77e82e6cf6e8..fa33caecc91d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -1957,6 +1957,83 @@ int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, } EXPORT_SYMBOL(mlxsw_afa_block_append_mcrouter); +/* SIP DIP Action + * -------------- + * The SIP_DIP_ACTION is used for modifying the SIP and DIP fields of the + * packet, e.g. for NAT. The L3 checksum is updated. Also, if the L4 is TCP or + * if the L4 is UDP and the checksum field is not zero, then the L4 checksum is + * updated. + */ + +#define MLXSW_AFA_IP_CODE 0x11 +#define MLXSW_AFA_IP_SIZE 2 + +enum mlxsw_afa_ip_s_d { + /* ip refers to dip */ + MLXSW_AFA_IP_S_D_DIP, + /* ip refers to sip */ + MLXSW_AFA_IP_S_D_SIP, +}; + +/* afa_ip_s_d + * Source or destination. + */ +MLXSW_ITEM32(afa, ip, s_d, 0x00, 31, 1); + +enum mlxsw_afa_ip_m_l { + /* LSB: ip[63:0] refers to ip[63:0] */ + MLXSW_AFA_IP_M_L_LSB, + /* MSB: ip[63:0] refers to ip[127:64] */ + MLXSW_AFA_IP_M_L_MSB, +}; + +/* afa_ip_m_l + * MSB or LSB. + */ +MLXSW_ITEM32(afa, ip, m_l, 0x00, 30, 1); + +/* afa_ip_ip_63_32 + * Bits [63:32] in the IP address to change to. + */ +MLXSW_ITEM32(afa, ip, ip_63_32, 0x08, 0, 32); + +/* afa_ip_ip_31_0 + * Bits [31:0] in the IP address to change to. + */ +MLXSW_ITEM32(afa, ip, ip_31_0, 0x0C, 0, 32); + +static void mlxsw_afa_ip_pack(char *payload, enum mlxsw_afa_ip_s_d s_d, + enum mlxsw_afa_ip_m_l m_l, u32 ip_31_0, + u32 ip_63_32) +{ + mlxsw_afa_ip_s_d_set(payload, s_d); + mlxsw_afa_ip_m_l_set(payload, m_l); + mlxsw_afa_ip_ip_31_0_set(payload, ip_31_0); + mlxsw_afa_ip_ip_63_32_set(payload, ip_63_32); +} + +int mlxsw_afa_block_append_ip(struct mlxsw_afa_block *block, bool is_dip, + bool is_lsb, u32 val_31_0, u32 val_63_32, + struct netlink_ext_ack *extack) +{ + enum mlxsw_afa_ip_s_d s_d = is_dip ? MLXSW_AFA_IP_S_D_DIP : + MLXSW_AFA_IP_S_D_SIP; + enum mlxsw_afa_ip_m_l m_l = is_lsb ? MLXSW_AFA_IP_M_L_LSB : + MLXSW_AFA_IP_M_L_MSB; + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_IP_CODE, + MLXSW_AFA_IP_SIZE); + + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append IP action"); + return PTR_ERR(act); + } + + mlxsw_afa_ip_pack(act, s_d, m_l, val_31_0, val_63_32); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_ip); + /* L4 Port Action * -------------- * The L4_PORT_ACTION is used for modifying the sport and dport fields of the packet, e.g. for NAT. 
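The per-priority backpressure plumbing in the otx2 hunks above all keys off the pfc_en byte that dcbnl hands to ieee_setpfc(): bit N set means priority N is PFC-enabled, and otx2_nix_config_bp() switches from one shared backpressure ID to one per channel (chan_cnt = IEEE_8021QAZ_MAX_TCS, bpid_per_chan = 1) as soon as any bit is set. A minimal standalone sketch of that bitmap convention, assuming nothing beyond the hunks above (the EXAMPLE_* name is invented, not from the driver):

    #include <stdint.h>
    #include <stdio.h>

    #define EXAMPLE_MAX_TCS 8   /* IEEE 802.1Qaz traffic classes */

    int main(void)
    {
        uint8_t pfc_en = 0x09;  /* priorities 0 and 3 enabled */
        int prio;

        for (prio = 0; prio < EXAMPLE_MAX_TCS; prio++)
            printf("prio %d: PFC %s\n", prio,
                   (pfc_en & (1u << prio)) ? "on" : "off");

        /* As in otx2_nix_config_bp(): any enabled priority requests one
         * backpressure ID per channel instead of a single shared one. */
        printf("chan_cnt = %d, bpid_per_chan = %d\n",
               pfc_en ? EXAMPLE_MAX_TCS : 1, pfc_en ? 1 : 0);
        return 0;
    }

The same pfc_en bitmap is what otx2_add_flow_msg() later consults (pfvf->pfc_en & BIT(vlan_prio)) when deciding whether a VLAN ntuple rule should also update the per-queue backpressure ID.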
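The SIP_DIP action layout just defined is small enough to restate in plain C. The sketch below fills the same four 32-bit words that the MLXSW_ITEM32 helpers target: s_d in bit 31 and m_l in bit 30 of word 0, ip[63:32] at byte offset 0x08, ip[31:0] at 0x0C. It is illustrative only; it skips the big-endian item encoding the real helpers take care of, and example_afa_ip_pack() is an invented name:

    #include <stdint.h>
    #include <stdio.h>

    /* Word layout of the SIP_DIP payload, per the item definitions
     * above; big-endian item encoding elided for clarity. */
    static void example_afa_ip_pack(uint32_t payload[4], int is_dip,
                                    int is_lsb, uint32_t ip_31_0,
                                    uint32_t ip_63_32)
    {
        /* s_d: 0 = DIP, 1 = SIP; m_l: 0 = LSB half, 1 = MSB half */
        payload[0] = ((is_dip ? 0u : 1u) << 31) |
                     ((is_lsb ? 0u : 1u) << 30);
        payload[1] = 0;
        payload[2] = ip_63_32;  /* byte offset 0x08 */
        payload[3] = ip_31_0;   /* byte offset 0x0C */
    }

    int main(void)
    {
        uint32_t p[4];

        /* IPv4 DIP rewrite: one action, LSB half, upper word zero,
         * matching the IP4_DIP case added later in spectrum_acl.c. */
        example_afa_ip_pack(p, 1, 1, 0xc0a80001u, 0);
        printf("dip lsb: word0=0x%08x ip=0x%08x\n", p[0], p[3]);

        /* Upper half of an IPv6 source: SIP plus MSB selector. */
        example_afa_ip_pack(p, 0, 0, 0x22334455u, 0x00112233u);
        printf("sip msb: word0=0x%08x\n", p[0]);
        return 0;
    }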
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 16cbd6acbb01..db58037be46e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -92,6 +92,9 @@ int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid, int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, u16 expected_irif, u16 min_mtu, bool rmid_valid, u32 kvdl_index); +int mlxsw_afa_block_append_ip(struct mlxsw_afa_block *block, bool is_dip, + bool is_lsb, u32 val_31_0, u32 val_63_32, + struct netlink_ext_ack *extack); int mlxsw_afa_block_append_l4port(struct mlxsw_afa_block *block, bool is_dport, u16 l4_port, struct netlink_ext_ack *extack); int mlxsw_afa_block_append_police(struct mlxsw_afa_block *block, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 30942b6ffcf9..20588e699588 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -820,6 +820,24 @@ int mlxsw_sp1_kvdl_resources_register(struct mlxsw_core *mlxsw_core); /* spectrum2_kvdl.c */ extern const struct mlxsw_sp_kvdl_ops mlxsw_sp2_kvdl_ops; +enum mlxsw_sp_acl_mangle_field { + MLXSW_SP_ACL_MANGLE_FIELD_IP_DSFIELD, + MLXSW_SP_ACL_MANGLE_FIELD_IP_DSCP, + MLXSW_SP_ACL_MANGLE_FIELD_IP_ECN, + MLXSW_SP_ACL_MANGLE_FIELD_IP_SPORT, + MLXSW_SP_ACL_MANGLE_FIELD_IP_DPORT, + MLXSW_SP_ACL_MANGLE_FIELD_IP4_SIP, + MLXSW_SP_ACL_MANGLE_FIELD_IP4_DIP, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_1, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_2, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_3, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_4, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_1, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_2, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_3, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_4, +}; + struct mlxsw_sp_acl_rule_info { unsigned int priority; struct mlxsw_afk_element_values values; @@ -828,9 +846,14 @@ struct mlxsw_sp_acl_rule_info { ingress_bind_blocker:1, egress_bind_blocker:1, counter_valid:1, - policer_index_valid:1; + policer_index_valid:1, + ipv6_valid:1; unsigned int counter_index; u16 policer_index; + struct { + u32 prev_val; + enum mlxsw_sp_acl_mangle_field prev_field; + } ipv6; }; /* spectrum_flow.c */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 70c11bfac08f..6c5af018546f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -505,14 +505,6 @@ int mlxsw_sp_acl_rulei_act_priority(struct mlxsw_sp *mlxsw_sp, extack); } -enum mlxsw_sp_acl_mangle_field { - MLXSW_SP_ACL_MANGLE_FIELD_IP_DSFIELD, - MLXSW_SP_ACL_MANGLE_FIELD_IP_DSCP, - MLXSW_SP_ACL_MANGLE_FIELD_IP_ECN, - MLXSW_SP_ACL_MANGLE_FIELD_IP_SPORT, - MLXSW_SP_ACL_MANGLE_FIELD_IP_DPORT, -}; - struct mlxsw_sp_acl_mangle_action { enum flow_action_mangle_base htype; /* Offset is u32-aligned. 
*/ @@ -561,6 +553,18 @@ static struct mlxsw_sp_acl_mangle_action mlxsw_sp_acl_mangle_actions[] = { MLXSW_SP_ACL_MANGLE_ACTION_UDP(0, 0x0000ffff, 16, IP_SPORT), MLXSW_SP_ACL_MANGLE_ACTION_UDP(0, 0xffff0000, 0, IP_DPORT), + + MLXSW_SP_ACL_MANGLE_ACTION_IP4(12, 0x00000000, 0, IP4_SIP), + MLXSW_SP_ACL_MANGLE_ACTION_IP4(16, 0x00000000, 0, IP4_DIP), + + MLXSW_SP_ACL_MANGLE_ACTION_IP6(8, 0x00000000, 0, IP6_SIP_1), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(12, 0x00000000, 0, IP6_SIP_2), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(16, 0x00000000, 0, IP6_SIP_3), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(20, 0x00000000, 0, IP6_SIP_4), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(24, 0x00000000, 0, IP6_DIP_1), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(28, 0x00000000, 0, IP6_DIP_2), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(32, 0x00000000, 0, IP6_DIP_3), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(36, 0x00000000, 0, IP6_DIP_4), }; static int @@ -599,6 +603,22 @@ static int mlxsw_sp1_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp, return err; } +static int +mlxsw_sp2_acl_rulei_act_mangle_field_ip_odd(struct mlxsw_sp_acl_rule_info *rulei, + enum mlxsw_sp_acl_mangle_field field, + u32 val, struct netlink_ext_ack *extack) +{ + if (!rulei->ipv6_valid) { + rulei->ipv6.prev_val = val; + rulei->ipv6_valid = true; + rulei->ipv6.prev_field = field; + return 0; + } + + NL_SET_ERR_MSG_MOD(extack, "Unsupported mangle field order"); + return -EOPNOTSUPP; +} + static int mlxsw_sp2_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct mlxsw_sp_acl_mangle_action *mact, @@ -615,6 +635,61 @@ static int mlxsw_sp2_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp, return mlxsw_afa_block_append_l4port(rulei->act_block, false, val, extack); case MLXSW_SP_ACL_MANGLE_FIELD_IP_DPORT: return mlxsw_afa_block_append_l4port(rulei->act_block, true, val, extack); + /* IPv4 fields */ + case MLXSW_SP_ACL_MANGLE_FIELD_IP4_SIP: + return mlxsw_afa_block_append_ip(rulei->act_block, false, + true, val, 0, extack); + case MLXSW_SP_ACL_MANGLE_FIELD_IP4_DIP: + return mlxsw_afa_block_append_ip(rulei->act_block, true, + true, val, 0, extack); + /* IPv6 fields */ + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_1: + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_3: + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_1: + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_3: + return mlxsw_sp2_acl_rulei_act_mangle_field_ip_odd(rulei, + mact->field, + val, extack); + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_2: + if (rulei->ipv6_valid && + rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_1) { + rulei->ipv6_valid = false; + return mlxsw_afa_block_append_ip(rulei->act_block, + false, false, val, + rulei->ipv6.prev_val, + extack); + } + break; + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_4: + if (rulei->ipv6_valid && + rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_3) { + rulei->ipv6_valid = false; + return mlxsw_afa_block_append_ip(rulei->act_block, + false, true, val, + rulei->ipv6.prev_val, + extack); + } + break; + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_2: + if (rulei->ipv6_valid && + rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_1) { + rulei->ipv6_valid = false; + return mlxsw_afa_block_append_ip(rulei->act_block, + true, false, val, + rulei->ipv6.prev_val, + extack); + } + break; + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_4: + if (rulei->ipv6_valid && + rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_3) { + rulei->ipv6_valid = false; + return mlxsw_afa_block_append_ip(rulei->act_block, + true, true, val, + rulei->ipv6.prev_val, + extack); + } + break; 
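/*
 * How the IPv6 cases above fit together: tc pedit delivers a 128-bit
 * address as four 32-bit words, but the SIP_DIP action rewrites 64
 * bits at a time. Odd words (*_SIP_1/_3 and *_DIP_1/_3) are only
 * buffered in rulei->ipv6 by the _ip_odd() helper; the matching even
 * word then clears ipv6_valid and emits a single
 * mlxsw_afa_block_append_ip() call carrying both halves, with the
 * buffered word as the upper 32 bits. Any other ordering breaks out
 * of the switch without emitting an action and the mangle is refused
 * as unsupported, while a dangling odd word is caught by the
 * ipv6_valid check added to mlxsw_sp_flower_parse_actions() below.
 */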
default: break; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index bb417db773b9..f54af3d9a03b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -233,6 +233,12 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return -EOPNOTSUPP; } } + + if (rulei->ipv6_valid) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported mangle field"); + return -EOPNOTSUPP; + } + return 0; } diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 8eaaa7559407..d62484f14564 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -4,11 +4,13 @@ #include <linux/if_bridge.h> #include <linux/if_vlan.h> #include <linux/iopoll.h> +#include <linux/ip.h> #include <linux/of_platform.h> #include <linux/of_net.h> #include <linux/packing.h> #include <linux/phy/phy.h> #include <linux/reset.h> +#include <net/addrconf.h> #include "lan966x_main.h" @@ -419,6 +421,32 @@ bool lan966x_netdevice_check(const struct net_device *dev) return dev->netdev_ops == &lan966x_port_netdev_ops; } +static bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, + struct sk_buff *skb) +{ + u32 val; + + /* The IGMP and MLD frames are not forward by the HW if + * multicast snooping is enabled, therefor don't mark as + * offload to allow the SW to forward the frames accordingly. + */ + val = lan_rd(lan966x, ANA_CPU_FWD_CFG(port)); + if (!(val & (ANA_CPU_FWD_CFG_IGMP_REDIR_ENA | + ANA_CPU_FWD_CFG_MLD_REDIR_ENA))) + return true; + + if (skb->protocol == htons(ETH_P_IP) && + ip_hdr(skb)->protocol == IPPROTO_IGMP) + return false; + + if (skb->protocol == htons(ETH_P_IPV6) && + ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) && + !ipv6_mc_check_mld(skb)) + return false; + + return true; +} + static int lan966x_port_xtr_status(struct lan966x *lan966x, u8 grp) { return lan_rd(lan966x, QS_XTR_RD(grp)); @@ -563,9 +591,14 @@ static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args) lan966x_ptp_rxtstamp(lan966x, skb, timestamp); skb->protocol = eth_type_trans(skb, dev); - if (lan966x->bridge_mask & BIT(src_port)) + if (lan966x->bridge_mask & BIT(src_port)) { skb->offload_fwd_mark = 1; + skb_reset_network_header(skb); + if (!lan966x_hw_offload(lan966x, src_port, skb)) + skb->offload_fwd_mark = 0; + } + netif_rx_ni(skb); dev->stats.rx_bytes += len; dev->stats.rx_packets++; @@ -767,7 +800,7 @@ static void lan966x_init(struct lan966x *lan966x) /* Setup flooding PGIDs */ lan_wr(ANA_FLOODING_IPMC_FLD_MC4_DATA_SET(PGID_MCIPV4) | ANA_FLOODING_IPMC_FLD_MC4_CTRL_SET(PGID_MC) | - ANA_FLOODING_IPMC_FLD_MC6_DATA_SET(PGID_MC) | + ANA_FLOODING_IPMC_FLD_MC6_DATA_SET(PGID_MCIPV6) | ANA_FLOODING_IPMC_FLD_MC6_CTRL_SET(PGID_MC), lan966x, ANA_FLOODING_IPMC); @@ -829,6 +862,10 @@ static void lan966x_init(struct lan966x *lan966x) ANA_PGID_PGID, lan966x, ANA_PGID(PGID_MCIPV4)); + lan_rmw(GENMASK(lan966x->num_phys_ports - 1, 0), + ANA_PGID_PGID, + lan966x, ANA_PGID(PGID_MCIPV6)); + /* Unicast to all other ports */ lan_rmw(GENMASK(lan966x->num_phys_ports - 1, 0), ANA_PGID_PGID, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 026474c609ea..058e43531818 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -170,6 +170,7 
@@ struct lan966x_port { bool vlan_aware; bool learn_ena; + bool mcast_ena; struct phylink_config phylink_config; struct phylink_pcs phylink_pcs; @@ -266,6 +267,8 @@ int lan966x_handle_port_mdb_del(struct lan966x_port *port, const struct switchdev_obj *obj); void lan966x_mdb_erase_entries(struct lan966x *lan966x, u16 vid); void lan966x_mdb_write_entries(struct lan966x *lan966x, u16 vid); +void lan966x_mdb_clear_entries(struct lan966x *lan966x); +void lan966x_mdb_restore_entries(struct lan966x *lan966x); int lan966x_ptp_init(struct lan966x *lan966x); void lan966x_ptp_deinit(struct lan966x *lan966x); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c index c68d0a99d292..2af55268bf4d 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c @@ -504,3 +504,48 @@ void lan966x_mdb_erase_entries(struct lan966x *lan966x, u16 vid) lan966x_mdb_l2_cpu_remove(lan966x, mdb_entry, type); } } + +void lan966x_mdb_clear_entries(struct lan966x *lan966x) +{ + struct lan966x_mdb_entry *mdb_entry; + enum macaccess_entry_type type; + unsigned char mac[ETH_ALEN]; + + list_for_each_entry(mdb_entry, &lan966x->mdb_entries, list) { + type = lan966x_mdb_classify(mdb_entry->mac); + + lan966x_mdb_encode_mac(mac, mdb_entry, type); + /* Remove just the MAC entry, still keep the PGID in case of L2 + * entries because this can be restored at later point + */ + lan966x_mac_forget(lan966x, mac, mdb_entry->vid, type); + } +} + +void lan966x_mdb_restore_entries(struct lan966x *lan966x) +{ + struct lan966x_mdb_entry *mdb_entry; + enum macaccess_entry_type type; + unsigned char mac[ETH_ALEN]; + bool cpu_copy = false; + + list_for_each_entry(mdb_entry, &lan966x->mdb_entries, list) { + type = lan966x_mdb_classify(mdb_entry->mac); + + lan966x_mdb_encode_mac(mac, mdb_entry, type); + if (type == ENTRYTYPE_MACV4 || type == ENTRYTYPE_MACV6) { + /* Copy the frame to CPU only if the CPU is in the VLAN */ + if (lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x, + mdb_entry->vid) && + mdb_entry->cpu_copy) + cpu_copy = true; + + lan966x_mac_ip_learn(lan966x, cpu_copy, mac, + mdb_entry->vid, type); + } else { + lan966x_mac_learn(lan966x, mdb_entry->pgid->index, + mdb_entry->mac, + mdb_entry->vid, type); + } + } +} diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h index 37a5d7e63cb6..0c0b3e173d53 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h @@ -299,6 +299,24 @@ enum lan966x_target { /* ANA:PORT:CPU_FWD_CFG */ #define ANA_CPU_FWD_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 96, 0, 1, 4) +#define ANA_CPU_FWD_CFG_MLD_REDIR_ENA BIT(6) +#define ANA_CPU_FWD_CFG_MLD_REDIR_ENA_SET(x)\ + FIELD_PREP(ANA_CPU_FWD_CFG_MLD_REDIR_ENA, x) +#define ANA_CPU_FWD_CFG_MLD_REDIR_ENA_GET(x)\ + FIELD_GET(ANA_CPU_FWD_CFG_MLD_REDIR_ENA, x) + +#define ANA_CPU_FWD_CFG_IGMP_REDIR_ENA BIT(5) +#define ANA_CPU_FWD_CFG_IGMP_REDIR_ENA_SET(x)\ + FIELD_PREP(ANA_CPU_FWD_CFG_IGMP_REDIR_ENA, x) +#define ANA_CPU_FWD_CFG_IGMP_REDIR_ENA_GET(x)\ + FIELD_GET(ANA_CPU_FWD_CFG_IGMP_REDIR_ENA, x) + +#define ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA BIT(4) +#define ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA_SET(x)\ + FIELD_PREP(ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA, x) +#define ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA_GET(x)\ + FIELD_GET(ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA, x) + #define ANA_CPU_FWD_CFG_SRC_COPY_ENA BIT(3) 
#define ANA_CPU_FWD_CFG_SRC_COPY_ENA_SET(x)\ FIELD_PREP(ANA_CPU_FWD_CFG_SRC_COPY_ENA, x) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c index 7de55f6a4da8..9fce865287e7 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c @@ -9,6 +9,37 @@ static struct notifier_block lan966x_netdevice_nb __read_mostly; static struct notifier_block lan966x_switchdev_nb __read_mostly; static struct notifier_block lan966x_switchdev_blocking_nb __read_mostly; +static void lan966x_port_set_mcast_ip_flood(struct lan966x_port *port, + u32 pgid_ip) +{ + struct lan966x *lan966x = port->lan966x; + u32 flood_mask_ip; + + flood_mask_ip = lan_rd(lan966x, ANA_PGID(pgid_ip)); + flood_mask_ip = ANA_PGID_PGID_GET(flood_mask_ip); + + /* If mcast snooping is not enabled then use mcast flood mask + * to decide to enable multicast flooding or not. + */ + if (!port->mcast_ena) { + u32 flood_mask; + + flood_mask = lan_rd(lan966x, ANA_PGID(PGID_MC)); + flood_mask = ANA_PGID_PGID_GET(flood_mask); + + if (flood_mask & BIT(port->chip_port)) + flood_mask_ip |= BIT(port->chip_port); + else + flood_mask_ip &= ~BIT(port->chip_port); + } else { + flood_mask_ip &= ~BIT(port->chip_port); + } + + lan_rmw(ANA_PGID_PGID_SET(flood_mask_ip), + ANA_PGID_PGID, + lan966x, ANA_PGID(pgid_ip)); +} + static void lan966x_port_set_mcast_flood(struct lan966x_port *port, bool enabled) { @@ -23,6 +54,11 @@ static void lan966x_port_set_mcast_flood(struct lan966x_port *port, lan_rmw(ANA_PGID_PGID_SET(val), ANA_PGID_PGID, port->lan966x, ANA_PGID(PGID_MC)); + + if (!port->mcast_ena) { + lan966x_port_set_mcast_ip_flood(port, PGID_MCIPV4); + lan966x_port_set_mcast_ip_flood(port, PGID_MCIPV6); + } } static void lan966x_port_set_ucast_flood(struct lan966x_port *port, @@ -144,6 +180,28 @@ static void lan966x_port_ageing_set(struct lan966x_port *port, lan966x_mac_set_ageing(port->lan966x, ageing_time); } +static void lan966x_port_mc_set(struct lan966x_port *port, bool mcast_ena) +{ + struct lan966x *lan966x = port->lan966x; + + port->mcast_ena = mcast_ena; + if (mcast_ena) + lan966x_mdb_restore_entries(lan966x); + else + lan966x_mdb_clear_entries(lan966x); + + lan_rmw(ANA_CPU_FWD_CFG_IGMP_REDIR_ENA_SET(mcast_ena) | + ANA_CPU_FWD_CFG_MLD_REDIR_ENA_SET(mcast_ena) | + ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA_SET(mcast_ena), + ANA_CPU_FWD_CFG_IGMP_REDIR_ENA | + ANA_CPU_FWD_CFG_MLD_REDIR_ENA | + ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA, + lan966x, ANA_CPU_FWD_CFG(port->chip_port)); + + lan966x_port_set_mcast_ip_flood(port, PGID_MCIPV4); + lan966x_port_set_mcast_ip_flood(port, PGID_MCIPV6); +} + static int lan966x_port_attr_set(struct net_device *dev, const void *ctx, const struct switchdev_attr *attr, struct netlink_ext_ack *extack) @@ -171,6 +229,9 @@ static int lan966x_port_attr_set(struct net_device *dev, const void *ctx, lan966x_vlan_port_set_vlan_aware(port, attr->u.vlan_filtering); lan966x_vlan_port_apply(port); break; + case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED: + lan966x_port_mc_set(port, !attr->u.mc_disabled); + break; default: err = -EOPNOTSUPP; break; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 35689b5e212c..394de85d360d 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -291,7 +291,6 @@ static int sparx5_create_port(struct sparx5 *sparx5, /* Create a 
phylink for PHY management. Also handles SFPs */ spx5_port->phylink_config.dev = &spx5_port->ndev->dev; spx5_port->phylink_config.type = PHYLINK_NETDEV; - spx5_port->phylink_config.pcs_poll = true; spx5_port->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD | MAC_2500FD | MAC_5000FD | MAC_10000FD | MAC_25000FD; diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 69e791e6abc4..b7d3ba1b4d17 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1085,8 +1085,10 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, break; case CQE_RX_TRUNCATED: - netdev_err(ndev, "Dropped a truncated packet\n"); - return; + ++ndev->stats.rx_dropped; + rxbuf_oob = &rxq->rx_oobs[rxq->buf_index]; + netdev_warn_once(ndev, "Dropped a truncated packet\n"); + goto drop; case CQE_RX_COALESCED_4: netdev_err(ndev, "RX coalescing is unsupported\n"); @@ -1102,9 +1104,6 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, return; } - if (oob->cqe_hdr.cqe_type != CQE_RX_OKAY) - return; - pktlen = oob->ppi[0].pkt_len; if (pktlen == 0) { @@ -1154,6 +1153,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, mana_rx_skb(old_buf, oob, rxq); +drop: mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu); mana_post_pkt_rxq(rxq); diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c index f7ad5487879b..15c295f90196 100644 --- a/drivers/net/ethernet/realtek/r8169_phy_config.c +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -429,15 +429,6 @@ static const struct phy_reg rtl8168d_1_phy_reg_init_0[] = { { 0x0d, 0xf880 } }; -static const struct phy_reg rtl8168d_1_phy_reg_init_1[] = { - { 0x1f, 0x0002 }, - { 0x05, 0x669a }, - { 0x1f, 0x0005 }, - { 0x05, 0x8330 }, - { 0x06, 0x669a }, - { 0x1f, 0x0002 } -}; - static void rtl8168d_apply_firmware_cond(struct rtl8169_private *tp, struct phy_device *phydev, u16 val) @@ -455,6 +446,29 @@ static void rtl8168d_apply_firmware_cond(struct rtl8169_private *tp, r8169_apply_firmware(tp); } +static void rtl8168d_1_common(struct phy_device *phydev) +{ + u16 val; + + phy_write_paged(phydev, 0x0002, 0x05, 0x669a); + r8168d_phy_param(phydev, 0x8330, 0xffff, 0x669a); + phy_write(phydev, 0x1f, 0x0002); + + val = phy_read(phydev, 0x0d); + + if ((val & 0x00ff) != 0x006c) { + static const u16 set[] = { + 0x0065, 0x0066, 0x0067, 0x0068, + 0x0069, 0x006a, 0x006b, 0x006c + }; + int i; + + val &= 0xff00; + for (i = 0; i < ARRAY_SIZE(set); i++) + phy_write(phydev, 0x0d, val | set[i]); + } +} + static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev) { @@ -469,25 +483,7 @@ static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp, phy_modify(phydev, 0x0c, 0x5d00, 0xa200); if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { - int val; - - rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_1); - - val = phy_read(phydev, 0x0d); - - if ((val & 0x00ff) != 0x006c) { - static const u32 set[] = { - 0x0065, 0x0066, 0x0067, 0x0068, - 0x0069, 0x006a, 0x006b, 0x006c - }; - int i; - - phy_write(phydev, 0x1f, 0x0002); - - val &= 0xff00; - for (i = 0; i < ARRAY_SIZE(set); i++) - phy_write(phydev, 0x0d, val | set[i]); - } + rtl8168d_1_common(phydev); } else { phy_write_paged(phydev, 0x0002, 0x05, 0x6662); r8168d_phy_param(phydev, 0x8330, 0xffff, 0x6662); @@ -513,24 +509,7 @@ static void 
rtl8168d_2_hw_phy_config(struct rtl8169_private *tp, rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_0); if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { - int val; - - rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_1); - - val = phy_read(phydev, 0x0d); - if ((val & 0x00ff) != 0x006c) { - static const u32 set[] = { - 0x0065, 0x0066, 0x0067, 0x0068, - 0x0069, 0x006a, 0x006b, 0x006c - }; - int i; - - phy_write(phydev, 0x1f, 0x0002); - - val &= 0xff00; - for (i = 0; i < ARRAY_SIZE(set); i++) - phy_write(phydev, 0x0d, val | set[i]); - } + rtl8168d_1_common(phydev); } else { phy_write_paged(phydev, 0x0002, 0x05, 0x2642); r8168d_phy_param(phydev, 0x8330, 0xffff, 0x2642); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 8e8778cfbbad..5943ff9f21c2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -383,10 +383,10 @@ static int intel_crosststamp(ktime_t *device, /* Repeat until the timestamps are from the FIFO last segment */ for (i = 0; i < num_snapshot; i++) { - spin_lock_irqsave(&priv->ptp_lock, flags); + read_lock_irqsave(&priv->ptp_lock, flags); stmmac_get_ptptime(priv, ptpaddr, &ptp_time); *device = ns_to_ktime(ptp_time); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + read_unlock_irqrestore(&priv->ptp_lock, flags); get_arttime(priv->mii, intel_priv->mdio_adhoc_addr, &art_time); *system = convert_art_to_tsc(art_time); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 5b195d5051d6..57970ae2178d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -263,7 +263,7 @@ struct stmmac_priv { u32 adv_ts; int use_riwt; int irq_wake; - spinlock_t ptp_lock; + rwlock_t ptp_lock; /* Protects auxiliary snapshot registers from concurrent access. */ struct mutex aux_ts_lock; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index a7ec9f4d46ce..22fea0f67245 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -196,9 +196,9 @@ static void timestamp_interrupt(struct stmmac_priv *priv) GMAC_TIMESTAMP_ATSNS_SHIFT; for (i = 0; i < num_snapshot; i++) { - spin_lock_irqsave(&priv->ptp_lock, flags); + read_lock_irqsave(&priv->ptp_lock, flags); get_ptptime(priv->ptpaddr, &ptp_time); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + read_unlock_irqrestore(&priv->ptp_lock, flags); event.type = PTP_CLOCK_EXTTS; event.index = 0; event.timestamp = ptp_time; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 1c9f02f9c317..e45fb191d8e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -39,9 +39,9 @@ static int stmmac_adjust_freq(struct ptp_clock_info *ptp, s32 ppb) diff = div_u64(adj, 1000000000ULL); addend = neg_adj ? 
(addend - diff) : (addend + diff); - spin_lock_irqsave(&priv->ptp_lock, flags); + write_lock_irqsave(&priv->ptp_lock, flags); stmmac_config_addend(priv, priv->ptpaddr, addend); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + write_unlock_irqrestore(&priv->ptp_lock, flags); return 0; } @@ -86,9 +86,9 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) mutex_unlock(&priv->plat->est->lock); } - spin_lock_irqsave(&priv->ptp_lock, flags); + write_lock_irqsave(&priv->ptp_lock, flags); stmmac_adjust_systime(priv, priv->ptpaddr, sec, nsec, neg_adj, xmac); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + write_unlock_irqrestore(&priv->ptp_lock, flags); /* Caculate new basetime and re-configured EST after PTP time adjust. */ if (est_rst) { @@ -137,9 +137,9 @@ static int stmmac_get_time(struct ptp_clock_info *ptp, struct timespec64 *ts) unsigned long flags; u64 ns = 0; - spin_lock_irqsave(&priv->ptp_lock, flags); + read_lock_irqsave(&priv->ptp_lock, flags); stmmac_get_systime(priv, priv->ptpaddr, &ns); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + read_unlock_irqrestore(&priv->ptp_lock, flags); *ts = ns_to_timespec64(ns); @@ -162,9 +162,9 @@ static int stmmac_set_time(struct ptp_clock_info *ptp, container_of(ptp, struct stmmac_priv, ptp_clock_ops); unsigned long flags; - spin_lock_irqsave(&priv->ptp_lock, flags); + write_lock_irqsave(&priv->ptp_lock, flags); stmmac_init_systime(priv, priv->ptpaddr, ts->tv_sec, ts->tv_nsec); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + write_unlock_irqrestore(&priv->ptp_lock, flags); return 0; } @@ -194,12 +194,12 @@ static int stmmac_enable(struct ptp_clock_info *ptp, cfg->period.tv_sec = rq->perout.period.sec; cfg->period.tv_nsec = rq->perout.period.nsec; - spin_lock_irqsave(&priv->ptp_lock, flags); + write_lock_irqsave(&priv->ptp_lock, flags); ret = stmmac_flex_pps_config(priv, priv->ioaddr, rq->perout.index, cfg, on, priv->sub_second_inc, priv->systime_flags); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + write_unlock_irqrestore(&priv->ptp_lock, flags); break; case PTP_CLK_REQ_EXTTS: priv->plat->ext_snapshot_en = on; @@ -314,7 +314,7 @@ void stmmac_ptp_register(struct stmmac_priv *priv) stmmac_ptp_clock_ops.n_per_out = priv->dma_cap.pps_out_num; stmmac_ptp_clock_ops.n_ext_ts = priv->dma_cap.aux_snapshot_n; - spin_lock_init(&priv->ptp_lock); + rwlock_init(&priv->ptp_lock); mutex_init(&priv->aux_ts_lock); priv->ptp_clock_ops = stmmac_ptp_clock_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index be3cb63675a5..9f1759593b94 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -1777,9 +1777,9 @@ static int stmmac_test_tbs(struct stmmac_priv *priv) if (ret) return ret; - spin_lock_irqsave(&priv->ptp_lock, flags); + read_lock_irqsave(&priv->ptp_lock, flags); stmmac_get_systime(priv, priv->ptpaddr, &curr_time); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + read_unlock_irqrestore(&priv->ptp_lock, flags); if (!curr_time) { ret = -EOPNOTSUPP; @@ -1799,9 +1799,9 @@ static int stmmac_test_tbs(struct stmmac_priv *priv) goto fail_disable; /* Check if expected time has elapsed */ - spin_lock_irqsave(&priv->ptp_lock, flags); + read_lock_irqsave(&priv->ptp_lock, flags); stmmac_get_systime(priv, priv->ptpaddr, &curr_time); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + read_unlock_irqrestore(&priv->ptp_lock, flags); if ((curr_time - start_time) < STMMAC_TBS_LT_OFFSET) 
ret = -EINVAL; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index afa81a9480cc..e675d1016c3c 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -154,19 +154,15 @@ static void free_netvsc_device(struct rcu_head *head) kfree(nvdev->extension); - if (nvdev->recv_original_buf) { - hv_unmap_memory(nvdev->recv_buf); + if (nvdev->recv_original_buf) vfree(nvdev->recv_original_buf); - } else { + else vfree(nvdev->recv_buf); - } - if (nvdev->send_original_buf) { - hv_unmap_memory(nvdev->send_buf); + if (nvdev->send_original_buf) vfree(nvdev->send_original_buf); - } else { + else vfree(nvdev->send_buf); - } bitmap_free(nvdev->send_section_map); @@ -765,6 +761,12 @@ void netvsc_device_remove(struct hv_device *device) netvsc_teardown_send_gpadl(device, net_device, ndev); } + if (net_device->recv_original_buf) + hv_unmap_memory(net_device->recv_buf); + + if (net_device->send_original_buf) + hv_unmap_memory(net_device->send_buf); + /* Release all resources */ free_netvsc_device_rcu(net_device); } @@ -1821,6 +1823,12 @@ cleanup: netif_napi_del(&net_device->chan_table[0].napi); cleanup2: + if (net_device->recv_original_buf) + hv_unmap_memory(net_device->recv_buf); + + if (net_device->send_original_buf) + hv_unmap_memory(net_device->send_buf); + free_netvsc_device(&net_device->rcu); return ERR_PTR(ret); diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c index 1544564bc283..87e1d43c118c 100644 --- a/drivers/net/ipa/gsi_trans.c +++ b/drivers/net/ipa/gsi_trans.c @@ -320,6 +320,17 @@ gsi_trans_tre_release(struct gsi_trans_info *trans_info, u32 tre_count) atomic_add(tre_count, &trans_info->tre_avail); } +/* Return true if no transactions are allocated, false otherwise */ +bool gsi_channel_trans_idle(struct gsi *gsi, u32 channel_id) +{ + u32 tre_max = gsi_channel_tre_max(gsi, channel_id); + struct gsi_trans_info *trans_info; + + trans_info = &gsi->channel[channel_id].trans_info; + + return atomic_read(&trans_info->tre_avail) == tre_max; +} + /* Allocate a GSI transaction on a channel */ struct gsi_trans *gsi_channel_trans_alloc(struct gsi *gsi, u32 channel_id, u32 tre_count, diff --git a/drivers/net/ipa/gsi_trans.h b/drivers/net/ipa/gsi_trans.h index 17fd1822d8a9..af379b49299e 100644 --- a/drivers/net/ipa/gsi_trans.h +++ b/drivers/net/ipa/gsi_trans.h @@ -130,6 +130,16 @@ void *gsi_trans_pool_alloc_dma(struct gsi_trans_pool *pool, dma_addr_t *addr); void gsi_trans_pool_exit_dma(struct device *dev, struct gsi_trans_pool *pool); /** + * gsi_channel_trans_idle() - Return whether no transactions are allocated + * @gsi: GSI pointer + * @channel_id: Channel the transaction is associated with + * + * Return: True if no transactions are allocated, false otherwise + * + */ +bool gsi_channel_trans_idle(struct gsi *gsi, u32 channel_id); + +/** * gsi_channel_trans_alloc() - Allocate a GSI transaction on a channel * @gsi: GSI pointer * @channel_id: Channel the transaction is associated with diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index fffd0a784ef2..888e94278a84 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -25,7 +25,8 @@ #define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0) -#define IPA_REPLENISH_BATCH 16 +/* Hardware is told about receive buffers once a "batch" has been queued */ +#define IPA_REPLENISH_BATCH 16 /* Must be non-zero */ /* The amount of RX buffer space consumed by standard skb overhead */ #define IPA_RX_BUFFER_OVERHEAD (PAGE_SIZE - SKB_MAX_ORDER(NET_SKB_PAD, 
0)) @@ -1036,10 +1037,9 @@ static void ipa_endpoint_status(struct ipa_endpoint *endpoint) iowrite32(val, ipa->reg_virt + offset); } -static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint) +static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint, + struct gsi_trans *trans) { - struct gsi_trans *trans; - bool doorbell = false; struct page *page; u32 buffer_size; u32 offset; @@ -1051,121 +1051,84 @@ static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint) if (!page) return -ENOMEM; - trans = ipa_endpoint_trans_alloc(endpoint, 1); - if (!trans) - goto err_free_pages; - /* Offset the buffer to make space for skb headroom */ offset = NET_SKB_PAD; len = buffer_size - offset; ret = gsi_trans_page_add(trans, page, len, offset); if (ret) - goto err_trans_free; - trans->data = page; /* transaction owns page now */ - - if (++endpoint->replenish_ready == IPA_REPLENISH_BATCH) { - doorbell = true; - endpoint->replenish_ready = 0; - } - - gsi_trans_commit(trans, doorbell); - - return 0; - -err_trans_free: - gsi_trans_free(trans); -err_free_pages: - __free_pages(page, get_order(buffer_size)); + __free_pages(page, get_order(buffer_size)); + else + trans->data = page; /* transaction owns page now */ - return -ENOMEM; + return ret; } /** * ipa_endpoint_replenish() - Replenish endpoint receive buffers * @endpoint: Endpoint to be replenished - * @add_one: Whether this is replacing a just-consumed buffer * * The IPA hardware can hold a fixed number of receive buffers for an RX * endpoint, based on the number of entries in the underlying channel ring * buffer. If an endpoint's "backlog" is non-zero, it indicates how many * more receive buffers can be supplied to the hardware. Replenishing for - * an endpoint can be disabled, in which case requests to replenish a - * buffer are "saved", and transferred to the backlog once it is re-enabled - * again. + * an endpoint can be disabled, in which case buffers are not queued to + * the hardware. */ -static void ipa_endpoint_replenish(struct ipa_endpoint *endpoint, bool add_one) +static void ipa_endpoint_replenish(struct ipa_endpoint *endpoint) { - struct gsi *gsi; - u32 backlog; - int delta; + struct gsi_trans *trans; - if (!test_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags)) { - if (add_one) - atomic_inc(&endpoint->replenish_saved); + if (!test_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags)) return; - } - /* If already active, just update the backlog */ - if (test_and_set_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags)) { - if (add_one) - atomic_inc(&endpoint->replenish_backlog); + /* Skip it if it's already active */ + if (test_and_set_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags)) return; - } - while (atomic_dec_not_zero(&endpoint->replenish_backlog)) - if (ipa_endpoint_replenish_one(endpoint)) + while ((trans = ipa_endpoint_trans_alloc(endpoint, 1))) { + bool doorbell; + + if (ipa_endpoint_replenish_one(endpoint, trans)) goto try_again_later; - clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); - if (add_one) - atomic_inc(&endpoint->replenish_backlog); + /* Ring the doorbell if we've got a full batch */ + doorbell = !(++endpoint->replenish_count % IPA_REPLENISH_BATCH); + gsi_trans_commit(trans, doorbell); + } + + clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); return; try_again_later: + gsi_trans_free(trans); clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); - /* The last one didn't succeed, so fix the backlog */ - delta = add_one ? 
2 : 1; - backlog = atomic_add_return(delta, &endpoint->replenish_backlog); - /* Whenever a receive buffer transaction completes we'll try to * replenish again. It's unlikely, but if we fail to supply even * one buffer, nothing will trigger another replenish attempt. - * Receive buffer transactions use one TRE, so schedule work to - * try replenishing again if our backlog is *all* available TREs. + * If the hardware has no receive buffers queued, schedule work to + * try replenishing again. */ - gsi = &endpoint->ipa->gsi; - if (backlog == gsi_channel_tre_max(gsi, endpoint->channel_id)) + if (gsi_channel_trans_idle(&endpoint->ipa->gsi, endpoint->channel_id)) schedule_delayed_work(&endpoint->replenish_work, msecs_to_jiffies(1)); } static void ipa_endpoint_replenish_enable(struct ipa_endpoint *endpoint) { - struct gsi *gsi = &endpoint->ipa->gsi; - u32 max_backlog; - u32 saved; - set_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); - while ((saved = atomic_xchg(&endpoint->replenish_saved, 0))) - atomic_add(saved, &endpoint->replenish_backlog); /* Start replenishing if hardware currently has no buffers */ - max_backlog = gsi_channel_tre_max(gsi, endpoint->channel_id); - if (atomic_read(&endpoint->replenish_backlog) == max_backlog) - ipa_endpoint_replenish(endpoint, false); + if (gsi_channel_trans_idle(&endpoint->ipa->gsi, endpoint->channel_id)) + ipa_endpoint_replenish(endpoint); } static void ipa_endpoint_replenish_disable(struct ipa_endpoint *endpoint) { - u32 backlog; - clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); - while ((backlog = atomic_xchg(&endpoint->replenish_backlog, 0))) - atomic_add(backlog, &endpoint->replenish_saved); } static void ipa_endpoint_replenish_work(struct work_struct *work) @@ -1175,7 +1138,7 @@ static void ipa_endpoint_replenish_work(struct work_struct *work) endpoint = container_of(dwork, struct ipa_endpoint, replenish_work); - ipa_endpoint_replenish(endpoint, false); + ipa_endpoint_replenish(endpoint); } static void ipa_endpoint_skb_copy(struct ipa_endpoint *endpoint, @@ -1380,10 +1343,8 @@ static void ipa_endpoint_rx_complete(struct ipa_endpoint *endpoint, { struct page *page; - ipa_endpoint_replenish(endpoint, true); - if (trans->cancelled) - return; + goto done; /* Parse or build a socket buffer using the actual received length */ page = trans->data; @@ -1391,6 +1352,8 @@ static void ipa_endpoint_rx_complete(struct ipa_endpoint *endpoint, ipa_endpoint_status_parse(endpoint, page, trans->len); else if (ipa_endpoint_skb_build(endpoint, page, trans->len)) trans->data = NULL; /* Pages have been consumed */ +done: + ipa_endpoint_replenish(endpoint); } void ipa_endpoint_trans_complete(struct ipa_endpoint *endpoint, @@ -1727,9 +1690,6 @@ static void ipa_endpoint_setup_one(struct ipa_endpoint *endpoint) */ clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); - atomic_set(&endpoint->replenish_saved, - gsi_channel_tre_max(gsi, endpoint->channel_id)); - atomic_set(&endpoint->replenish_backlog, 0); INIT_DELAYED_WORK(&endpoint->replenish_work, ipa_endpoint_replenish_work); } @@ -1905,6 +1865,8 @@ u32 ipa_endpoint_init(struct ipa *ipa, u32 count, enum ipa_endpoint_name name; u32 filter_map; + BUILD_BUG_ON(!IPA_REPLENISH_BATCH); + if (!ipa_endpoint_data_valid(ipa, count, data)) return 0; /* Error */ diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h index 0313cdc607de..12fd5b16c18e 100644 --- a/drivers/net/ipa/ipa_endpoint.h +++ b/drivers/net/ipa/ipa_endpoint.h @@ 
-65,9 +65,7 @@ enum ipa_replenish_flag { * @evt_ring_id: GSI event ring used by the endpoint * @netdev: Network device pointer, if endpoint uses one * @replenish_flags: Replenishing state flags - * @replenish_ready: Number of replenish transactions without doorbell - * @replenish_saved: Replenish requests held while disabled - * @replenish_backlog: Number of buffers needed to fill hardware queue + * @replenish_count: Total number of replenish transactions committed * @replenish_work: Work item used for repeated replenish failures */ struct ipa_endpoint { @@ -86,9 +84,7 @@ struct ipa_endpoint { /* Receive buffer replenishing for RX endpoints */ DECLARE_BITMAP(replenish_flags, IPA_REPLENISH_COUNT); - u32 replenish_ready; - atomic_t replenish_saved; - atomic_t replenish_backlog; + u64 replenish_count; struct delayed_work replenish_work; /* global wq */ }; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 420201858564..5b53a3e23c89 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -132,17 +132,6 @@ void phylink_set_port_modes(unsigned long *mask) } EXPORT_SYMBOL_GPL(phylink_set_port_modes); -void phylink_set_10g_modes(unsigned long *mask) -{ - phylink_set(mask, 10000baseT_Full); - phylink_set(mask, 10000baseCR_Full); - phylink_set(mask, 10000baseSR_Full); - phylink_set(mask, 10000baseLR_Full); - phylink_set(mask, 10000baseLRM_Full); - phylink_set(mask, 10000baseER_Full); -} -EXPORT_SYMBOL_GPL(phylink_set_10g_modes); - static int phylink_is_empty_linkmode(const unsigned long *linkmode) { __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp) = { 0, }; diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index b554054a7560..e62fc4f2aee0 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -358,6 +358,7 @@ config USB_NET_SMSC95XX select BITREVERSE select CRC16 select CRC32 + imply NET_SELFTESTS help This option adds support for SMSC LAN95XX based USB 2.0 10/100 Ethernet adapters. diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h index 2a1e31defe71..4334aafab59a 100644 --- a/drivers/net/usb/asix.h +++ b/drivers/net/usb/asix.h @@ -192,8 +192,8 @@ extern const struct driver_info ax88172a_info; /* ASIX specific flags */ #define FLAG_EEPROM_MAC (1UL << 0) /* init device MAC from eeprom */ -int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data, int in_pm); +int __must_check asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, + u16 size, void *data, int in_pm); int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data, int in_pm); diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 71682970be58..524805285019 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -11,8 +11,8 @@ #define AX_HOST_EN_RETRIES 30 -int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data, int in_pm) +int __must_check asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, + u16 size, void *data, int in_pm) { int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16); @@ -27,9 +27,12 @@ int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, ret = fn(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, size); - if (unlikely(ret < 0)) + if (unlikely(ret < size)) { + ret = ret < 0 ? 
ret : -ENODATA; + netdev_warn(dev->net, "Failed to read reg index 0x%04x: %d\n", index, ret); + } return ret; } @@ -79,7 +82,7 @@ static int asix_check_host_enable(struct usbnet *dev, int in_pm) 0, 0, 1, &smsr, in_pm); if (ret == -ENODEV) break; - else if (ret < sizeof(smsr)) + else if (ret < 0) continue; else if (smsr & AX_HOST_EN) break; @@ -579,8 +582,12 @@ int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc) return ret; } - asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, - (__u16)loc, 2, &res, 1); + ret = asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, + (__u16)loc, 2, &res, 1); + if (ret < 0) { + mutex_unlock(&dev->phy_mutex); + return ret; + } asix_set_hw_mii(dev, 1); mutex_unlock(&dev->phy_mutex); diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 9b72334aabb6..6ea44e53713a 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -755,7 +755,12 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) priv->phy_addr = ret; priv->embd_phy = ((priv->phy_addr & 0x1f) == 0x10); - asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0); + ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0); + if (ret < 0) { + netdev_dbg(dev->net, "Failed to read STATMNGSTS_REG: %d\n", ret); + return ret; + } + chipcode &= AX_CHIPCODE_MASK; ret = (chipcode == AX_AX88772_CHIPCODE) ? ax88772_hw_reset(dev, 0) : @@ -919,11 +924,21 @@ static int ax88178_reset(struct usbnet *dev) int gpio0 = 0; u32 phyid; - asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status, 0); + ret = asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status, 0); + if (ret < 0) { + netdev_dbg(dev->net, "Failed to read GPIOS: %d\n", ret); + return ret; + } + netdev_dbg(dev->net, "GPIO Status: 0x%04x\n", status); asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0, 0, 0, NULL, 0); - asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom, 0); + ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom, 0); + if (ret < 0) { + netdev_dbg(dev->net, "Failed to read EEPROM: %d\n", ret); + return ret; + } + asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0, 0, 0, NULL, 0); netdev_dbg(dev->net, "EEPROM index 0x17 is 0x%04x\n", eeprom); diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 82bb5ed94c48..a7c1434fe2da 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -21,6 +21,7 @@ #include <net/ipv6.h> #include <net/addrconf.h> #include <net/ipv6_stubs.h> +#include <net/ndisc.h> /* alternative VLAN for IP session 0 if not untagged */ #define MBIM_IPS0_VID 4094 diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index bc1e3dd67c04..5567220e9d16 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -20,6 +20,8 @@ #include <linux/of_net.h> #include <linux/mdio.h> #include <linux/phy.h> +#include <net/selftests.h> + #include "smsc95xx.h" #define SMSC_CHIPNAME "smsc95xx" @@ -727,6 +729,26 @@ static u32 smsc95xx_get_link(struct net_device *net) return net->phydev->link; } +static void smsc95xx_ethtool_get_strings(struct net_device *netdev, u32 sset, + u8 *data) +{ + switch (sset) { + case ETH_SS_TEST: + net_selftest_get_strings(data); + break; + } +} + +static int smsc95xx_ethtool_get_sset_count(struct net_device *ndev, int sset) +{ + switch (sset) { + case ETH_SS_TEST: + return net_selftest_get_count(); + default: + return -EOPNOTSUPP; + } +} + static const struct ethtool_ops smsc95xx_ethtool_ops = { .get_link = smsc95xx_get_link, .nway_reset = 
phy_ethtool_nway_reset, @@ -743,6 +765,9 @@ static const struct ethtool_ops smsc95xx_ethtool_ops = { .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, .get_ts_info = ethtool_op_get_ts_info, + .self_test = net_selftest, + .get_strings = smsc95xx_ethtool_get_strings, + .get_sset_count = smsc95xx_ethtool_get_sset_count, }; static int smsc95xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c index 8070f3fd98f0..7d4da9e605ef 100644 --- a/drivers/ptp/ptp_pch.c +++ b/drivers/ptp/ptp_pch.c @@ -10,9 +10,10 @@ #include <linux/device.h> #include <linux/err.h> -#include <linux/init.h> #include <linux/interrupt.h> #include <linux/io.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/io-64-nonatomic-hi-lo.h> #include <linux/irq.h> #include <linux/kernel.h> #include <linux/module.h> @@ -100,7 +101,6 @@ struct pch_ts_regs { #define PCH_ECS_ETH (1 << 0) #define PCH_ECS_CAN (1 << 1) -#define PCH_STATION_BYTES 6 #define PCH_IEEE1588_ETH (1 << 0) #define PCH_IEEE1588_CAN (1 << 1) @@ -115,8 +115,6 @@ struct pch_dev { int exts0_enabled; int exts1_enabled; - u32 mem_base; - u32 mem_size; u32 irq; struct pci_dev *pdev; spinlock_t register_lock; @@ -148,28 +146,15 @@ static inline void pch_eth_enable_set(struct pch_dev *chip) static u64 pch_systime_read(struct pch_ts_regs __iomem *regs) { u64 ns; - u32 lo, hi; - lo = ioread32(®s->systime_lo); - hi = ioread32(®s->systime_hi); + ns = ioread64_lo_hi(®s->systime_lo); - ns = ((u64) hi) << 32; - ns |= lo; - ns <<= TICKS_NS_SHIFT; - - return ns; + return ns << TICKS_NS_SHIFT; } static void pch_systime_write(struct pch_ts_regs __iomem *regs, u64 ns) { - u32 hi, lo; - - ns >>= TICKS_NS_SHIFT; - hi = ns >> 32; - lo = ns & 0xffffffff; - - iowrite32(lo, ®s->systime_lo); - iowrite32(hi, ®s->systime_hi); + iowrite64_lo_hi(ns >> TICKS_NS_SHIFT, ®s->systime_lo); } static inline void pch_block_reset(struct pch_dev *chip) @@ -235,16 +220,10 @@ u64 pch_rx_snap_read(struct pci_dev *pdev) { struct pch_dev *chip = pci_get_drvdata(pdev); u64 ns; - u32 lo, hi; - lo = ioread32(&chip->regs->rx_snap_lo); - hi = ioread32(&chip->regs->rx_snap_hi); + ns = ioread64_lo_hi(&chip->regs->rx_snap_lo); - ns = ((u64) hi) << 32; - ns |= lo; - ns <<= TICKS_NS_SHIFT; - - return ns; + return ns << TICKS_NS_SHIFT; } EXPORT_SYMBOL(pch_rx_snap_read); @@ -252,16 +231,10 @@ u64 pch_tx_snap_read(struct pci_dev *pdev) { struct pch_dev *chip = pci_get_drvdata(pdev); u64 ns; - u32 lo, hi; - - lo = ioread32(&chip->regs->tx_snap_lo); - hi = ioread32(&chip->regs->tx_snap_hi); - ns = ((u64) hi) << 32; - ns |= lo; - ns <<= TICKS_NS_SHIFT; + ns = ioread64_lo_hi(&chip->regs->tx_snap_lo); - return ns; + return ns << TICKS_NS_SHIFT; } EXPORT_SYMBOL(pch_tx_snap_read); @@ -292,8 +265,9 @@ static void pch_reset(struct pch_dev *chip) */ int pch_set_station_address(u8 *addr, struct pci_dev *pdev) { - s32 i; struct pch_dev *chip = pci_get_drvdata(pdev); + bool valid; + u64 mac; /* Verify the parameter */ if ((chip->regs == NULL) || addr == (u8 *)NULL) { @@ -301,37 +275,15 @@ int pch_set_station_address(u8 *addr, struct pci_dev *pdev) "invalid params returning PCH_INVALIDPARAM\n"); return PCH_INVALIDPARAM; } - /* For all station address bytes */ - for (i = 0; i < PCH_STATION_BYTES; i++) { - u32 val; - s32 tmp; - - tmp = hex_to_bin(addr[i * 3]); - if (tmp < 0) { - dev_err(&pdev->dev, - "invalid params returning PCH_INVALIDPARAM\n"); - return PCH_INVALIDPARAM; - } - val = tmp * 16; - tmp = 
hex_to_bin(addr[(i * 3) + 1]); - if (tmp < 0) { - dev_err(&pdev->dev, - "invalid params returning PCH_INVALIDPARAM\n"); - return PCH_INVALIDPARAM; - } - val += tmp; - /* Expects ':' separated addresses */ - if ((i < 5) && (addr[(i * 3) + 2] != ':')) { - dev_err(&pdev->dev, - "invalid params returning PCH_INVALIDPARAM\n"); - return PCH_INVALIDPARAM; - } - /* Ideally we should set the address only after validating - entire string */ - dev_dbg(&pdev->dev, "invoking pch_station_set\n"); - iowrite32(val, &chip->regs->ts_st[i]); + valid = mac_pton(addr, (u8 *)&mac); + if (!valid) { + dev_err(&pdev->dev, "invalid params returning PCH_INVALIDPARAM\n"); + return PCH_INVALIDPARAM; } + + dev_dbg(&pdev->dev, "invoking pch_station_set\n"); + iowrite64_lo_hi(mac, &chip->regs->ts_st); return 0; } EXPORT_SYMBOL(pch_set_station_address); @@ -344,19 +296,16 @@ static irqreturn_t isr(int irq, void *priv) { struct pch_dev *pch_dev = priv; struct pch_ts_regs __iomem *regs = pch_dev->regs; struct ptp_clock_event event; - u32 ack = 0, lo, hi, val; + u32 ack = 0, val; val = ioread32(&regs->event); if (val & PCH_TSE_SNS) { ack |= PCH_TSE_SNS; if (pch_dev->exts0_enabled) { - hi = ioread32(&regs->asms_hi); - lo = ioread32(&regs->asms_lo); event.type = PTP_CLOCK_EXTTS; event.index = 0; - event.timestamp = ((u64) hi) << 32; - event.timestamp |= lo; + event.timestamp = ioread64_hi_lo(&regs->asms_hi); event.timestamp <<= TICKS_NS_SHIFT; ptp_clock_event(pch_dev->ptp_clock, &event); } @@ -365,12 +314,9 @@ static irqreturn_t isr(int irq, void *priv) if (val & PCH_TSE_SNM) { ack |= PCH_TSE_SNM; if (pch_dev->exts1_enabled) { - hi = ioread32(&regs->amms_hi); - lo = ioread32(&regs->amms_lo); event.type = PTP_CLOCK_EXTTS; event.index = 1; - event.timestamp = ((u64) hi) << 32; - event.timestamp |= lo; + event.timestamp = ioread64_hi_lo(&regs->amms_hi); event.timestamp <<= TICKS_NS_SHIFT; ptp_clock_event(pch_dev->ptp_clock, &event); } @@ -501,31 +447,12 @@ static const struct ptp_clock_info ptp_pch_caps = { .enable = ptp_pch_enable, }; -#define pch_suspend NULL -#define pch_resume NULL - static void pch_remove(struct pci_dev *pdev) { struct pch_dev *chip = pci_get_drvdata(pdev); + free_irq(pdev->irq, chip); ptp_clock_unregister(chip->ptp_clock); - /* free the interrupt */ - if (pdev->irq != 0) - free_irq(pdev->irq, chip); - - /* unmap the virtual IO memory space */ - if (chip->regs != NULL) { - iounmap(chip->regs); - chip->regs = NULL; - } - /* release the reserved IO memory space */ - if (chip->mem_base != 0) { - release_mem_region(chip->mem_base, chip->mem_size); - chip->mem_base = 0; - } - pci_disable_device(pdev); - kfree(chip); - dev_info(&pdev->dev, "complete\n"); } static s32 @@ -535,50 +462,29 @@ pch_probe(struct pci_dev *pdev, const struct pci_device_id *id) { s32 ret; unsigned long flags; struct pch_dev *chip; - chip = kzalloc(sizeof(struct pch_dev), GFP_KERNEL); + chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL); if (chip == NULL) return -ENOMEM; /* enable the 1588 pci device */ - ret = pci_enable_device(pdev); + ret = pcim_enable_device(pdev); if (ret != 0) { dev_err(&pdev->dev, "could not enable the pci device\n"); - goto err_pci_en; + return ret; } - chip->mem_base = pci_resource_start(pdev, IO_MEM_BAR); - if (!chip->mem_base) { + ret = pcim_iomap_regions(pdev, BIT(IO_MEM_BAR), "1588_regs"); + if (ret) { dev_err(&pdev->dev, "could not locate IO memory address\n"); - ret = -ENODEV; - goto err_pci_start; - } - - /* retrieve the available length of the IO memory space */ - chip->mem_size = pci_resource_len(pdev, IO_MEM_BAR); - - /* allocate 
the memory for the device registers */ - if (!request_mem_region(chip->mem_base, chip->mem_size, "1588_regs")) { - dev_err(&pdev->dev, - "could not allocate register memory space\n"); - ret = -EBUSY; - goto err_req_mem_region; + return ret; } /* get the virtual address to the 1588 registers */ - chip->regs = ioremap(chip->mem_base, chip->mem_size); - - if (!chip->regs) { - dev_err(&pdev->dev, "Could not get virtual address\n"); - ret = -ENOMEM; - goto err_ioremap; - } - + chip->regs = pcim_iomap_table(pdev)[IO_MEM_BAR]; chip->caps = ptp_pch_caps; chip->ptp_clock = ptp_clock_register(&chip->caps, &pdev->dev); - if (IS_ERR(chip->ptp_clock)) { - ret = PTR_ERR(chip->ptp_clock); - goto err_ptp_clock_reg; - } + if (IS_ERR(chip->ptp_clock)) + return PTR_ERR(chip->ptp_clock); spin_lock_init(&chip->register_lock); @@ -598,8 +504,7 @@ pch_probe(struct pci_dev *pdev, const struct pci_device_id *id) pch_reset(chip); iowrite32(DEFAULT_ADDEND, &chip->regs->addend); - iowrite32(1, &chip->regs->trgt_lo); - iowrite32(0, &chip->regs->trgt_hi); + iowrite64_lo_hi(1, &chip->regs->trgt_lo); iowrite32(PCH_TSE_TTIPEND, &chip->regs->event); pch_eth_enable_set(chip); @@ -617,21 +522,7 @@ pch_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_req_irq: ptp_clock_unregister(chip->ptp_clock); -err_ptp_clock_reg: - iounmap(chip->regs); - chip->regs = NULL; -err_ioremap: - release_mem_region(chip->mem_base, chip->mem_size); - -err_req_mem_region: - chip->mem_base = 0; - -err_pci_start: - pci_disable_device(pdev); - -err_pci_en: - kfree(chip); dev_err(&pdev->dev, "probe failed(ret=0x%x)\n", ret); return ret; @@ -646,33 +537,13 @@ static const struct pci_device_id pch_ieee1588_pcidev_id[] = { }; MODULE_DEVICE_TABLE(pci, pch_ieee1588_pcidev_id); -static SIMPLE_DEV_PM_OPS(pch_pm_ops, pch_suspend, pch_resume); - static struct pci_driver pch_driver = { .name = KBUILD_MODNAME, .id_table = pch_ieee1588_pcidev_id, .probe = pch_probe, .remove = pch_remove, - .driver.pm = &pch_pm_ops, }; - -static void __exit ptp_pch_exit(void) -{ - pci_unregister_driver(&pch_driver); -} - -static s32 __init ptp_pch_init(void) -{ - s32 ret; - - /* register the driver with the pci core */ - ret = pci_register_driver(&pch_driver); - - return ret; -} - -module_init(ptp_pch_init); -module_exit(ptp_pch_exit); +module_pci_driver(pch_driver); module_param_string(station, pch_param.station, sizeof(pch_param.station), 0444); diff --git a/drivers/soc/fsl/dpio/qbman-portal.c b/drivers/soc/fsl/dpio/qbman-portal.c index 058b78fac5e3..0a3fb6c115f4 100644 --- a/drivers/soc/fsl/dpio/qbman-portal.c +++ b/drivers/soc/fsl/dpio/qbman-portal.c @@ -743,8 +743,8 @@ int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s, full_mask = s->eqcr.pi_ci_mask; if (!s->eqcr.available) { eqcr_ci = s->eqcr.ci; - p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK; - s->eqcr.ci = *p & full_mask; + s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI); + s->eqcr.ci &= full_mask; s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, eqcr_ci, s->eqcr.ci); if (!s->eqcr.available) { @@ -887,8 +887,8 @@ int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s, full_mask = s->eqcr.pi_ci_mask; if (!s->eqcr.available) { eqcr_ci = s->eqcr.ci; - p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK; - s->eqcr.ci = *p & full_mask; + s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI); + s->eqcr.ci &= full_mask; s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, eqcr_ci, s->eqcr.ci); if (!s->eqcr.available) diff --git a/include/linux/bpf-cgroup.h 
b/include/linux/bpf-cgroup.h index b525d8cdc25b..88a51b242adc 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -8,6 +8,7 @@ #include <linux/jump_label.h> #include <linux/percpu.h> #include <linux/rbtree.h> +#include <net/sock.h> #include <uapi/linux/bpf.h> struct sock; @@ -165,11 +166,23 @@ int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, void *value, u64 flags); +/* Opportunistic check to see whether we have any BPF program attached */ +static inline bool cgroup_bpf_sock_enabled(struct sock *sk, + enum cgroup_bpf_attach_type type) +{ + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + struct bpf_prog_array *array; + + array = rcu_access_pointer(cgrp->bpf.effective[type]); + return array != &bpf_empty_prog_array.hdr; +} + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled(CGROUP_INET_INGRESS)) \ + if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && \ + cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS)) \ __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ CGROUP_INET_INGRESS); \ \ @@ -181,7 +194,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \ typeof(sk) __sk = sk_to_full_sk(sk); \ - if (sk_fullsock(__sk)) \ + if (sk_fullsock(__sk) && \ + cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS)) \ __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ CGROUP_INET_EGRESS); \ } \ @@ -347,7 +361,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, kernel_optval) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled(CGROUP_SETSOCKOPT)) \ + if (cgroup_bpf_enabled(CGROUP_SETSOCKOPT) && \ + cgroup_bpf_sock_enabled(sock, CGROUP_SETSOCKOPT)) \ __ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \ optname, optval, \ optlen, \ @@ -367,7 +382,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, max_optlen, retval) \ ({ \ int __ret = retval; \ - if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \ + if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT) && \ + cgroup_bpf_sock_enabled(sock, CGROUP_GETSOCKOPT)) \ if (!(sock)->sk_prot->bpf_bypass_getsockopt || \ !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \ tcp_bpf_bypass_getsockopt, \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8c92c974bd12..2fc7e5c5ef41 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -332,7 +332,10 @@ enum bpf_type_flag { */ MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS), - __BPF_TYPE_LAST_FLAG = MEM_ALLOC, + /* MEM is in user address space. */ + MEM_USER = BIT(3 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_LAST_FLAG = MEM_USER, }; /* Max number of base types. 
*/ @@ -588,7 +591,7 @@ struct bpf_verifier_ops { const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id); + u32 *next_btf_id, enum bpf_type_flag *flag); }; struct bpf_prog_offload_ops { @@ -843,8 +846,8 @@ void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); -int bpf_jit_charge_modmem(u32 pages); -void bpf_jit_uncharge_modmem(u32 pages); +int bpf_jit_charge_modmem(u32 size); +void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, @@ -950,6 +953,7 @@ struct bpf_prog_aux { bool sleepable; bool tail_call_reachable; bool xdp_has_frags; + bool use_bpf_prog_pack; struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; @@ -1233,6 +1237,19 @@ struct bpf_prog_array { struct bpf_prog_array_item items[]; }; +struct bpf_empty_prog_array { + struct bpf_prog_array hdr; + struct bpf_prog *null_prog; +}; + +/* To avoid allocating an empty bpf_prog_array for cgroups that + * don't have a bpf program attached, use one global 'bpf_empty_prog_array'. + * It will not be modified by the caller of bpf_prog_array_alloc() + * (since the caller requested prog_cnt == 0); + * that pointer should still be 'freed' by bpf_prog_array_free() + */ +extern struct bpf_empty_prog_array bpf_empty_prog_array; + struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); void bpf_prog_array_free(struct bpf_prog_array *progs); int bpf_prog_array_length(struct bpf_prog_array *progs); @@ -1767,7 +1784,7 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size, int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id); + u32 *next_btf_id, enum bpf_type_flag *flag); bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, const struct btf *need_btf, u32 need_type_id); @@ -1875,11 +1892,6 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) { return -EOPNOTSUPP; } -static inline bool dev_map_can_have_prog(struct bpf_map *map) -{ - return false; -} - static inline void __dev_flush(void) { } @@ -1943,11 +1955,6 @@ static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, return -EOPNOTSUPP; } -static inline bool cpu_map_prog_allowed(struct bpf_map *map) -{ - return false; -} - static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) { @@ -2243,6 +2250,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_strncmp_proto; +extern const struct bpf_func_proto bpf_copy_from_user_task_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); @@ -2355,6 +2363,8 @@ enum bpf_text_poke_type { int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); +void *bpf_arch_text_copy(void *dst, void *src, size_t len); + struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); diff --git a/include/linux/btf.h b/include/linux/btf.h index b12cfe3b12bb..36bc09b8e890 100644 --- 
a/include/linux/btf.h +++ b/include/linux/btf.h @@ -238,6 +238,11 @@ static inline bool btf_type_is_var(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_VAR; } +static inline bool btf_type_is_type_tag(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_TYPE_TAG; +} + /* union is only a special case of struct: * all its offsetof(member) == 0 */ @@ -322,6 +327,11 @@ static inline const struct btf_var_secinfo *btf_type_var_secinfo( return (const struct btf_var_secinfo *)(t + 1); } +static inline struct btf_param *btf_params(const struct btf_type *t) +{ + return (struct btf_param *)(t + 1); +} + #ifdef CONFIG_BPF_SYSCALL struct bpf_prog; diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 3c1795fdb568..3f31ff400432 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -31,6 +31,9 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # define __kernel # ifdef STRUCTLEAK_PLUGIN # define __user __attribute__((user)) +# elif defined(CONFIG_DEBUG_INFO_BTF) && defined(CONFIG_PAHOLE_HAS_BTF_TAG) && \ + __has_attribute(btf_type_tag) +# define __user __attribute__((btf_type_tag("user"))) # else # define __user # endif diff --git a/include/linux/filter.h b/include/linux/filter.h index d23e999dc032..1cb1af917617 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -548,7 +548,7 @@ struct sock_fprog_kern { #define BPF_IMAGE_ALIGNMENT 8 struct bpf_binary_header { - u32 pages; + u32 size; u8 image[] __aligned(BPF_IMAGE_ALIGNMENT); }; @@ -886,17 +886,8 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp) static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { set_vm_flush_reset_perms(hdr); - set_memory_ro((unsigned long)hdr, hdr->pages); - set_memory_x((unsigned long)hdr, hdr->pages); -} - -static inline struct bpf_binary_header * -bpf_jit_binary_hdr(const struct bpf_prog *fp) -{ - unsigned long real_start = (unsigned long)fp->bpf_func; - unsigned long addr = real_start & PAGE_MASK; - - return (void *)addr; + set_memory_ro((unsigned long)hdr, hdr->size >> PAGE_SHIFT); + set_memory_x((unsigned long)hdr, hdr->size >> PAGE_SHIFT); } int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); @@ -1068,6 +1059,18 @@ void *bpf_jit_alloc_exec(unsigned long size); void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp); +struct bpf_binary_header * +bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image, + unsigned int alignment, + struct bpf_binary_header **rw_hdr, + u8 **rw_image, + bpf_jit_fill_hole_t bpf_fill_ill_insns); +int bpf_jit_binary_pack_finalize(struct bpf_prog *prog, + struct bpf_binary_header *ro_header, + struct bpf_binary_header *rw_header); +void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header, + struct bpf_binary_header *rw_header); + int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, struct bpf_jit_poke_descriptor *poke); diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 1e0f8a31f3de..16870f86c74d 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -51,7 +51,7 @@ struct ipv6_devconf { __s32 use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE - __s32 mc_forwarding; + atomic_t mc_forwarding; #endif __s32 disable_ipv6; __s32 drop_unicast_in_l2_multicast; diff --git a/include/linux/net/intel/i40e_client.h b/include/linux/net/intel/i40e_client.h index 6b3267b49755..ed42bd5f639f 100644 --- a/include/linux/net/intel/i40e_client.h +++ 
b/include/linux/net/intel/i40e_client.h @@ -26,11 +26,6 @@ struct i40e_client_version { u8 rsvd; }; -enum i40e_client_state { - __I40E_CLIENT_NULL, - __I40E_CLIENT_REGISTERED -}; - enum i40e_client_instance_state { __I40E_CLIENT_INSTANCE_NONE, __I40E_CLIENT_INSTANCE_OPENED, @@ -190,11 +185,6 @@ struct i40e_client { const struct i40e_client_ops *ops; /* client ops provided by the client */ }; -static inline bool i40e_client_is_registered(struct i40e_client *client) -{ - return test_bit(__I40E_CLIENT_REGISTERED, &client->state); -} - void i40e_client_device_register(struct i40e_info *ldev, struct i40e_client *client); void i40e_client_device_unregister(struct i40e_info *ldev); diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h index 1289593411d3..1c1332e4df26 100644 --- a/include/linux/net/intel/iidc.h +++ b/include/linux/net/intel/iidc.h @@ -32,6 +32,8 @@ enum iidc_rdma_protocol { }; #define IIDC_MAX_USER_PRIORITY 8 +#define IIDC_MAX_DSCP_MAPPING 64 +#define IIDC_DSCP_PFC_MODE 0x1 /* Struct to hold per RDMA Qset info */ struct iidc_rdma_qset_params { @@ -60,6 +62,8 @@ struct iidc_qos_params { u8 vport_relative_bw; u8 vport_priority_type; u8 num_tc; + u8 pfc_mode; + u8 dscp_map[IIDC_MAX_DSCP_MAPPING]; }; struct iidc_event { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e490b84732d1..5f6e2c0b0c90 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1948,6 +1948,8 @@ enum netdev_ml_priv_type { * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @linkwatch_dev_tracker: refcount tracker used by linkwatch. * @watchdog_dev_tracker: refcount tracker used by watchdog. + * @dev_registered_tracker: tracker for reference held while + * registered * * FIXME: cleanup struct net_device such that network protocol info * moves out. @@ -2282,6 +2284,7 @@ struct net_device { u8 dev_addr_shadow[MAX_ADDR_LEN]; netdevice_tracker linkwatch_dev_tracker; netdevice_tracker watchdog_dev_tracker; + netdevice_tracker dev_registered_tracker; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -3817,14 +3820,7 @@ extern unsigned int netdev_budget_usecs; /* Called by rtnetlink.c:rtnl_unlock() */ void netdev_run_todo(void); -/** - * dev_put - release reference to device - * @dev: network device - * - * Release reference to device to allow it to be freed. - * Try using dev_put_track() instead. - */ -static inline void dev_put(struct net_device *dev) +static inline void __dev_put(struct net_device *dev) { if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT @@ -3835,14 +3831,7 @@ static inline void dev_put(struct net_device *dev) } } -/** - * dev_hold - get reference to device - * @dev: network device - * - * Hold reference to device to keep it from being freed. - * Try using dev_hold_track() instead. - */ -static inline void dev_hold(struct net_device *dev) +static inline void __dev_hold(struct net_device *dev) { if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT @@ -3853,11 +3842,24 @@ static inline void dev_hold(struct net_device *dev) } } +static inline void __netdev_tracker_alloc(struct net_device *dev, + netdevice_tracker *tracker, + gfp_t gfp) +{ +#ifdef CONFIG_NET_DEV_REFCNT_TRACKER + ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp); +#endif +} + +/* netdev_tracker_alloc() can upgrade a prior untracked reference + * taken by dev_get_by_name()/dev_get_by_index() to a tracked one. 
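+ *
+ * A minimal usage sketch (editor's illustration, not part of this patch;
+ * net, ifindex and tracker are assumed to exist in the caller):
+ *
+ *	dev = dev_get_by_index(net, ifindex);		// untracked reference
+ *	netdev_tracker_alloc(dev, &tracker, GFP_ATOMIC);	// upgrade it
+ *	...
+ *	dev_put_track(dev, &tracker);		// drop reference and tracker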
+ */ static inline void netdev_tracker_alloc(struct net_device *dev, netdevice_tracker *tracker, gfp_t gfp) { #ifdef CONFIG_NET_DEV_REFCNT_TRACKER - ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp); + refcount_dec(&dev->refcnt_tracker.no_tracker); + __netdev_tracker_alloc(dev, tracker, gfp); #endif } @@ -3873,8 +3875,8 @@ static inline void dev_hold_track(struct net_device *dev, netdevice_tracker *tracker, gfp_t gfp) { if (dev) { - dev_hold(dev); - netdev_tracker_alloc(dev, tracker, gfp); + __dev_hold(dev); + __netdev_tracker_alloc(dev, tracker, gfp); } } @@ -3883,10 +3885,34 @@ static inline void dev_put_track(struct net_device *dev, { if (dev) { netdev_tracker_free(dev, tracker); - dev_put(dev); + __dev_put(dev); } } +/** + * dev_hold - get reference to device + * @dev: network device + * + * Hold reference to device to keep it from being freed. + * Try using dev_hold_track() instead. + */ +static inline void dev_hold(struct net_device *dev) +{ + dev_hold_track(dev, NULL, GFP_ATOMIC); +} + +/** + * dev_put - release reference to device + * @dev: network device + * + * Release reference to device to allow it to be freed. + * Try using dev_put_track() instead. + */ +static inline void dev_put(struct net_device *dev) +{ + dev_put_track(dev, NULL); +} + static inline void dev_replace_track(struct net_device *odev, struct net_device *ndev, netdevice_tracker *tracker, @@ -3895,11 +3921,11 @@ static inline void dev_replace_track(struct net_device *odev, if (odev) netdev_tracker_free(odev, tracker); - dev_hold(ndev); - dev_put(odev); + __dev_hold(ndev); + __dev_put(odev); if (ndev) - netdev_tracker_alloc(ndev, tracker, gfp); + __netdev_tracker_alloc(ndev, tracker, gfp); } /* Carrier loss detection, dial on demand. The functions netif_carrier_on diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 713a0c928b7c..cca149f78d35 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -582,7 +582,6 @@ int phylink_speed_up(struct phylink *pl); #define phylink_test(bm, mode) __phylink_do_bit(test_bit, bm, mode) void phylink_set_port_modes(unsigned long *bits); -void phylink_set_10g_modes(unsigned long *mask); void phylink_helper_basex_speed(struct phylink_link_state *state); void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h index 60f3453be23e..9ca353ab712b 100644 --- a/include/linux/ref_tracker.h +++ b/include/linux/ref_tracker.h @@ -13,6 +13,8 @@ struct ref_tracker_dir { spinlock_t lock; unsigned int quarantine_avail; refcount_t untracked; + refcount_t no_tracker; + bool dead; struct list_head list; /* List of active trackers */ struct list_head quarantine; /* List of dead trackers */ #endif @@ -26,7 +28,9 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, INIT_LIST_HEAD(&dir->quarantine); spin_lock_init(&dir->lock); dir->quarantine_avail = quarantine_count; + dir->dead = false; refcount_set(&dir->untracked, 1); + refcount_set(&dir->no_tracker, 1); stack_depot_init(); } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a27bcc4f7e9a..a5adbf6b51e8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -314,12 +314,38 @@ struct sk_buff; * used to translate the reason to string. 
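 *
 * A hedged usage sketch (editor's illustration): the enum values below are
 * meant to be passed to kfree_skb_reason(), e.g.
 *
 *	if (!sk)
 *		kfree_skb_reason(skb, SKB_DROP_REASON_NO_SOCKET);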
*/ enum skb_drop_reason { - SKB_DROP_REASON_NOT_SPECIFIED, - SKB_DROP_REASON_NO_SOCKET, - SKB_DROP_REASON_PKT_TOO_SMALL, - SKB_DROP_REASON_TCP_CSUM, - SKB_DROP_REASON_SOCKET_FILTER, - SKB_DROP_REASON_UDP_CSUM, + SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */ + SKB_DROP_REASON_NO_SOCKET, /* socket not found */ + SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */ + SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ + SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ + SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ + SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ + SKB_DROP_REASON_OTHERHOST, /* packet doesn't belong to the + * current host (interface is in + * promisc mode) + */ + SKB_DROP_REASON_IP_CSUM, /* IP checksum error */ + SKB_DROP_REASON_IP_INHDR, /* there is something wrong with + * the IP header (see + * IPSTATS_MIB_INHDRERRORS) + */ + SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validation failed; + * see the documentation for + * rp_filter in ip-sysctl.rst for + * more information + */ + SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2 + * is multicast, but L3 is + * unicast. + */ + SKB_DROP_REASON_XFRM_POLICY, /* xfrm policy check failed */ + SKB_DROP_REASON_IP_NOPROTO, /* no support for IP protocol */ + SKB_DROP_REASON_SOCKET_RCVBUFF, /* socket receive buffer is full */ + SKB_DROP_REASON_PROTO_MEM, /* proto memory limitation, such as + * udp packets dropped out of + * udp_memory_allocated. + */ SKB_DROP_REASON_MAX, }; diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 18a717fe62eb..fdb5375f0562 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -29,7 +29,7 @@ struct sk_msg_sg { u32 end; u32 size; u32 copybreak; - unsigned long copy; + DECLARE_BITMAP(copy, MAX_MSG_FRAGS + 2); /* The extra two elements: * 1) used for chaining the front and sections when the list becomes * partitioned (e.g. end < start). The crypto APIs require the */ struct scatterlist data[MAX_MSG_FRAGS + 2]; }; -static_assert(BITS_PER_LONG >= NR_MSG_FRAG_IDS); /* UAPI in filter.c depends on struct sk_msg_sg being first element. 
*/ struct sk_msg { @@ -171,11 +170,6 @@ static inline u32 sk_msg_iter_dist(u32 start, u32 end) #define sk_msg_iter_next(msg, which) \ sk_msg_iter_var_next(msg->sg.which) -static inline void sk_msg_clear_meta(struct sk_msg *msg) -{ - memset(&msg->sg, 0, offsetofend(struct sk_msg_sg, copy)); -} - static inline void sk_msg_init(struct sk_msg *msg) { BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != NR_MSG_FRAG_IDS); @@ -234,7 +228,7 @@ static inline void sk_msg_compute_data_pointers(struct sk_msg *msg) { struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start); - if (test_bit(msg->sg.start, &msg->sg.copy)) { + if (test_bit(msg->sg.start, msg->sg.copy)) { msg->data = NULL; msg->data_end = NULL; } else { @@ -253,7 +247,7 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page, sg_set_page(sge, page, len, offset); sg_unmark_end(sge); - __set_bit(msg->sg.end, &msg->sg.copy); + __set_bit(msg->sg.end, msg->sg.copy); msg->sg.size += len; sk_msg_iter_next(msg, end); } @@ -262,9 +256,9 @@ static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state) { do { if (copy_state) - __set_bit(i, &msg->sg.copy); + __set_bit(i, msg->sg.copy); else - __clear_bit(i, &msg->sg.copy); + __clear_bit(i, msg->sg.copy); sk_msg_iter_var_next(i); if (i == msg->sg.end) break; diff --git a/include/linux/uio.h b/include/linux/uio.h index 1198a2bfc9bf..739285fe5a2f 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -273,6 +273,23 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) i->count = count; } +static inline int +iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes) +{ + size_t shorted = 0; + int npages; + + if (iov_iter_count(i) > max_bytes) { + shorted = iov_iter_count(i) - max_bytes; + iov_iter_truncate(i, max_bytes); + } + npages = iov_iter_npages(i, INT_MAX); + if (shorted) + iov_iter_reexpand(i, iov_iter_count(i) + shorted); + + return npages; +} + struct csum_state { __wsum csum; size_t off; diff --git a/include/net/dsa.h b/include/net/dsa.h index ca8c14b547b4..fd1f62a6e0a8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1281,7 +1281,7 @@ module_exit(dsa_tag_driver_module_exit) /** * module_dsa_tag_drivers() - Helper macro for registering DSA tag * drivers - * @__ops_array: Array of tag driver strucutres + * @__ops_array: Array of tag driver structures * * Helper macro for DSA tag drivers which do not do anything special * in module init/exit. Each module may only use this macro once, and diff --git a/include/net/gro.h b/include/net/gro.h index 8f75802d50fd..a765fedda5c4 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -29,46 +29,50 @@ struct napi_gro_cb { /* Number of segments aggregated. */ u16 count; - /* Start offset for remote checksum offload */ - u16 gro_remcsum_start; + /* Used in ipv6_gro_receive() and foo-over-udp */ + u16 proto; /* jiffies when first packet was created/queued */ unsigned long age; - /* Used in ipv6_gro_receive() and foo-over-udp */ - u16 proto; + /* portion of the cb set to zero at every gro iteration */ + struct_group(zeroed, + + /* Start offset for remote checksum offload */ + u16 gro_remcsum_start; - /* This is non-zero if the packet may be of the same flow. */ - u8 same_flow:1; + /* This is non-zero if the packet may be of the same flow. 
*/ + u8 same_flow:1; - /* Used in tunnel GRO receive */ - u8 encap_mark:1; + /* Used in tunnel GRO receive */ + u8 encap_mark:1; - /* GRO checksum is valid */ - u8 csum_valid:1; + /* GRO checksum is valid */ + u8 csum_valid:1; - /* Number of checksums via CHECKSUM_UNNECESSARY */ - u8 csum_cnt:3; + /* Number of checksums via CHECKSUM_UNNECESSARY */ + u8 csum_cnt:3; - /* Free the skb? */ - u8 free:2; + /* Free the skb? */ + u8 free:2; #define NAPI_GRO_FREE 1 #define NAPI_GRO_FREE_STOLEN_HEAD 2 - /* Used in foo-over-udp, set in udp[46]_gro_receive */ - u8 is_ipv6:1; + /* Used in foo-over-udp, set in udp[46]_gro_receive */ + u8 is_ipv6:1; - /* Used in GRE, set in fou/gue_gro_receive */ - u8 is_fou:1; + /* Used in GRE, set in fou/gue_gro_receive */ + u8 is_fou:1; - /* Used to determine if flush_id can be ignored */ - u8 is_atomic:1; + /* Used to determine if flush_id can be ignored */ + u8 is_atomic:1; - /* Number of gro_receive callbacks this packet already went through */ - u8 recursion_counter:4; + /* Number of gro_receive callbacks this packet already went through */ + u8 recursion_counter:4; - /* GRO is done by frag_list pointer chaining. */ - u8 is_flist:1; + /* GRO is done by frag_list pointer chaining. */ + u8 is_flist:1; + ); /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; diff --git a/include/net/inet_dscp.h b/include/net/inet_dscp.h new file mode 100644 index 000000000000..72f250dffada --- /dev/null +++ b/include/net/inet_dscp.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * inet_dscp.h: helpers for handling differentiated services codepoints (DSCP) + * + * DSCP is defined in RFC 2474: + * + * 0 1 2 3 4 5 6 7 + * +---+---+---+---+---+---+---+---+ + * | DSCP | CU | + * +---+---+---+---+---+---+---+---+ + * + * DSCP: differentiated services codepoint + * CU: currently unused + * + * The whole DSCP + CU bits form the DS field. + * The DS field is also commonly called TOS or Traffic Class (for IPv6). + * + * Note: the CU bits are now used for Explicit Congestion Notification + * (RFC 3168). + */ + +#ifndef _INET_DSCP_H +#define _INET_DSCP_H + +#include <linux/types.h> + +/* Special type for storing DSCP values. + * + * A dscp_t variable stores a DS field with the CU (ECN) bits cleared. + * Using dscp_t makes it possible to strictly separate DSCP and ECN bits, + * thus avoiding bugs where ECN bits are erroneously taken into account + * during FIB lookups or policy routing. + * + * Note: to get the real DSCP value contained in a dscp_t variable one would + * have to do a bit shift after calling inet_dscp_to_dsfield(). We could have + * a helper for that, but there are currently no users. 
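+ *
+ * A short illustrative sketch (editor's addition, not part of the patch;
+ * assumes an IPv4 header already parsed into iph):
+ *
+ *	dscp_t dscp = inet_dsfield_to_dscp(ipv4_get_dsfield(iph));
+ *	__u8 dsfield = inet_dscp_to_dsfield(dscp);	// ECN bits are zero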
+ */ +typedef u8 __bitwise dscp_t; + +#define INET_DSCP_MASK 0xfc + +static inline dscp_t inet_dsfield_to_dscp(__u8 dsfield) +{ + return (__force dscp_t)(dsfield & INET_DSCP_MASK); +} + +static inline __u8 inet_dscp_to_dsfield(dscp_t dscp) +{ + return (__force __u8)dscp; +} + +static inline bool inet_validate_dscp(__u8 val) +{ + return !(val & ~INET_DSCP_MASK); +} + +#endif /* _INET_DSCP_H */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c4297704bbcb..6a82bcb8813b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -17,6 +17,7 @@ #include <linux/rcupdate.h> #include <net/fib_notifier.h> #include <net/fib_rules.h> +#include <net/inet_dscp.h> #include <net/inetpeer.h> #include <linux/percpu.h> #include <linux/notifier.h> @@ -24,7 +25,7 @@ struct fib_config { u8 fc_dst_len; - u8 fc_tos; + dscp_t fc_dscp; u8 fc_protocol; u8 fc_scope; u8 fc_type; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 082f30256f59..f693784e1419 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -15,9 +15,9 @@ #include <linux/refcount.h> #include <linux/jump_label_ratelimit.h> #include <net/if_inet6.h> -#include <net/ndisc.h> #include <net/flow.h> #include <net/flow_dissector.h> +#include <net/inet_dscp.h> #include <net/snmp.h> #include <net/netns/hash.h> @@ -975,6 +975,11 @@ static inline u8 ip6_tclass(__be32 flowinfo) return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT; } +static inline dscp_t ip6_dscp(__be32 flowinfo) +{ + return inet_dsfield_to_dscp(ip6_tclass(flowinfo)); +} + static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel) { return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel; diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 0a4779175a52..5052c66e22d2 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _IPV6_FRAG_H #define _IPV6_FRAG_H +#include <linux/icmpv6.h> #include <linux/kernel.h> #include <net/addrconf.h> #include <net/ipv6.h> diff --git a/include/net/mctp.h b/include/net/mctp.h index 7e35ec79b909..e80a4baf8379 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -45,6 +45,11 @@ static inline bool mctp_address_ok(mctp_eid_t eid) return eid >= 8 && eid < 255; } +static inline bool mctp_address_matches(mctp_eid_t match, mctp_eid_t eid) +{ + return match == eid || match == MCTP_ADDR_ANY; +} + static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb) { return (struct mctp_hdr *)skb_network_header(skb); } @@ -121,7 +126,7 @@ struct mctp_sock { */ struct mctp_sk_key { mctp_eid_t peer_addr; - mctp_eid_t local_addr; + mctp_eid_t local_addr; /* MCTP_ADDR_ANY for locally owned tags */ __u8 tag; /* incoming tag match; invert TO for local */ /* we hold a ref to sk when set */ @@ -158,6 +163,12 @@ struct mctp_sk_key { */ unsigned long dev_flow_state; struct mctp_dev *dev; + + /* a tag allocated with the SIOCMCTPALLOCTAG ioctl will not expire + * automatically on timeout or response; instead, SIOCMCTPDROPTAG + * is used. 
+ */ + bool manual_alloc; }; struct mctp_skb_cb { @@ -234,6 +245,9 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); void mctp_key_unref(struct mctp_sk_key *key); +struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, + mctp_eid_t daddr, mctp_eid_t saddr, + bool manual, u8 *tagp); /* routing <--> device interface */ unsigned int mctp_default_net(struct net *net); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 5b61c462e534..374cc7b260fc 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -513,4 +513,10 @@ static inline void fnhe_genid_bump(struct net *net) atomic_inc(&net->fnhe_genid); } +#ifdef CONFIG_NET +void net_ns_init(void); +#else +static inline void net_ns_init(void) {} +#endif + #endif /* __NET_NET_NAMESPACE_H */ diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 30cdfc4e1615..d145f1966682 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -92,6 +92,11 @@ struct netns_ipv6 { struct sock *tcp_sk; struct sock *igmp_sk; struct sock *mc_autojoin_sk; + + struct hlist_head *inet6_addr_lst; + spinlock_t addrconf_hash_lock; + struct delayed_work addr_chk_work; + #ifdef CONFIG_IPV6_MROUTE #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES struct mr_table *mrt6; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 676cb8ea9e15..a3b57a93228a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -1028,4 +1028,15 @@ struct tc_fifo_qopt_offload { }; }; +#ifdef CONFIG_NET_CLS_ACT +DECLARE_STATIC_KEY_FALSE(tc_skb_ext_tc); +void tc_skb_ext_tc_enable(void); +void tc_skb_ext_tc_disable(void); +#define tc_skb_ext_tc_enabled() static_branch_unlikely(&tc_skb_ext_tc) +#else /* CONFIG_NET_CLS_ACT */ +static inline void tc_skb_ext_tc_enable(void) { } +static inline void tc_skb_ext_tc_disable(void) { } +#define tc_skb_ext_tc_enabled() false +#endif + #endif diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 443d45951564..4aa031849668 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -13,7 +13,7 @@ void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries); bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc); -u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, u32 max); +u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max); void xsk_tx_release(struct xsk_buff_pool *pool); struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev, u16 queue_id); @@ -142,8 +142,7 @@ static inline bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, return false; } -static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, - u32 max) +static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max) { return 0; } diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index ddeefc4a1040..5554ee75e7da 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -60,6 +60,7 @@ struct xsk_buff_pool { */ dma_addr_t *dma_pages; struct xdp_buff_xsk *heads; + struct xdp_desc *tx_descs; u64 chunk_mask; u64 addrs_cnt; u32 free_list_cnt; diff --git a/include/trace/events/mctp.h b/include/trace/events/mctp.h index 175b057c507f..165cf25f77a7 100644 --- a/include/trace/events/mctp.h +++ b/include/trace/events/mctp.h @@ -15,6 +15,7 @@ enum { MCTP_TRACE_KEY_REPLIED, MCTP_TRACE_KEY_INVALIDATED, MCTP_TRACE_KEY_CLOSED, + 
MCTP_TRACE_KEY_DROPPED, }; #endif /* __TRACE_MCTP_ENUMS */ @@ -22,6 +23,7 @@ TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_TIMEOUT); TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_REPLIED); TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_INVALIDATED); TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_CLOSED); +TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_DROPPED); TRACE_EVENT(mctp_key_acquire, TP_PROTO(const struct mctp_sk_key *key), @@ -66,7 +68,8 @@ TRACE_EVENT(mctp_key_release, { MCTP_TRACE_KEY_TIMEOUT, "timeout" }, { MCTP_TRACE_KEY_REPLIED, "replied" }, { MCTP_TRACE_KEY_INVALIDATED, "invalidated" }, - { MCTP_TRACE_KEY_CLOSED, "closed" }) + { MCTP_TRACE_KEY_CLOSED, "closed" }, + { MCTP_TRACE_KEY_DROPPED, "dropped" }) ) ); diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index a8a64b97504d..cfcfd26399f7 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -16,6 +16,17 @@ EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \ EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ + EM(SKB_DROP_REASON_NETFILTER_DROP, NETFILTER_DROP) \ + EM(SKB_DROP_REASON_OTHERHOST, OTHERHOST) \ + EM(SKB_DROP_REASON_IP_CSUM, IP_CSUM) \ + EM(SKB_DROP_REASON_IP_INHDR, IP_INHDR) \ + EM(SKB_DROP_REASON_IP_RPFILTER, IP_RPFILTER) \ + EM(SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, \ UNICAST_IN_L2_MULTICAST) \ + EM(SKB_DROP_REASON_XFRM_POLICY, XFRM_POLICY) \ + EM(SKB_DROP_REASON_IP_NOPROTO, IP_NOPROTO) \ + EM(SKB_DROP_REASON_SOCKET_RCVBUFF, SOCKET_RCVBUFF) \ + EM(SKB_DROP_REASON_PROTO_MEM, PROTO_MEM) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 16a7574292a5..afe3d0d7f5f2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5076,6 +5076,16 @@ union bpf_attr { * associated to *xdp_md*, at *offset*. * Return * 0 on success, or a negative error in case of failure. + * + * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags) + * Description + * Read *size* bytes from user space address *user_ptr* in *tsk*'s + * address space, and store the data in *dst*. *flags* is not + * used yet and is provided for future extensibility. This helper + * can only be used by sleepable programs. + * Return + * 0 on success, or a negative error in case of failure. On error, + * the *dst* buffer is zeroed out. 
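+ *
+ *		A hedged example (editor's sketch; assumes a sleepable
+ *		program where *task* and *user_comm_ptr* are available):
+ *
+ *		::
+ *
+ *			char comm[16] = {};
+ *
+ *			bpf_copy_from_user_task(comm, sizeof(comm),
+ *						user_comm_ptr, task, 0);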
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5269,6 +5279,7 @@ union bpf_attr { FN(xdp_get_buff_len), \ FN(xdp_load_bytes), \ FN(xdp_store_bytes), \ + FN(copy_from_user_task), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5563,7 +5574,8 @@ struct bpf_sock { __u32 src_ip4; __u32 src_ip6[4]; __u32 src_port; /* host byte order */ - __u32 dst_port; /* network byte order */ + __be16 dst_port; /* network byte order */ + __u16 :16; /* zero padding */ __u32 dst_ip4; __u32 dst_ip6[4]; __u32 state; @@ -6441,7 +6453,8 @@ struct bpf_sk_lookup { __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ __u32 remote_ip4; /* Network byte order */ __u32 remote_ip6[4]; /* Network byte order */ - __u32 remote_port; /* Network byte order */ + __be16 remote_port; /* Network byte order */ + __u16 :16; /* Zero padding */ __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h index 829ffdfcacca..38f6a8fdfd34 100644 --- a/include/uapi/linux/ioam6_iptunnel.h +++ b/include/uapi/linux/ioam6_iptunnel.h @@ -41,6 +41,15 @@ enum { /* IOAM Trace Header */ IOAM6_IPTUNNEL_TRACE, /* struct ioam6_trace_hdr */ + /* Insertion frequency: + * "k over n" packets (0 < k <= n) + * [0.0001% ... 100%] + */ +#define IOAM6_IPTUNNEL_FREQ_MIN 1 +#define IOAM6_IPTUNNEL_FREQ_MAX 1000000 + IOAM6_IPTUNNEL_FREQ_K, /* u32 */ + IOAM6_IPTUNNEL_FREQ_N, /* u32 */ + __IOAM6_IPTUNNEL_MAX, }; diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 07b0318716fc..154ab56651f1 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -44,7 +44,25 @@ struct sockaddr_mctp_ext { #define MCTP_TAG_MASK 0x07 #define MCTP_TAG_OWNER 0x08 +#define MCTP_TAG_PREALLOC 0x10 #define MCTP_OPT_ADDR_EXT 1 +#define SIOCMCTPALLOCTAG (SIOCPROTOPRIVATE + 0) +#define SIOCMCTPDROPTAG (SIOCPROTOPRIVATE + 1) + +struct mctp_ioc_tag_ctl { + mctp_eid_t peer_addr; + + /* For SIOCMCTPALLOCTAG: must be passed as zero, kernel will + * populate with the allocated tag value. Returned tag value will + * always have TO and PREALLOC set. + * + * For SIOCMCTPDROPTAG: userspace provides tag value to drop, from + * a prior SIOCMCTPALLOCTAG call (and so must have TO and PREALLOC set). 
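+ *
+ * Editor's sketch of the expected userspace sequence (illustrative only;
+ * sd is an MCTP socket and peer a destination EID):
+ *
+ *	struct mctp_ioc_tag_ctl ctl = { .peer_addr = peer, .tag = 0 };
+ *
+ *	ioctl(sd, SIOCMCTPALLOCTAG, &ctl);	// ctl.tag now has TO and
+ *						// PREALLOC set
+ *	// ... use ctl.tag as smctp_tag for sendmsg()/recvmsg() on sd ...
+ *	ioctl(sd, SIOCMCTPDROPTAG, &ctl);	// release the tag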
+ */ + __u8 tag; + __u16 flags; +}; + #endif /* __UAPI_MCTP_H */ diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h index 66048cc5d7b3..1bbea8f0681e 100644 --- a/include/uapi/linux/net_dropmon.h +++ b/include/uapi/linux/net_dropmon.h @@ -93,6 +93,7 @@ enum net_dm_attr { NET_DM_ATTR_SW_DROPS, /* flag */ NET_DM_ATTR_HW_DROPS, /* flag */ NET_DM_ATTR_FLOW_ACTION_COOKIE, /* binary */ + NET_DM_ATTR_REASON, /* string */ __NET_DM_ATTR_MAX, NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1 diff --git a/init/Kconfig b/init/Kconfig index e9119bf54b1f..7328d4f25370 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -86,6 +86,10 @@ config CC_HAS_ASM_INLINE config CC_HAS_NO_PROFILE_FN_ATTR def_bool $(success,echo '__attribute__((no_profile_instrument_function)) int x();' | $(CC) -x c - -c -o /dev/null -Werror) +config PAHOLE_VERSION + int + default $(shell,$(srctree)/scripts/pahole-version.sh $(PAHOLE)) + config CONSTRUCTORS bool diff --git a/init/main.c b/init/main.c index 65fa2e41a9c0..ada50f5a15e4 100644 --- a/init/main.c +++ b/init/main.c @@ -99,6 +99,7 @@ #include <linux/kcsan.h> #include <linux/init_syscalls.h> #include <linux/stackdepot.h> +#include <net/net_namespace.h> #include <asm/io.h> #include <asm/bugs.h> @@ -1116,6 +1117,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) key_init(); security_init(); dbg_late_init(); + net_ns_init(); vfs_caches_init(); pagecache_init(); signals_init(); diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index b7aef5b3416d..110029ede71e 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -5,6 +5,7 @@ #include <linux/anon_inodes.h> #include <linux/filter.h> #include <linux/bpf.h> +#include <linux/rcupdate_trace.h> struct bpf_iter_target_info { struct list_head list; @@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) { int ret; - rcu_read_lock(); - migrate_disable(); - ret = bpf_prog_run(prog, ctx); - migrate_enable(); - rcu_read_unlock(); + if (prog->aux->sleepable) { + rcu_read_lock_trace(); + migrate_disable(); + might_fault(); + ret = bpf_prog_run(prog, ctx); + migrate_enable(); + rcu_read_unlock_trace(); + } else { + rcu_read_lock(); + migrate_disable(); + ret = bpf_prog_run(prog, ctx); + migrate_enable(); + rcu_read_unlock(); + } /* bpf program can only return 0 or 1: * 0 : okay diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index a1c44c17ea9c..11740b300de9 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -419,6 +419,9 @@ static struct btf_type btf_void; static int btf_resolve(struct btf_verifier_env *env, const struct btf_type *t, u32 type_id); +static int btf_func_check(struct btf_verifier_env *env, + const struct btf_type *t); + static bool btf_type_is_modifier(const struct btf_type *t) { /* Some of them are not strictly a C modifier @@ -595,6 +598,7 @@ static bool btf_type_needs_resolve(const struct btf_type *t) btf_type_is_struct(t) || btf_type_is_array(t) || btf_type_is_var(t) || + btf_type_is_func(t) || btf_type_is_decl_tag(t) || btf_type_is_datasec(t); } @@ -3571,9 +3575,24 @@ static s32 btf_func_check_meta(struct btf_verifier_env *env, return 0; } +static int btf_func_resolve(struct btf_verifier_env *env, + const struct resolve_vertex *v) +{ + const struct btf_type *t = v->t; + u32 next_type_id = t->type; + int err; + + err = btf_func_check(env, t); + if (err) + return err; + + env_stack_pop_resolved(env, next_type_id, 0); + return 0; +} + static struct btf_kind_operations func_ops = { .check_meta = btf_func_check_meta, .resolve = 
btf_df_resolve, + .resolve = btf_func_resolve, .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_ref_type_log, @@ -4194,7 +4213,7 @@ static bool btf_resolve_valid(struct btf_verifier_env *env, return !btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); - if (btf_type_is_decl_tag(t)) + if (btf_type_is_decl_tag(t) || btf_type_is_func(t)) return btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); @@ -4284,12 +4303,6 @@ static int btf_check_all_types(struct btf_verifier_env *env) if (err) return err; } - - if (btf_type_is_func(t)) { - err = btf_func_check(env, t); - if (err) - return err; - } } return 0; @@ -4886,6 +4899,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, const char *tname = prog->aux->attach_func_name; struct bpf_verifier_log *log = info->log; const struct btf_param *args; + const char *tag_value; u32 nr_args, arg; int i, ret; @@ -5038,6 +5052,13 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, info->btf = btf; info->btf_id = t->type; t = btf_type_by_id(btf, t->type); + + if (btf_type_is_type_tag(t)) { + tag_value = __btf_name_by_offset(btf, t->name_off); + if (strcmp(tag_value, "user") == 0) + info->reg_type |= MEM_USER; + } + /* skip modifiers */ while (btf_type_is_modifier(t)) { info->btf_id = t->type; @@ -5064,12 +5085,12 @@ enum bpf_struct_walk_result { static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, - u32 *next_btf_id) + u32 *next_btf_id, enum bpf_type_flag *flag) { u32 i, moff, mtrue_end, msize = 0, total_nelems = 0; const struct btf_type *mtype, *elem_type = NULL; const struct btf_member *member; - const char *tname, *mname; + const char *tname, *mname, *tag_value; u32 vlen, elem_id, mid; again: @@ -5253,7 +5274,8 @@ error: } if (btf_type_is_ptr(mtype)) { - const struct btf_type *stype; + const struct btf_type *stype, *t; + enum bpf_type_flag tmp_flag = 0; u32 id; if (msize != size || off != moff) { @@ -5262,9 +5284,19 @@ error: mname, moff, tname, off, size); return -EACCES; } + + /* check __user tag */ + t = btf_type_by_id(btf, mtype->type); + if (btf_type_is_type_tag(t)) { + tag_value = __btf_name_by_offset(btf, t->name_off); + if (strcmp(tag_value, "user") == 0) + tmp_flag = MEM_USER; + } + stype = btf_type_skip_modifiers(btf, mtype->type, &id); if (btf_type_is_struct(stype)) { *next_btf_id = id; + *flag = tmp_flag; return WALK_PTR; } } @@ -5291,13 +5323,14 @@ error: int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype __maybe_unused, - u32 *next_btf_id) + u32 *next_btf_id, enum bpf_type_flag *flag) { + enum bpf_type_flag tmp_flag = 0; int err; u32 id; do { - err = btf_struct_walk(log, btf, t, off, size, &id); + err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_flag); switch (err) { case WALK_PTR: @@ -5305,6 +5338,7 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, * we're done. */ *next_btf_id = id; + *flag = tmp_flag; return PTR_TO_BTF_ID; case WALK_SCALAR: return SCALAR_VALUE; @@ -5349,6 +5383,7 @@ bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *need_btf, u32 need_type_id) { const struct btf_type *type; + enum bpf_type_flag flag; int err; /* Are we already done? 
*/ @@ -5359,7 +5394,7 @@ again: type = btf_type_by_id(btf, id); if (!type) return false; - err = btf_struct_walk(log, btf, type, off, 1, &id); + err = btf_struct_walk(log, btf, type, off, 1, &id, &flag); if (err != WALK_STRUCT) return false; @@ -6740,8 +6775,19 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, int ret; btf = btf_get_module_btf(kset->owner); - if (IS_ERR_OR_NULL(btf)) - return btf ? PTR_ERR(btf) : -ENOENT; + if (!btf) { + if (!kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { + pr_err("missing vmlinux BTF, cannot register kfuncs\n"); + return -ENOENT; + } + if (kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) { + pr_err("missing module BTF, cannot register kfuncs\n"); + return -ENOENT; + } + return 0; + } + if (IS_ERR(btf)) + return PTR_ERR(btf); hook = bpf_prog_type_to_kfunc_hook(prog_type); ret = btf_populate_kfunc_set(btf, hook, kset); @@ -6752,10 +6798,113 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, } EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set); +#define MAX_TYPES_ARE_COMPAT_DEPTH 2 + +static +int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id, + int level) +{ + const struct btf_type *local_type, *targ_type; + int depth = 32; /* max recursion depth */ + + /* caller made sure that names match (ignoring flavor suffix) */ + local_type = btf_type_by_id(local_btf, local_id); + targ_type = btf_type_by_id(targ_btf, targ_id); + if (btf_kind(local_type) != btf_kind(targ_type)) + return 0; + +recur: + depth--; + if (depth < 0) + return -EINVAL; + + local_type = btf_type_skip_modifiers(local_btf, local_id, &local_id); + targ_type = btf_type_skip_modifiers(targ_btf, targ_id, &targ_id); + if (!local_type || !targ_type) + return -EINVAL; + + if (btf_kind(local_type) != btf_kind(targ_type)) + return 0; + + switch (btf_kind(local_type)) { + case BTF_KIND_UNKN: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + case BTF_KIND_FWD: + return 1; + case BTF_KIND_INT: + /* just reject deprecated bitfield-like integers; all other + * integers are by default compatible with each other + */ + return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0; + case BTF_KIND_PTR: + local_id = local_type->type; + targ_id = targ_type->type; + goto recur; + case BTF_KIND_ARRAY: + local_id = btf_array(local_type)->type; + targ_id = btf_array(targ_type)->type; + goto recur; + case BTF_KIND_FUNC_PROTO: { + struct btf_param *local_p = btf_params(local_type); + struct btf_param *targ_p = btf_params(targ_type); + __u16 local_vlen = btf_vlen(local_type); + __u16 targ_vlen = btf_vlen(targ_type); + int i, err; + + if (local_vlen != targ_vlen) + return 0; + + for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { + if (level <= 0) + return -EINVAL; + + btf_type_skip_modifiers(local_btf, local_p->type, &local_id); + btf_type_skip_modifiers(targ_btf, targ_p->type, &targ_id); + err = __bpf_core_types_are_compat(local_btf, local_id, + targ_btf, targ_id, + level - 1); + if (err <= 0) + return err; + } + + /* tail recurse for return type check */ + btf_type_skip_modifiers(local_btf, local_type->type, &local_id); + btf_type_skip_modifiers(targ_btf, targ_type->type, &targ_id); + goto recur; + } + default: + return 0; + } +} + +/* Check local and target types for compatibility. This check is used for + * type-based CO-RE relocations and follows slightly different rules than + * field-based relocations. 
This function assumes that root types were already + * checked for name match. Beyond that initial root-level name check, names + * are completely ignored. Compatibility rules are as follows: + * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but + * kind should match for local and target types (i.e., STRUCT is not + * compatible with UNION); + * - for ENUMs, the size is ignored; + * - for INT, size and signedness are ignored; + * - for ARRAY, dimensionality is ignored, element types are checked for + * compatibility recursively; + * - CONST/VOLATILE/RESTRICT modifiers are ignored; + * - TYPEDEFs/PTRs are compatible if the types they point to are compatible; + * - FUNC_PROTOs are compatible if they have compatible signatures: same + * number of input args and compatible return and argument types. + * These rules are not set in stone and probably will be adjusted as we get + * more experience with using BPF CO-RE relocations. + */ int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id) { - return -EOPNOTSUPP; + return __bpf_core_types_are_compat(local_btf, local_id, + targ_btf, targ_id, + MAX_TYPES_ARE_COMPAT_DEPTH); } static bool bpf_core_is_flavor_sep(const char *s) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 279ebbed75a5..098632fdbc45 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1384,20 +1384,6 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, } #ifdef CONFIG_NET -static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp, - enum cgroup_bpf_attach_type attach_type) -{ - struct bpf_prog_array *prog_array; - bool empty; - - rcu_read_lock(); - prog_array = rcu_dereference(cgrp->bpf.effective[attach_type]); - empty = bpf_prog_array_is_empty(prog_array); - rcu_read_unlock(); - - return empty; -} - static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen, struct bpf_sockopt_buf *buf) { @@ -1456,19 +1442,11 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, }; int ret, max_optlen; - /* Opportunistic check to see whether we have any BPF program - * attached to the hook so we don't waste time allocating - * memory and locking the socket. - */ - if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_SETSOCKOPT)) - return 0; - /* Allocate a bit more than the initial user buffer for * BPF program. The canonical use case is overriding * TCP_CONGESTION(nv) to TCP_CONGESTION(cubic). */ max_optlen = max_t(int, 16, *optlen); - max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf); if (max_optlen < 0) return max_optlen; @@ -1550,15 +1528,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, }; int ret; - /* Opportunistic check to see whether we have any BPF program - * attached to the hook so we don't waste time allocating - * memory and locking the socket. 
- */ - if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_GETSOCKOPT)) - return retval; - ctx.optlen = max_optlen; - max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf); if (max_optlen < 0) return max_optlen; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 0a1cfd8544b9..42d96549a804 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -537,13 +537,10 @@ long bpf_jit_limit_max __read_mostly; static void bpf_prog_ksym_set_addr(struct bpf_prog *prog) { - const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog); - unsigned long addr = (unsigned long)hdr; - WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog)); prog->aux->ksym.start = (unsigned long) prog->bpf_func; - prog->aux->ksym.end = addr + hdr->pages * PAGE_SIZE; + prog->aux->ksym.end = prog->aux->ksym.start + prog->jited_len; } static void @@ -808,6 +805,137 @@ int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, return slot; } +/* + * BPF program pack allocator. + * + * Most BPF programs are pretty small. Allocating a whole page for each + * program is sometimes a waste. Many small bpf programs also add pressure + * to the instruction TLB. To solve this issue, we introduce a BPF program pack + * allocator. The prog_pack allocator uses HPAGE_PMD_SIZE pages (2MB on x86) + * to host BPF programs. + */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define BPF_PROG_PACK_SIZE HPAGE_PMD_SIZE +#else +#define BPF_PROG_PACK_SIZE PAGE_SIZE +#endif +#define BPF_PROG_CHUNK_SHIFT 6 +#define BPF_PROG_CHUNK_SIZE (1 << BPF_PROG_CHUNK_SHIFT) +#define BPF_PROG_CHUNK_MASK (~(BPF_PROG_CHUNK_SIZE - 1)) +#define BPF_PROG_CHUNK_COUNT (BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE) + +struct bpf_prog_pack { + struct list_head list; + void *ptr; + unsigned long bitmap[BITS_TO_LONGS(BPF_PROG_CHUNK_COUNT)]; +}; + +#define BPF_PROG_MAX_PACK_PROG_SIZE BPF_PROG_PACK_SIZE +#define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE) + +static DEFINE_MUTEX(pack_mutex); +static LIST_HEAD(pack_list); + +static struct bpf_prog_pack *alloc_new_pack(void) +{ + struct bpf_prog_pack *pack; + + pack = kzalloc(sizeof(*pack), GFP_KERNEL); + if (!pack) + return NULL; + pack->ptr = module_alloc(BPF_PROG_PACK_SIZE); + if (!pack->ptr) { + kfree(pack); + return NULL; + } + bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE); + list_add_tail(&pack->list, &pack_list); + + set_vm_flush_reset_perms(pack->ptr); + set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); + set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); + return pack; +} + +static void *bpf_prog_pack_alloc(u32 size) +{ + unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size); + struct bpf_prog_pack *pack; + unsigned long pos; + void *ptr = NULL; + + if (size > BPF_PROG_MAX_PACK_PROG_SIZE) { + size = round_up(size, PAGE_SIZE); + ptr = module_alloc(size); + if (ptr) { + set_vm_flush_reset_perms(ptr); + set_memory_ro((unsigned long)ptr, size / PAGE_SIZE); + set_memory_x((unsigned long)ptr, size / PAGE_SIZE); + } + return ptr; + } + mutex_lock(&pack_mutex); + list_for_each_entry(pack, &pack_list, list) { + pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0, + nbits, 0); + if (pos < BPF_PROG_CHUNK_COUNT) + goto found_free_area; + } + + pack = alloc_new_pack(); + if (!pack) + goto out; + + pos = 0; + +found_free_area: + bitmap_set(pack->bitmap, pos, nbits); + ptr = (void *)(pack->ptr) + (pos << BPF_PROG_CHUNK_SHIFT); + +out: + mutex_unlock(&pack_mutex); + return ptr; +} + +static void bpf_prog_pack_free(struct bpf_binary_header *hdr) 
+
+static void bpf_prog_pack_free(struct bpf_binary_header *hdr)
+{
+	struct bpf_prog_pack *pack = NULL, *tmp;
+	unsigned int nbits;
+	unsigned long pos;
+	void *pack_ptr;
+
+	if (hdr->size > BPF_PROG_MAX_PACK_PROG_SIZE) {
+		module_memfree(hdr);
+		return;
+	}
+
+	pack_ptr = (void *)((unsigned long)hdr & ~(BPF_PROG_PACK_SIZE - 1));
+	mutex_lock(&pack_mutex);
+
+	list_for_each_entry(tmp, &pack_list, list) {
+		if (tmp->ptr == pack_ptr) {
+			pack = tmp;
+			break;
+		}
+	}
+
+	if (WARN_ONCE(!pack, "bpf_prog_pack bug\n"))
+		goto out;
+
+	nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size);
+	pos = ((unsigned long)hdr - (unsigned long)pack_ptr) >> BPF_PROG_CHUNK_SHIFT;
+
+	bitmap_clear(pack->bitmap, pos, nbits);
+	if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
+				       BPF_PROG_CHUNK_COUNT, 0) == 0) {
+		list_del(&pack->list);
+		module_memfree(pack->ptr);
+		kfree(pack);
+	}
+out:
+	mutex_unlock(&pack_mutex);
+}
+
 static atomic_long_t bpf_jit_current;
 
 /* Can be overridden by an arch's JIT compiler if it has a custom,
@@ -833,12 +961,11 @@ static int __init bpf_jit_charge_init(void)
 }
 pure_initcall(bpf_jit_charge_init);
 
-int bpf_jit_charge_modmem(u32 pages)
+int bpf_jit_charge_modmem(u32 size)
 {
-	if (atomic_long_add_return(pages, &bpf_jit_current) >
-	    (bpf_jit_limit >> PAGE_SHIFT)) {
+	if (atomic_long_add_return(size, &bpf_jit_current) > bpf_jit_limit) {
 		if (!bpf_capable()) {
-			atomic_long_sub(pages, &bpf_jit_current);
+			atomic_long_sub(size, &bpf_jit_current);
 			return -EPERM;
 		}
 	}
@@ -846,9 +973,9 @@ int bpf_jit_charge_modmem(u32 pages)
 	return 0;
 }
 
-void bpf_jit_uncharge_modmem(u32 pages)
+void bpf_jit_uncharge_modmem(u32 size)
 {
-	atomic_long_sub(pages, &bpf_jit_current);
+	atomic_long_sub(size, &bpf_jit_current);
 }
 
 void *__weak bpf_jit_alloc_exec(unsigned long size)
@@ -867,7 +994,7 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 		     bpf_jit_fill_hole_t bpf_fill_ill_insns)
 {
 	struct bpf_binary_header *hdr;
-	u32 size, hole, start, pages;
+	u32 size, hole, start;
 
 	WARN_ON_ONCE(!is_power_of_2(alignment) ||
 		     alignment > BPF_IMAGE_ALIGNMENT);
@@ -877,20 +1004,19 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 	 * random section of illegal instructions.
 	 */
 	size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
-	pages = size / PAGE_SIZE;
 
-	if (bpf_jit_charge_modmem(pages))
+	if (bpf_jit_charge_modmem(size))
 		return NULL;
 	hdr = bpf_jit_alloc_exec(size);
 	if (!hdr) {
-		bpf_jit_uncharge_modmem(pages);
+		bpf_jit_uncharge_modmem(size);
 		return NULL;
 	}
 
 	/* Fill space with illegal/arch-dep instructions. */
 	bpf_fill_ill_insns(hdr, size);
 
-	hdr->pages = pages;
+	hdr->size = size;
 	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
 		     PAGE_SIZE - sizeof(*hdr));
 	start = (get_random_int() % hole) & ~(alignment - 1);
@@ -903,10 +1029,113 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 
 void bpf_jit_binary_free(struct bpf_binary_header *hdr)
 {
-	u32 pages = hdr->pages;
+	u32 size = hdr->size;
 
 	bpf_jit_free_exec(hdr);
-	bpf_jit_uncharge_modmem(pages);
+	bpf_jit_uncharge_modmem(size);
+}
+
+/* Allocate jit binary from bpf_prog_pack allocator.
+ * Since the allocated memory is RO+X, the JIT engine cannot write directly
+ * to the memory. To solve this problem, a RW buffer is also allocated at
+ * the same time. The JIT engine should calculate offsets based on the
+ * RO memory address, but write the JITed program to the RW buffer. Once the
+ * JIT engine finishes, it calls bpf_jit_binary_pack_finalize, which copies
+ * the JITed program to the RO memory.
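+ *
+ * Sketch of the expected calling sequence, for illustration (using the
+ * functions defined below; fill_insns stands for the arch's
+ * bpf_jit_fill_hole_t callback):
+ *
+ *	ro_header = bpf_jit_binary_pack_alloc(proglen, &image, align,
+ *					      &rw_header, &rw_image,
+ *					      fill_insns);
+ *	// emit instructions into rw_image, offsets computed against image
+ *	err = bpf_jit_binary_pack_finalize(prog, ro_header, rw_header);
+ *	// on a JIT failure before finalize, call instead:
+ *	// bpf_jit_binary_pack_free(ro_header, rw_header);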
+ */
+struct bpf_binary_header *
+bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
+			  unsigned int alignment,
+			  struct bpf_binary_header **rw_header,
+			  u8 **rw_image,
+			  bpf_jit_fill_hole_t bpf_fill_ill_insns)
+{
+	struct bpf_binary_header *ro_header;
+	u32 size, hole, start;
+
+	WARN_ON_ONCE(!is_power_of_2(alignment) ||
+		     alignment > BPF_IMAGE_ALIGNMENT);
+
+	/* add 16 bytes for a random section of illegal instructions */
+	size = round_up(proglen + sizeof(*ro_header) + 16, BPF_PROG_CHUNK_SIZE);
+
+	if (bpf_jit_charge_modmem(size))
+		return NULL;
+	ro_header = bpf_prog_pack_alloc(size);
+	if (!ro_header) {
+		bpf_jit_uncharge_modmem(size);
+		return NULL;
+	}
+
+	*rw_header = kvmalloc(size, GFP_KERNEL);
+	if (!*rw_header) {
+		bpf_prog_pack_free(ro_header);
+		bpf_jit_uncharge_modmem(size);
+		return NULL;
+	}
+
+	/* Fill space with illegal/arch-dep instructions. */
+	bpf_fill_ill_insns(*rw_header, size);
+
+	(*rw_header)->size = size;
+	hole = min_t(unsigned int, size - (proglen + sizeof(*ro_header)),
+		     BPF_PROG_CHUNK_SIZE - sizeof(*ro_header));
+	start = (get_random_int() % hole) & ~(alignment - 1);
+
+	*image_ptr = &ro_header->image[start];
+	*rw_image = &(*rw_header)->image[start];
+
+	return ro_header;
+}
+
+/* Copy JITed text from rw_header to its final location, the ro_header. */
+int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
+				 struct bpf_binary_header *ro_header,
+				 struct bpf_binary_header *rw_header)
+{
+	void *ptr;
+
+	ptr = bpf_arch_text_copy(ro_header, rw_header, rw_header->size);
+
+	kvfree(rw_header);
+
+	if (IS_ERR(ptr)) {
+		bpf_prog_pack_free(ro_header);
+		return PTR_ERR(ptr);
+	}
+	prog->aux->use_bpf_prog_pack = true;
+	return 0;
+}
+
+/* bpf_jit_binary_pack_free is called in two different scenarios:
+ *   1) when the program is freed after bpf_jit_binary_pack_finalize
+ *      succeeded;
+ *   2) when the JIT engine fails (before bpf_jit_binary_pack_finalize).
+ * For case 2), we need to free both the RO memory and the RW buffer.
+ * Also, ro_header->size in 2) is not properly set yet, so rw_header->size
+ * is used for uncharge.
+ */
+void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
+			      struct bpf_binary_header *rw_header)
+{
+	u32 size = rw_header ?
rw_header->size : ro_header->size; + + bpf_prog_pack_free(ro_header); + kvfree(rw_header); + bpf_jit_uncharge_modmem(size); +} + +static inline struct bpf_binary_header * +bpf_jit_binary_hdr(const struct bpf_prog *fp) +{ + unsigned long real_start = (unsigned long)fp->bpf_func; + unsigned long addr; + + if (fp->aux->use_bpf_prog_pack) + addr = real_start & BPF_PROG_CHUNK_MASK; + else + addr = real_start & PAGE_MASK; + + return (void *)addr; } /* This symbol is only overridden by archs that have different @@ -918,7 +1147,10 @@ void __weak bpf_jit_free(struct bpf_prog *fp) if (fp->jited) { struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); - bpf_jit_binary_free(hdr); + if (fp->aux->use_bpf_prog_pack) + bpf_jit_binary_pack_free(hdr, NULL /* rw_buffer */); + else + bpf_jit_binary_free(hdr); WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp)); } @@ -1968,18 +2200,10 @@ static struct bpf_prog_dummy { }, }; -/* to avoid allocating empty bpf_prog_array for cgroups that - * don't have bpf program attached use one global 'empty_prog_array' - * It will not be modified the caller of bpf_prog_array_alloc() - * (since caller requested prog_cnt == 0) - * that pointer should be 'freed' by bpf_prog_array_free() - */ -static struct { - struct bpf_prog_array hdr; - struct bpf_prog *null_prog; -} empty_prog_array = { +struct bpf_empty_prog_array bpf_empty_prog_array = { .null_prog = NULL, }; +EXPORT_SYMBOL(bpf_empty_prog_array); struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) { @@ -1989,12 +2213,12 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) (prog_cnt + 1), flags); - return &empty_prog_array.hdr; + return &bpf_empty_prog_array.hdr; } void bpf_prog_array_free(struct bpf_prog_array *progs) { - if (!progs || progs == &empty_prog_array.hdr) + if (!progs || progs == &bpf_empty_prog_array.hdr) return; kfree_rcu(progs, rcu); } @@ -2453,6 +2677,11 @@ int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, return -ENOTSUPP; } +void * __weak bpf_arch_text_copy(void *dst, void *src, size_t len) +{ + return ERR_PTR(-ENOTSUPP); +} + DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key); EXPORT_SYMBOL(bpf_stats_enabled_key); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 01cfdf40c838..4e5969fde0b3 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -16,6 +16,7 @@ #include <linux/pid_namespace.h> #include <linux/proc_ns.h> #include <linux/security.h> +#include <linux/btf_ids.h> #include "../../lib/kstrtox.h" @@ -671,6 +672,39 @@ const struct bpf_func_proto bpf_copy_from_user_proto = { .arg3_type = ARG_ANYTHING, }; +BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size, + const void __user *, user_ptr, struct task_struct *, tsk, u64, flags) +{ + int ret; + + /* flags is not used yet */ + if (unlikely(flags)) + return -EINVAL; + + if (unlikely(!size)) + return 0; + + ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0); + if (ret == size) + return 0; + + memset(dst, 0, size); + /* Return -EFAULT for partial read */ + return ret < 0 ? 
ret : -EFAULT; +} + +const struct bpf_func_proto bpf_copy_from_user_task_proto = { + .func = bpf_copy_from_user_task, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_BTF_ID, + .arg4_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], + .arg5_type = ARG_ANYTHING +}; + BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) { if (cpu >= nr_cpu_ids) diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile index 1400ac58178e..baf47d9c7557 100644 --- a/kernel/bpf/preload/Makefile +++ b/kernel/bpf/preload/Makefile @@ -1,40 +1,16 @@ # SPDX-License-Identifier: GPL-2.0 LIBBPF_SRCS = $(srctree)/tools/lib/bpf/ -LIBBPF_OUT = $(abspath $(obj))/libbpf -LIBBPF_A = $(LIBBPF_OUT)/libbpf.a -LIBBPF_DESTDIR = $(LIBBPF_OUT) -LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include - -# Although not in use by libbpf's Makefile, set $(O) so that the "dummy" test -# in tools/scripts/Makefile.include always succeeds when building the kernel -# with $(O) pointing to a relative path, as in "make O=build bindeb-pkg". -$(LIBBPF_A): | $(LIBBPF_OUT) - $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ \ - DESTDIR=$(LIBBPF_DESTDIR) prefix= \ - $(LIBBPF_OUT)/libbpf.a install_headers - -libbpf_hdrs: $(LIBBPF_A) - -.PHONY: libbpf_hdrs - -$(LIBBPF_OUT): - $(call msg,MKDIR,$@) - $(Q)mkdir -p $@ +LIBBPF_INCLUDE = $(LIBBPF_SRCS)/.. userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \ -I $(LIBBPF_INCLUDE) -Wno-unused-result userprogs := bpf_preload_umd -clean-files := libbpf/ - -$(obj)/iterators/iterators.o: | libbpf_hdrs - bpf_preload_umd-objs := iterators/iterators.o -bpf_preload_umd-userldlibs := $(LIBBPF_A) -lelf -lz -$(obj)/bpf_preload_umd: $(LIBBPF_A) +$(obj)/bpf_preload_umd: $(obj)/bpf_preload_umd_blob.o: $(obj)/bpf_preload_umd diff --git a/kernel/bpf/preload/iterators/Makefile b/kernel/bpf/preload/iterators/Makefile index b8bd60511227..bfe24f8c5a20 100644 --- a/kernel/bpf/preload/iterators/Makefile +++ b/kernel/bpf/preload/iterators/Makefile @@ -35,15 +35,15 @@ endif .PHONY: all clean -all: iterators.skel.h +all: iterators.lskel.h clean: $(call msg,CLEAN) $(Q)rm -rf $(OUTPUT) iterators -iterators.skel.h: $(OUTPUT)/iterators.bpf.o | $(BPFTOOL) +iterators.lskel.h: $(OUTPUT)/iterators.bpf.o | $(BPFTOOL) $(call msg,GEN-SKEL,$@) - $(Q)$(BPFTOOL) gen skeleton $< > $@ + $(Q)$(BPFTOOL) gen skeleton -L $< > $@ $(OUTPUT)/iterators.bpf.o: iterators.bpf.c $(BPFOBJ) | $(OUTPUT) diff --git a/kernel/bpf/preload/iterators/iterators.c b/kernel/bpf/preload/iterators/iterators.c index 5d872a705470..4dafe0f4f2b2 100644 --- a/kernel/bpf/preload/iterators/iterators.c +++ b/kernel/bpf/preload/iterators/iterators.c @@ -10,20 +10,36 @@ #include <bpf/libbpf.h> #include <bpf/bpf.h> #include <sys/mount.h> -#include "iterators.skel.h" +#include "iterators.lskel.h" #include "bpf_preload_common.h" int to_kernel = -1; int from_kernel = 0; -static int send_link_to_kernel(struct bpf_link *link, const char *link_name) +static int __bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len) +{ + union bpf_attr attr; + int err; + + memset(&attr, 0, sizeof(attr)); + attr.info.bpf_fd = bpf_fd; + attr.info.info_len = *info_len; + attr.info.info = (long) info; + + err = skel_sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)); + if (!err) + *info_len = attr.info.info_len; + return err; +} + +static int send_link_to_kernel(int link_fd, const char *link_name) { struct bpf_preload_info 
obj = {}; struct bpf_link_info info = {}; __u32 info_len = sizeof(info); int err; - err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &info, &info_len); + err = __bpf_obj_get_info_by_fd(link_fd, &info, &info_len); if (err) return err; obj.link_id = info.id; @@ -37,7 +53,6 @@ static int send_link_to_kernel(struct bpf_link *link, const char *link_name) int main(int argc, char **argv) { - struct rlimit rlim = { RLIM_INFINITY, RLIM_INFINITY }; struct iterators_bpf *skel; int err, magic; int debug_fd; @@ -55,7 +70,6 @@ int main(int argc, char **argv) printf("bad start magic %d\n", magic); return 1; } - setrlimit(RLIMIT_MEMLOCK, &rlim); /* libbpf opens BPF object and loads it into the kernel */ skel = iterators_bpf__open_and_load(); if (!skel) { @@ -72,10 +86,10 @@ int main(int argc, char **argv) goto cleanup; /* send two bpf_link IDs with names to the kernel */ - err = send_link_to_kernel(skel->links.dump_bpf_map, "maps.debug"); + err = send_link_to_kernel(skel->links.dump_bpf_map_fd, "maps.debug"); if (err) goto cleanup; - err = send_link_to_kernel(skel->links.dump_bpf_prog, "progs.debug"); + err = send_link_to_kernel(skel->links.dump_bpf_prog_fd, "progs.debug"); if (err) goto cleanup; diff --git a/kernel/bpf/preload/iterators/iterators.lskel.h b/kernel/bpf/preload/iterators/iterators.lskel.h new file mode 100644 index 000000000000..d90562d672d2 --- /dev/null +++ b/kernel/bpf/preload/iterators/iterators.lskel.h @@ -0,0 +1,428 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* THIS FILE IS AUTOGENERATED! */ +#ifndef __ITERATORS_BPF_SKEL_H__ +#define __ITERATORS_BPF_SKEL_H__ + +#include <stdlib.h> +#include <bpf/bpf.h> +#include <bpf/skel_internal.h> + +struct iterators_bpf { + struct bpf_loader_ctx ctx; + struct { + struct bpf_map_desc rodata; + } maps; + struct { + struct bpf_prog_desc dump_bpf_map; + struct bpf_prog_desc dump_bpf_prog; + } progs; + struct { + int dump_bpf_map_fd; + int dump_bpf_prog_fd; + } links; + struct iterators_bpf__rodata { + } *rodata; +}; + +static inline int +iterators_bpf__dump_bpf_map__attach(struct iterators_bpf *skel) +{ + int prog_fd = skel->progs.dump_bpf_map.prog_fd; + int fd = skel_link_create(prog_fd, 0, BPF_TRACE_ITER); + + if (fd > 0) + skel->links.dump_bpf_map_fd = fd; + return fd; +} + +static inline int +iterators_bpf__dump_bpf_prog__attach(struct iterators_bpf *skel) +{ + int prog_fd = skel->progs.dump_bpf_prog.prog_fd; + int fd = skel_link_create(prog_fd, 0, BPF_TRACE_ITER); + + if (fd > 0) + skel->links.dump_bpf_prog_fd = fd; + return fd; +} + +static inline int +iterators_bpf__attach(struct iterators_bpf *skel) +{ + int ret = 0; + + ret = ret < 0 ? ret : iterators_bpf__dump_bpf_map__attach(skel); + ret = ret < 0 ? ret : iterators_bpf__dump_bpf_prog__attach(skel); + return ret < 0 ? 
ret : 0; +} + +static inline void +iterators_bpf__detach(struct iterators_bpf *skel) +{ + skel_closenz(skel->links.dump_bpf_map_fd); + skel_closenz(skel->links.dump_bpf_prog_fd); +} +static void +iterators_bpf__destroy(struct iterators_bpf *skel) +{ + if (!skel) + return; + iterators_bpf__detach(skel); + skel_closenz(skel->progs.dump_bpf_map.prog_fd); + skel_closenz(skel->progs.dump_bpf_prog.prog_fd); + munmap(skel->rodata, 4096); + skel_closenz(skel->maps.rodata.map_fd); + free(skel); +} +static inline struct iterators_bpf * +iterators_bpf__open(void) +{ + struct iterators_bpf *skel; + + skel = calloc(sizeof(*skel), 1); + if (!skel) + goto cleanup; + skel->ctx.sz = (void *)&skel->links - (void *)skel; + skel->rodata = + mmap(NULL, 4096, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (skel->rodata == (void *) -1) + goto cleanup; + memcpy(skel->rodata, (void *)"\ +\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ +\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x0a\0\x25\x34\x75\x20\ +\x25\x2d\x31\x36\x73\x25\x36\x64\x0a\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\ +\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\ +\x64\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x0a\0", 98); + skel->maps.rodata.initial_value = (__u64)(long)skel->rodata; + return skel; +cleanup: + iterators_bpf__destroy(skel); + return NULL; +} + +static inline int +iterators_bpf__load(struct iterators_bpf *skel) +{ + struct bpf_load_and_run_opts opts = {}; + int err; + + opts.ctx = (struct bpf_loader_ctx *)skel; + opts.data_sz = 6056; + opts.data = (void *)"\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ 
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x9f\xeb\x01\0\ +\x18\0\0\0\0\0\0\0\x1c\x04\0\0\x1c\x04\0\0\xf9\x04\0\0\0\0\0\0\0\0\0\x02\x02\0\ +\0\0\x01\0\0\0\x02\0\0\x04\x10\0\0\0\x13\0\0\0\x03\0\0\0\0\0\0\0\x18\0\0\0\x04\ +\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\x08\0\0\0\0\0\0\0\0\0\0\x02\x0d\0\0\0\0\0\0\ +\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x01\0\0\0\x20\0\0\0\0\0\0\x01\x04\0\0\0\x20\ +\0\0\x01\x24\0\0\0\x01\0\0\x0c\x05\0\0\0\xa3\0\0\0\x03\0\0\x04\x18\0\0\0\xb1\0\ +\0\0\x09\0\0\0\0\0\0\0\xb5\0\0\0\x0b\0\0\0\x40\0\0\0\xc0\0\0\0\x0b\0\0\0\x80\0\ +\0\0\0\0\0\0\0\0\0\x02\x0a\0\0\0\xc8\0\0\0\0\0\0\x07\0\0\0\0\xd1\0\0\0\0\0\0\ +\x08\x0c\0\0\0\xd7\0\0\0\0\0\0\x01\x08\0\0\0\x40\0\0\0\x94\x01\0\0\x03\0\0\x04\ +\x18\0\0\0\x9c\x01\0\0\x0e\0\0\0\0\0\0\0\x9f\x01\0\0\x11\0\0\0\x20\0\0\0\xa4\ +\x01\0\0\x0e\0\0\0\xa0\0\0\0\xb0\x01\0\0\0\0\0\x08\x0f\0\0\0\xb6\x01\0\0\0\0\0\ +\x01\x04\0\0\0\x20\0\0\0\xc3\x01\0\0\0\0\0\x01\x01\0\0\0\x08\0\0\x01\0\0\0\0\0\ +\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x10\0\0\0\xc8\x01\0\0\0\0\0\x01\x04\0\0\0\ +\x20\0\0\0\0\0\0\0\0\0\0\x02\x14\0\0\0\x2c\x02\0\0\x02\0\0\x04\x10\0\0\0\x13\0\ +\0\0\x03\0\0\0\0\0\0\0\x3f\x02\0\0\x15\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\x18\0\ +\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x13\0\0\0\x44\x02\0\0\x01\0\0\x0c\ +\x16\0\0\0\x90\x02\0\0\x01\0\0\x04\x08\0\0\0\x99\x02\0\0\x19\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\x02\x1a\0\0\0\xea\x02\0\0\x06\0\0\x04\x38\0\0\0\x9c\x01\0\0\x0e\0\0\ +\0\0\0\0\0\x9f\x01\0\0\x11\0\0\0\x20\0\0\0\xf7\x02\0\0\x1b\0\0\0\xc0\0\0\0\x08\ +\x03\0\0\x15\0\0\0\0\x01\0\0\x11\x03\0\0\x1d\0\0\0\x40\x01\0\0\x1b\x03\0\0\x1e\ +\0\0\0\x80\x01\0\0\0\0\0\0\0\0\0\x02\x1c\0\0\0\0\0\0\0\0\0\0\x0a\x10\0\0\0\0\0\ +\0\0\0\0\0\x02\x1f\0\0\0\0\0\0\0\0\0\0\x02\x20\0\0\0\x65\x03\0\0\x02\0\0\x04\ +\x08\0\0\0\x73\x03\0\0\x0e\0\0\0\0\0\0\0\x7c\x03\0\0\x0e\0\0\0\x20\0\0\0\x1b\ +\x03\0\0\x03\0\0\x04\x18\0\0\0\x86\x03\0\0\x1b\0\0\0\0\0\0\0\x8e\x03\0\0\x21\0\ +\0\0\x40\0\0\0\x94\x03\0\0\x23\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x22\0\0\0\0\0\ +\0\0\0\0\0\x02\x24\0\0\0\x98\x03\0\0\x01\0\0\x04\x04\0\0\0\xa3\x03\0\0\x0e\0\0\ +\0\0\0\0\0\x0c\x04\0\0\x01\0\0\x04\x04\0\0\0\x15\x04\0\0\x0e\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x23\0\0\0\x8b\x04\0\0\0\0\0\x0e\x25\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x0e\0\0\0\x9f\x04\ +\0\0\0\0\0\x0e\x27\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\ +\x20\0\0\0\xb5\x04\0\0\0\0\0\x0e\x29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\ +\x1c\0\0\0\x12\0\0\0\x11\0\0\0\xca\x04\0\0\0\0\0\x0e\x2b\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x04\0\0\0\xe1\x04\0\0\0\0\0\x0e\x2d\0\0\ +\0\x01\0\0\0\xe9\x04\0\0\x04\0\0\x0f\x62\0\0\0\x26\0\0\0\0\0\0\0\x23\0\0\0\x28\ +\0\0\0\x23\0\0\0\x0e\0\0\0\x2a\0\0\0\x31\0\0\0\x20\0\0\0\x2c\0\0\0\x51\0\0\0\ 
+\x11\0\0\0\xf1\x04\0\0\x01\0\0\x0f\x04\0\0\0\x2e\0\0\0\0\0\0\0\x04\0\0\0\0\x62\ +\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x6d\x65\x74\ +\x61\0\x6d\x61\x70\0\x63\x74\x78\0\x69\x6e\x74\0\x64\x75\x6d\x70\x5f\x62\x70\ +\x66\x5f\x6d\x61\x70\0\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\x30\ +\x3a\x30\0\x2f\x77\x2f\x6e\x65\x74\x2d\x6e\x65\x78\x74\x2f\x6b\x65\x72\x6e\x65\ +\x6c\x2f\x62\x70\x66\x2f\x70\x72\x65\x6c\x6f\x61\x64\x2f\x69\x74\x65\x72\x61\ +\x74\x6f\x72\x73\x2f\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\x2e\ +\x63\0\x09\x73\x74\x72\x75\x63\x74\x20\x73\x65\x71\x5f\x66\x69\x6c\x65\x20\x2a\ +\x73\x65\x71\x20\x3d\x20\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\ +\x71\x3b\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x6d\x65\x74\x61\0\x73\x65\x71\0\ +\x73\x65\x73\x73\x69\x6f\x6e\x5f\x69\x64\0\x73\x65\x71\x5f\x6e\x75\x6d\0\x73\ +\x65\x71\x5f\x66\x69\x6c\x65\0\x5f\x5f\x75\x36\x34\0\x75\x6e\x73\x69\x67\x6e\ +\x65\x64\x20\x6c\x6f\x6e\x67\x20\x6c\x6f\x6e\x67\0\x30\x3a\x31\0\x09\x73\x74\ +\x72\x75\x63\x74\x20\x62\x70\x66\x5f\x6d\x61\x70\x20\x2a\x6d\x61\x70\x20\x3d\ +\x20\x63\x74\x78\x2d\x3e\x6d\x61\x70\x3b\0\x09\x69\x66\x20\x28\x21\x6d\x61\x70\ +\x29\0\x09\x5f\x5f\x75\x36\x34\x20\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x20\x63\ +\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\x71\x5f\x6e\x75\x6d\x3b\0\x30\ +\x3a\x32\0\x09\x69\x66\x20\x28\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x3d\x20\x30\ +\x29\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\ +\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\ +\x20\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\ +\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x6d\x61\x70\0\x69\x64\0\x6e\x61\x6d\x65\ +\0\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\0\x5f\x5f\x75\x33\x32\0\x75\x6e\ +\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x63\x68\x61\x72\0\x5f\x5f\x41\x52\ +\x52\x41\x59\x5f\x53\x49\x5a\x45\x5f\x54\x59\x50\x45\x5f\x5f\0\x09\x42\x50\x46\ +\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x25\ +\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x5c\x6e\x22\x2c\x20\x6d\x61\x70\ +\x2d\x3e\x69\x64\x2c\x20\x6d\x61\x70\x2d\x3e\x6e\x61\x6d\x65\x2c\x20\x6d\x61\ +\x70\x2d\x3e\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x29\x3b\0\x7d\0\x62\ +\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x70\x72\ +\x6f\x67\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x69\x74\x65\ +\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x09\x73\x74\x72\x75\x63\x74\x20\x62\ +\x70\x66\x5f\x70\x72\x6f\x67\x20\x2a\x70\x72\x6f\x67\x20\x3d\x20\x63\x74\x78\ +\x2d\x3e\x70\x72\x6f\x67\x3b\0\x09\x69\x66\x20\x28\x21\x70\x72\x6f\x67\x29\0\ +\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x61\x75\x78\0\x09\x61\x75\x78\x20\x3d\x20\ +\x70\x72\x6f\x67\x2d\x3e\x61\x75\x78\x3b\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\ +\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\ +\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\ +\x74\x61\x63\x68\x65\x64\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x70\x72\x6f\x67\ +\x5f\x61\x75\x78\0\x61\x74\x74\x61\x63\x68\x5f\x66\x75\x6e\x63\x5f\x6e\x61\x6d\ +\x65\0\x64\x73\x74\x5f\x70\x72\x6f\x67\0\x66\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\ +\x62\x74\x66\0\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\ +\x73\x65\x71\x2c\x20\x22\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\ +\x25\x73\x5c\x6e\x22\x2c\x20\x61\x75\x78\x2d\x3e\x69\x64\x2c\0\x30\x3a\x34\0\ 
+\x30\x3a\x35\0\x09\x69\x66\x20\x28\x21\x62\x74\x66\x29\0\x62\x70\x66\x5f\x66\ +\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x69\x6e\x73\x6e\x5f\x6f\x66\x66\0\x74\x79\ +\x70\x65\x5f\x69\x64\0\x30\0\x73\x74\x72\x69\x6e\x67\x73\0\x74\x79\x70\x65\x73\ +\0\x68\x64\x72\0\x62\x74\x66\x5f\x68\x65\x61\x64\x65\x72\0\x73\x74\x72\x5f\x6c\ +\x65\x6e\0\x09\x74\x79\x70\x65\x73\x20\x3d\x20\x62\x74\x66\x2d\x3e\x74\x79\x70\ +\x65\x73\x3b\0\x09\x62\x70\x66\x5f\x70\x72\x6f\x62\x65\x5f\x72\x65\x61\x64\x5f\ +\x6b\x65\x72\x6e\x65\x6c\x28\x26\x74\x2c\x20\x73\x69\x7a\x65\x6f\x66\x28\x74\ +\x29\x2c\x20\x74\x79\x70\x65\x73\x20\x2b\x20\x62\x74\x66\x5f\x69\x64\x29\x3b\0\ +\x09\x73\x74\x72\x20\x3d\x20\x62\x74\x66\x2d\x3e\x73\x74\x72\x69\x6e\x67\x73\ +\x3b\0\x62\x74\x66\x5f\x74\x79\x70\x65\0\x6e\x61\x6d\x65\x5f\x6f\x66\x66\0\x09\ +\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\x3d\x20\x42\x50\x46\x5f\x43\x4f\x52\x45\ +\x5f\x52\x45\x41\x44\x28\x74\x2c\x20\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x29\x3b\0\ +\x30\x3a\x32\x3a\x30\0\x09\x69\x66\x20\x28\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\ +\x3e\x3d\x20\x62\x74\x66\x2d\x3e\x68\x64\x72\x2e\x73\x74\x72\x5f\x6c\x65\x6e\ +\x29\0\x09\x72\x65\x74\x75\x72\x6e\x20\x73\x74\x72\x20\x2b\x20\x6e\x61\x6d\x65\ +\x5f\x6f\x66\x66\x3b\0\x30\x3a\x33\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\ +\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\ +\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\x64\x75\x6d\x70\x5f\x62\x70\x66\ +\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\ +\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x4c\x49\x43\x45\ +\x4e\x53\x45\0\x2e\x72\x6f\x64\x61\x74\x61\0\x6c\x69\x63\x65\x6e\x73\x65\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x2d\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x02\0\0\ +\0\x04\0\0\0\x62\0\0\0\x01\0\0\0\x80\x04\0\0\0\0\0\0\0\0\0\0\x69\x74\x65\x72\ +\x61\x74\x6f\x72\x2e\x72\x6f\x64\x61\x74\x61\0\0\0\0\0\0\0\0\0\0\0\0\0\x2f\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\ +\x20\x20\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\ +\x73\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x0a\0\x20\x20\x69\ +\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ +\x61\x74\x74\x61\x63\x68\x65\x64\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\ +\x25\x73\x20\x25\x73\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x47\x50\x4c\0\0\0\0\0\ +\x79\x12\0\0\0\0\0\0\x79\x26\0\0\0\0\0\0\x79\x17\x08\0\0\0\0\0\x15\x07\x1b\0\0\ +\0\0\0\x79\x11\0\0\0\0\0\0\x79\x11\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\ +\0\0\0\0\0\0\x07\x04\0\0\xe8\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\xb7\x03\0\0\x23\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\ +\0\x61\x71\0\0\0\0\0\0\x7b\x1a\xe8\xff\0\0\0\0\xb7\x01\0\0\x04\0\0\0\xbf\x72\0\ +\0\0\0\0\0\x0f\x12\0\0\0\0\0\0\x7b\x2a\xf0\xff\0\0\0\0\x61\x71\x14\0\0\0\0\0\ +\x7b\x1a\xf8\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xe8\xff\xff\xff\xbf\ +\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x23\0\0\0\xb7\x03\0\0\x0e\0\0\0\ +\xb7\x05\0\0\x18\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\ +\0\0\0\0\x07\0\0\0\0\0\0\0\x42\0\0\0\x7b\0\0\0\x1e\x3c\x01\0\x01\0\0\0\x42\0\0\ +\0\x7b\0\0\0\x24\x3c\x01\0\x02\0\0\0\x42\0\0\0\xee\0\0\0\x1d\x44\x01\0\x03\0\0\ +\0\x42\0\0\0\x0f\x01\0\0\x06\x4c\x01\0\x04\0\0\0\x42\0\0\0\x1a\x01\0\0\x17\x40\ +\x01\0\x05\0\0\0\x42\0\0\0\x1a\x01\0\0\x1d\x40\x01\0\x06\0\0\0\x42\0\0\0\x43\ 
+\x01\0\0\x06\x58\x01\0\x08\0\0\0\x42\0\0\0\x56\x01\0\0\x03\x5c\x01\0\x0f\0\0\0\ +\x42\0\0\0\xdc\x01\0\0\x02\x64\x01\0\x1f\0\0\0\x42\0\0\0\x2a\x02\0\0\x01\x6c\ +\x01\0\0\0\0\0\x02\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\0\0\ +\0\x10\0\0\0\x02\0\0\0\xea\0\0\0\0\0\0\0\x20\0\0\0\x02\0\0\0\x3e\0\0\0\0\0\0\0\ +\x28\0\0\0\x08\0\0\0\x3f\x01\0\0\0\0\0\0\x78\0\0\0\x0d\0\0\0\x3e\0\0\0\0\0\0\0\ +\x88\0\0\0\x0d\0\0\0\xea\0\0\0\0\0\0\0\xa8\0\0\0\x0d\0\0\0\x3f\x01\0\0\0\0\0\0\ +\x1a\0\0\0\x21\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\0\0\0\ +\0\0\0\0\x1c\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x10\0\0\0\0\0\0\ +\0\0\0\0\0\x0a\0\0\0\x01\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\x10\0\0\0\0\0\0\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x62\x70\x66\x5f\x6d\ +\x61\x70\0\0\0\0\0\0\0\0\x47\x50\x4c\0\0\0\0\0\x79\x12\0\0\0\0\0\0\x79\x26\0\0\ +\0\0\0\0\x79\x12\x08\0\0\0\0\0\x15\x02\x3c\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x79\ +\x27\0\0\0\0\0\0\x79\x11\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\ +\0\x07\x04\0\0\xd0\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\ +\x31\0\0\0\xb7\x03\0\0\x20\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\0\x7b\ +\x6a\xc8\xff\0\0\0\0\x61\x71\0\0\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xb7\x03\0\0\ +\x04\0\0\0\xbf\x79\0\0\0\0\0\0\x0f\x39\0\0\0\0\0\0\x79\x71\x28\0\0\0\0\0\x79\ +\x78\x30\0\0\0\0\0\x15\x08\x18\0\0\0\0\0\xb7\x02\0\0\0\0\0\0\x0f\x21\0\0\0\0\0\ +\0\x61\x11\x04\0\0\0\0\0\x79\x83\x08\0\0\0\0\0\x67\x01\0\0\x03\0\0\0\x0f\x13\0\ +\0\0\0\0\0\x79\x86\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf8\xff\xff\xff\ +\xb7\x02\0\0\x08\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x01\0\0\0\0\0\0\x79\xa3\xf8\xff\ +\0\0\0\0\x0f\x13\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf4\xff\xff\xff\ +\xb7\x02\0\0\x04\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x03\0\0\x04\0\0\0\x61\xa1\xf4\ +\xff\0\0\0\0\x61\x82\x10\0\0\0\0\0\x3d\x21\x02\0\0\0\0\0\x0f\x16\0\0\0\0\0\0\ +\xbf\x69\0\0\0\0\0\0\x7b\x9a\xd8\xff\0\0\0\0\x79\x71\x18\0\0\0\0\0\x7b\x1a\xe0\ +\xff\0\0\0\0\x79\x71\x20\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x0f\x31\0\0\0\0\0\0\x7b\ +\x1a\xe8\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xd0\xff\xff\xff\x79\xa1\ +\xc8\xff\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x51\0\0\0\xb7\x03\0\0\x11\0\0\0\ +\xb7\x05\0\0\x20\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\ +\0\0\0\0\x17\0\0\0\0\0\0\0\x42\0\0\0\x7b\0\0\0\x1e\x80\x01\0\x01\0\0\0\x42\0\0\ +\0\x7b\0\0\0\x24\x80\x01\0\x02\0\0\0\x42\0\0\0\x60\x02\0\0\x1f\x88\x01\0\x03\0\ +\0\0\x42\0\0\0\x84\x02\0\0\x06\x94\x01\0\x04\0\0\0\x42\0\0\0\x1a\x01\0\0\x17\ +\x84\x01\0\x05\0\0\0\x42\0\0\0\x9d\x02\0\0\x0e\xa0\x01\0\x06\0\0\0\x42\0\0\0\ +\x1a\x01\0\0\x1d\x84\x01\0\x07\0\0\0\x42\0\0\0\x43\x01\0\0\x06\xa4\x01\0\x09\0\ +\0\0\x42\0\0\0\xaf\x02\0\0\x03\xa8\x01\0\x11\0\0\0\x42\0\0\0\x1f\x03\0\0\x02\ +\xb0\x01\0\x18\0\0\0\x42\0\0\0\x5a\x03\0\0\x06\x04\x01\0\x1b\0\0\0\x42\0\0\0\0\ +\0\0\0\0\0\0\0\x1c\0\0\0\x42\0\0\0\xab\x03\0\0\x0f\x10\x01\0\x1d\0\0\0\x42\0\0\ +\0\xc0\x03\0\0\x2d\x14\x01\0\x1f\0\0\0\x42\0\0\0\xf7\x03\0\0\x0d\x0c\x01\0\x21\ +\0\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x22\0\0\0\x42\0\0\0\xc0\x03\0\0\x02\x14\x01\0\ +\x25\0\0\0\x42\0\0\0\x1e\x04\0\0\x0d\x18\x01\0\x28\0\0\0\x42\0\0\0\0\0\0\0\0\0\ +\0\0\x29\0\0\0\x42\0\0\0\x1e\x04\0\0\x0d\x18\x01\0\x2c\0\0\0\x42\0\0\0\x1e\x04\ +\0\0\x0d\x18\x01\0\x2d\0\0\0\x42\0\0\0\x4c\x04\0\0\x1b\x1c\x01\0\x2e\0\0\0\x42\ +\0\0\0\x4c\x04\0\0\x06\x1c\x01\0\x2f\0\0\0\x42\0\0\0\x6f\x04\0\0\x0d\x24\x01\0\ 
+\x31\0\0\0\x42\0\0\0\x1f\x03\0\0\x02\xb0\x01\0\x40\0\0\0\x42\0\0\0\x2a\x02\0\0\ +\x01\xc0\x01\0\0\0\0\0\x14\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\ +\0\0\0\0\0\x10\0\0\0\x14\0\0\0\xea\0\0\0\0\0\0\0\x20\0\0\0\x14\0\0\0\x3e\0\0\0\ +\0\0\0\0\x28\0\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\x30\0\0\0\x08\0\0\0\x3f\x01\0\0\ +\0\0\0\0\x88\0\0\0\x1a\0\0\0\x3e\0\0\0\0\0\0\0\x98\0\0\0\x1a\0\0\0\xea\0\0\0\0\ +\0\0\0\xb0\0\0\0\x1a\0\0\0\x52\x03\0\0\0\0\0\0\xb8\0\0\0\x1a\0\0\0\x56\x03\0\0\ +\0\0\0\0\xc8\0\0\0\x1f\0\0\0\x84\x03\0\0\0\0\0\0\xe0\0\0\0\x20\0\0\0\xea\0\0\0\ +\0\0\0\0\xf8\0\0\0\x20\0\0\0\x3e\0\0\0\0\0\0\0\x20\x01\0\0\x24\0\0\0\x3e\0\0\0\ +\0\0\0\0\x58\x01\0\0\x1a\0\0\0\xea\0\0\0\0\0\0\0\x68\x01\0\0\x20\0\0\0\x46\x04\ +\0\0\0\0\0\0\x90\x01\0\0\x1a\0\0\0\x3f\x01\0\0\0\0\0\0\xa0\x01\0\0\x1a\0\0\0\ +\x87\x04\0\0\0\0\0\0\xa8\x01\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\x1a\0\0\0\x42\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\0\0\0\0\0\0\x1c\0\0\ +\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x10\0\0\0\0\0\0\0\0\0\0\0\x1a\0\ +\0\0\x01\0\0\0\0\0\0\0\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x10\0\0\0\0\0\ +\0\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\0\0\ +\0\0\0\0"; + opts.insns_sz = 2184; + opts.insns = (void *)"\ +\xbf\x16\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x78\xff\xff\xff\xb7\x02\0\ +\0\x88\0\0\0\xb7\x03\0\0\0\0\0\0\x85\0\0\0\x71\0\0\0\x05\0\x14\0\0\0\0\0\x61\ +\xa1\x78\xff\0\0\0\0\xd5\x01\x01\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa1\x7c\xff\ +\0\0\0\0\xd5\x01\x01\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa1\x80\xff\0\0\0\0\xd5\ +\x01\x01\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa1\x84\xff\0\0\0\0\xd5\x01\x01\0\0\ +\0\0\0\x85\0\0\0\xa8\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x61\x01\0\0\0\0\ +\0\0\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xbf\x70\0\0\ +\0\0\0\0\x95\0\0\0\0\0\0\0\x61\x60\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\ +\x48\x0e\0\0\x63\x01\0\0\0\0\0\0\x61\x60\x0c\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\ +\0\0\x44\x0e\0\0\x63\x01\0\0\0\0\0\0\x79\x60\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\ +\0\0\0\0\x38\x0e\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x05\0\0\ +\x18\x61\0\0\0\0\0\0\0\0\0\0\x30\x0e\0\0\x7b\x01\0\0\0\0\0\0\xb7\x01\0\0\x12\0\ +\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x30\x0e\0\0\xb7\x03\0\0\x1c\0\0\0\x85\0\0\0\ +\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\xd4\xff\0\0\0\0\x63\x7a\x78\xff\0\0\0\0\ +\x61\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x80\x0e\0\0\x63\x01\0\0\0\ +\0\0\0\x61\x60\x20\0\0\0\0\0\x15\0\x03\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\ +\x5c\x0e\0\0\x63\x01\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\ +\0\x50\x0e\0\0\xb7\x03\0\0\x48\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\ +\xc5\x07\xc3\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x63\x71\0\0\0\0\0\ +\0\x79\x63\x18\0\0\0\0\0\x15\x03\x04\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x98\ +\x0e\0\0\xb7\x02\0\0\x62\0\0\0\x85\0\0\0\x94\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\x61\x20\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x08\x0f\0\0\x63\x01\0\ +\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x0f\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\ +\x10\x0f\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x98\x0e\0\0\x18\ +\x61\0\0\0\0\0\0\0\0\0\0\x18\x0f\0\0\x7b\x01\0\0\0\0\0\0\xb7\x01\0\0\x02\0\0\0\ +\x18\x62\0\0\0\0\0\0\0\0\0\0\x08\x0f\0\0\xb7\x03\0\0\x20\0\0\0\x85\0\0\0\xa6\0\ +\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\xa3\xff\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\0\ 
+\0\0\0\x61\x20\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x28\x0f\0\0\x63\x01\0\0\ +\0\0\0\0\xb7\x01\0\0\x16\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x28\x0f\0\0\xb7\x03\ +\0\0\x04\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\x96\xff\0\0\0\0\ +\x18\x60\0\0\0\0\0\0\0\0\0\0\x30\x0f\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x78\x11\0\ +\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x38\x0f\0\0\x18\x61\0\0\0\0\ +\0\0\0\0\0\0\x70\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x40\ +\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb8\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\ +\0\0\0\0\0\0\0\0\0\x48\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xc8\x11\0\0\x7b\x01\ +\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xe8\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\ +\0\xe8\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x18\x61\ +\0\0\0\0\0\0\0\0\0\0\xe0\x11\0\0\x7b\x01\0\0\0\0\0\0\x61\x60\x08\0\0\0\0\0\x18\ +\x61\0\0\0\0\0\0\0\0\0\0\x80\x11\0\0\x63\x01\0\0\0\0\0\0\x61\x60\x0c\0\0\0\0\0\ +\x18\x61\0\0\0\0\0\0\0\0\0\0\x84\x11\0\0\x63\x01\0\0\0\0\0\0\x79\x60\x10\0\0\0\ +\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x88\x11\0\0\x7b\x01\0\0\0\0\0\0\x61\xa0\x78\ +\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb0\x11\0\0\x63\x01\0\0\0\0\0\0\x18\ +\x61\0\0\0\0\0\0\0\0\0\0\xf8\x11\0\0\xb7\x02\0\0\x11\0\0\0\xb7\x03\0\0\x0c\0\0\ +\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\x60\xff\ +\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x68\x11\0\0\x63\x70\x6c\0\0\0\0\0\x77\x07\ +\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\xb7\x01\0\0\x05\0\0\0\x18\x62\0\0\0\0\0\0\ +\0\0\0\0\x68\x11\0\0\xb7\x03\0\0\x8c\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\ +\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xd8\x11\0\0\x61\x01\0\0\0\0\0\0\xd5\x01\x02\0\ +\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xc5\x07\x4e\xff\0\0\0\0\x63\ +\x7a\x80\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x10\x12\0\0\x18\x61\0\0\0\0\0\ +\0\0\0\0\0\x10\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x18\x12\ +\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x08\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\ +\0\0\0\0\0\0\0\x28\x14\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x50\x17\0\0\x7b\x01\0\0\ +\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x30\x14\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\ +\x60\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xd0\x15\0\0\x18\ +\x61\0\0\0\0\0\0\0\0\0\0\x80\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x78\x17\0\0\x7b\x01\0\0\0\0\0\0\x61\ +\x60\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x18\x17\0\0\x63\x01\0\0\0\0\0\0\ +\x61\x60\x0c\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x1c\x17\0\0\x63\x01\0\0\0\0\ +\0\0\x79\x60\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x20\x17\0\0\x7b\x01\0\0\ +\0\0\0\0\x61\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x48\x17\0\0\x63\ +\x01\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x90\x17\0\0\xb7\x02\0\0\x12\0\0\0\ +\xb7\x03\0\0\x0c\0\0\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\x07\0\0\0\0\ +\0\0\xc5\x07\x17\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x17\0\0\x63\x70\x6c\ +\0\0\0\0\0\x77\x07\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\xb7\x01\0\0\x05\0\0\0\ +\x18\x62\0\0\0\0\0\0\0\0\0\0\0\x17\0\0\xb7\x03\0\0\x8c\0\0\0\x85\0\0\0\xa6\0\0\ +\0\xbf\x07\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x70\x17\0\0\x61\x01\0\0\0\0\ +\0\0\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xc5\x07\x05\ +\xff\0\0\0\0\x63\x7a\x84\xff\0\0\0\0\x61\xa1\x78\xff\0\0\0\0\xd5\x01\x02\0\0\0\ +\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa0\x80\xff\0\0\0\0\x63\x06\ +\x28\0\0\0\0\0\x61\xa0\x84\xff\0\0\0\0\x63\x06\x2c\0\0\0\0\0\x18\x61\0\0\0\0\0\ 
+\0\0\0\0\0\0\0\0\0\x61\x10\0\0\0\0\0\0\x63\x06\x18\0\0\0\0\0\xb7\0\0\0\0\0\0\0\ +\x95\0\0\0\0\0\0\0"; + err = bpf_load_and_run(&opts); + if (err < 0) + return err; + skel->rodata = + mmap(skel->rodata, 4096, PROT_READ, MAP_SHARED | MAP_FIXED, + skel->maps.rodata.map_fd, 0); + return 0; +} + +static inline struct iterators_bpf * +iterators_bpf__open_and_load(void) +{ + struct iterators_bpf *skel; + + skel = iterators_bpf__open(); + if (!skel) + return NULL; + if (iterators_bpf__load(skel)) { + iterators_bpf__destroy(skel); + return NULL; + } + return skel; +} + +#endif /* __ITERATORS_BPF_SKEL_H__ */ diff --git a/kernel/bpf/preload/iterators/iterators.skel.h b/kernel/bpf/preload/iterators/iterators.skel.h deleted file mode 100644 index cf9a6a94b3a4..000000000000 --- a/kernel/bpf/preload/iterators/iterators.skel.h +++ /dev/null @@ -1,412 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -/* THIS FILE IS AUTOGENERATED! */ -#ifndef __ITERATORS_BPF_SKEL_H__ -#define __ITERATORS_BPF_SKEL_H__ - -#include <stdlib.h> -#include <bpf/libbpf.h> - -struct iterators_bpf { - struct bpf_object_skeleton *skeleton; - struct bpf_object *obj; - struct { - struct bpf_map *rodata; - } maps; - struct { - struct bpf_program *dump_bpf_map; - struct bpf_program *dump_bpf_prog; - } progs; - struct { - struct bpf_link *dump_bpf_map; - struct bpf_link *dump_bpf_prog; - } links; - struct iterators_bpf__rodata { - char dump_bpf_map____fmt[35]; - char dump_bpf_map____fmt_1[14]; - char dump_bpf_prog____fmt[32]; - char dump_bpf_prog____fmt_2[17]; - } *rodata; -}; - -static void -iterators_bpf__destroy(struct iterators_bpf *obj) -{ - if (!obj) - return; - if (obj->skeleton) - bpf_object__destroy_skeleton(obj->skeleton); - free(obj); -} - -static inline int -iterators_bpf__create_skeleton(struct iterators_bpf *obj); - -static inline struct iterators_bpf * -iterators_bpf__open_opts(const struct bpf_object_open_opts *opts) -{ - struct iterators_bpf *obj; - - obj = (struct iterators_bpf *)calloc(1, sizeof(*obj)); - if (!obj) - return NULL; - if (iterators_bpf__create_skeleton(obj)) - goto err; - if (bpf_object__open_skeleton(obj->skeleton, opts)) - goto err; - - return obj; -err: - iterators_bpf__destroy(obj); - return NULL; -} - -static inline struct iterators_bpf * -iterators_bpf__open(void) -{ - return iterators_bpf__open_opts(NULL); -} - -static inline int -iterators_bpf__load(struct iterators_bpf *obj) -{ - return bpf_object__load_skeleton(obj->skeleton); -} - -static inline struct iterators_bpf * -iterators_bpf__open_and_load(void) -{ - struct iterators_bpf *obj; - - obj = iterators_bpf__open(); - if (!obj) - return NULL; - if (iterators_bpf__load(obj)) { - iterators_bpf__destroy(obj); - return NULL; - } - return obj; -} - -static inline int -iterators_bpf__attach(struct iterators_bpf *obj) -{ - return bpf_object__attach_skeleton(obj->skeleton); -} - -static inline void -iterators_bpf__detach(struct iterators_bpf *obj) -{ - return bpf_object__detach_skeleton(obj->skeleton); -} - -static inline int -iterators_bpf__create_skeleton(struct iterators_bpf *obj) -{ - struct bpf_object_skeleton *s; - - s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s)); - if (!s) - return -1; - obj->skeleton = s; - - s->sz = sizeof(*s); - s->name = "iterators_bpf"; - s->obj = &obj->obj; - - /* maps */ - s->map_cnt = 1; - s->map_skel_sz = sizeof(*s->maps); - s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz); - if (!s->maps) - goto err; - - s->maps[0].name = "iterator.rodata"; - s->maps[0].map = 
&obj->maps.rodata; - s->maps[0].mmaped = (void **)&obj->rodata; - - /* programs */ - s->prog_cnt = 2; - s->prog_skel_sz = sizeof(*s->progs); - s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz); - if (!s->progs) - goto err; - - s->progs[0].name = "dump_bpf_map"; - s->progs[0].prog = &obj->progs.dump_bpf_map; - s->progs[0].link = &obj->links.dump_bpf_map; - - s->progs[1].name = "dump_bpf_prog"; - s->progs[1].prog = &obj->progs.dump_bpf_prog; - s->progs[1].link = &obj->links.dump_bpf_prog; - - s->data_sz = 7176; - s->data = (void *)"\ -\x7f\x45\x4c\x46\x02\x01\x01\0\0\0\0\0\0\0\0\0\x01\0\xf7\0\x01\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\x48\x18\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0f\0\ -\x0e\0\x79\x12\0\0\0\0\0\0\x79\x26\0\0\0\0\0\0\x79\x17\x08\0\0\0\0\0\x15\x07\ -\x1a\0\0\0\0\0\x79\x21\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\0\ -\x07\x04\0\0\xe8\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x02\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\xb7\x03\0\0\x23\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\0\x61\x71\0\ -\0\0\0\0\0\x7b\x1a\xe8\xff\0\0\0\0\xb7\x01\0\0\x04\0\0\0\xbf\x72\0\0\0\0\0\0\ -\x0f\x12\0\0\0\0\0\0\x7b\x2a\xf0\xff\0\0\0\0\x61\x71\x14\0\0\0\0\0\x7b\x1a\xf8\ -\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xe8\xff\xff\xff\xbf\x61\0\0\0\0\0\ -\0\x18\x02\0\0\x23\0\0\0\0\0\0\0\0\0\0\0\xb7\x03\0\0\x0e\0\0\0\xb7\x05\0\0\x18\ -\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\x79\x12\0\0\0\0\ -\0\0\x79\x26\0\0\0\0\0\0\x79\x11\x08\0\0\0\0\0\x15\x01\x3b\0\0\0\0\0\x79\x17\0\ -\0\0\0\0\0\x79\x21\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\ -\x04\0\0\xd0\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x02\0\0\x31\0\0\0\0\0\0\0\0\0\ -\0\0\xb7\x03\0\0\x20\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\0\x7b\x6a\xc8\ -\xff\0\0\0\0\x61\x71\0\0\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xb7\x03\0\0\x04\0\0\0\ -\xbf\x79\0\0\0\0\0\0\x0f\x39\0\0\0\0\0\0\x79\x71\x28\0\0\0\0\0\x79\x78\x30\0\0\ -\0\0\0\x15\x08\x18\0\0\0\0\0\xb7\x02\0\0\0\0\0\0\x0f\x21\0\0\0\0\0\0\x61\x11\ -\x04\0\0\0\0\0\x79\x83\x08\0\0\0\0\0\x67\x01\0\0\x03\0\0\0\x0f\x13\0\0\0\0\0\0\ -\x79\x86\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf8\xff\xff\xff\xb7\x02\0\ -\0\x08\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x01\0\0\0\0\0\0\x79\xa3\xf8\xff\0\0\0\0\ -\x0f\x13\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf4\xff\xff\xff\xb7\x02\0\ -\0\x04\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x03\0\0\x04\0\0\0\x61\xa1\xf4\xff\0\0\0\0\ -\x61\x82\x10\0\0\0\0\0\x3d\x21\x02\0\0\0\0\0\x0f\x16\0\0\0\0\0\0\xbf\x69\0\0\0\ -\0\0\0\x7b\x9a\xd8\xff\0\0\0\0\x79\x71\x18\0\0\0\0\0\x7b\x1a\xe0\xff\0\0\0\0\ -\x79\x71\x20\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x0f\x31\0\0\0\0\0\0\x7b\x1a\xe8\xff\ -\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xd0\xff\xff\xff\x79\xa1\xc8\xff\0\0\0\ -\0\x18\x02\0\0\x51\0\0\0\0\0\0\0\0\0\0\0\xb7\x03\0\0\x11\0\0\0\xb7\x05\0\0\x20\ -\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\x20\x20\x69\x64\ -\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x6d\ -\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\ -\x73\x25\x36\x64\x0a\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\ -\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\x64\x0a\0\x25\x34\ -\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x0a\0\x47\x50\x4c\0\x9f\ -\xeb\x01\0\x18\0\0\0\0\0\0\0\x1c\x04\0\0\x1c\x04\0\0\x09\x05\0\0\0\0\0\0\0\0\0\ -\x02\x02\0\0\0\x01\0\0\0\x02\0\0\x04\x10\0\0\0\x13\0\0\0\x03\0\0\0\0\0\0\0\x18\ -\0\0\0\x04\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\x08\0\0\0\0\0\0\0\0\0\0\x02\x0d\0\ 
-\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x01\0\0\0\x20\0\0\0\0\0\0\x01\x04\ -\0\0\0\x20\0\0\x01\x24\0\0\0\x01\0\0\x0c\x05\0\0\0\xaf\0\0\0\x03\0\0\x04\x18\0\ -\0\0\xbd\0\0\0\x09\0\0\0\0\0\0\0\xc1\0\0\0\x0b\0\0\0\x40\0\0\0\xcc\0\0\0\x0b\0\ -\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x0a\0\0\0\xd4\0\0\0\0\0\0\x07\0\0\0\0\xdd\0\0\ -\0\0\0\0\x08\x0c\0\0\0\xe3\0\0\0\0\0\0\x01\x08\0\0\0\x40\0\0\0\xa4\x01\0\0\x03\ -\0\0\x04\x18\0\0\0\xac\x01\0\0\x0e\0\0\0\0\0\0\0\xaf\x01\0\0\x11\0\0\0\x20\0\0\ -\0\xb4\x01\0\0\x0e\0\0\0\xa0\0\0\0\xc0\x01\0\0\0\0\0\x08\x0f\0\0\0\xc6\x01\0\0\ -\0\0\0\x01\x04\0\0\0\x20\0\0\0\xd3\x01\0\0\0\0\0\x01\x01\0\0\0\x08\0\0\x01\0\0\ -\0\0\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x10\0\0\0\xd8\x01\0\0\0\0\0\x01\x04\ -\0\0\0\x20\0\0\0\0\0\0\0\0\0\0\x02\x14\0\0\0\x3c\x02\0\0\x02\0\0\x04\x10\0\0\0\ -\x13\0\0\0\x03\0\0\0\0\0\0\0\x4f\x02\0\0\x15\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\ -\x18\0\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x13\0\0\0\x54\x02\0\0\x01\0\ -\0\x0c\x16\0\0\0\xa0\x02\0\0\x01\0\0\x04\x08\0\0\0\xa9\x02\0\0\x19\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\x02\x1a\0\0\0\xfa\x02\0\0\x06\0\0\x04\x38\0\0\0\xac\x01\0\0\ -\x0e\0\0\0\0\0\0\0\xaf\x01\0\0\x11\0\0\0\x20\0\0\0\x07\x03\0\0\x1b\0\0\0\xc0\0\ -\0\0\x18\x03\0\0\x15\0\0\0\0\x01\0\0\x21\x03\0\0\x1d\0\0\0\x40\x01\0\0\x2b\x03\ -\0\0\x1e\0\0\0\x80\x01\0\0\0\0\0\0\0\0\0\x02\x1c\0\0\0\0\0\0\0\0\0\0\x0a\x10\0\ -\0\0\0\0\0\0\0\0\0\x02\x1f\0\0\0\0\0\0\0\0\0\0\x02\x20\0\0\0\x75\x03\0\0\x02\0\ -\0\x04\x08\0\0\0\x83\x03\0\0\x0e\0\0\0\0\0\0\0\x8c\x03\0\0\x0e\0\0\0\x20\0\0\0\ -\x2b\x03\0\0\x03\0\0\x04\x18\0\0\0\x96\x03\0\0\x1b\0\0\0\0\0\0\0\x9e\x03\0\0\ -\x21\0\0\0\x40\0\0\0\xa4\x03\0\0\x23\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x22\0\0\ -\0\0\0\0\0\0\0\0\x02\x24\0\0\0\xa8\x03\0\0\x01\0\0\x04\x04\0\0\0\xb3\x03\0\0\ -\x0e\0\0\0\0\0\0\0\x1c\x04\0\0\x01\0\0\x04\x04\0\0\0\x25\x04\0\0\x0e\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x23\0\0\0\x9b\x04\0\0\0\0\0\ -\x0e\x25\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x0e\0\0\0\ -\xaf\x04\0\0\0\0\0\x0e\x27\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\ -\x12\0\0\0\x20\0\0\0\xc5\x04\0\0\0\0\0\x0e\x29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\ -\0\0\0\0\x1c\0\0\0\x12\0\0\0\x11\0\0\0\xda\x04\0\0\0\0\0\x0e\x2b\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x04\0\0\0\xf1\x04\0\0\0\0\0\x0e\ -\x2d\0\0\0\x01\0\0\0\xf9\x04\0\0\x04\0\0\x0f\0\0\0\0\x26\0\0\0\0\0\0\0\x23\0\0\ -\0\x28\0\0\0\x23\0\0\0\x0e\0\0\0\x2a\0\0\0\x31\0\0\0\x20\0\0\0\x2c\0\0\0\x51\0\ -\0\0\x11\0\0\0\x01\x05\0\0\x01\0\0\x0f\0\0\0\0\x2e\0\0\0\0\0\0\0\x04\0\0\0\0\ -\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x6d\x65\ -\x74\x61\0\x6d\x61\x70\0\x63\x74\x78\0\x69\x6e\x74\0\x64\x75\x6d\x70\x5f\x62\ -\x70\x66\x5f\x6d\x61\x70\0\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\ -\x30\x3a\x30\0\x2f\x68\x6f\x6d\x65\x2f\x61\x6c\x72\x75\x61\x2f\x62\x75\x69\x6c\ -\x64\x2f\x6c\x69\x6e\x75\x78\x2f\x6b\x65\x72\x6e\x65\x6c\x2f\x62\x70\x66\x2f\ -\x70\x72\x65\x6c\x6f\x61\x64\x2f\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2f\x69\ -\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\x2e\x63\0\x09\x73\x74\x72\x75\ -\x63\x74\x20\x73\x65\x71\x5f\x66\x69\x6c\x65\x20\x2a\x73\x65\x71\x20\x3d\x20\ -\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\x71\x3b\0\x62\x70\x66\x5f\ -\x69\x74\x65\x72\x5f\x6d\x65\x74\x61\0\x73\x65\x71\0\x73\x65\x73\x73\x69\x6f\ -\x6e\x5f\x69\x64\0\x73\x65\x71\x5f\x6e\x75\x6d\0\x73\x65\x71\x5f\x66\x69\x6c\ -\x65\0\x5f\x5f\x75\x36\x34\0\x6c\x6f\x6e\x67\x20\x6c\x6f\x6e\x67\x20\x75\x6e\ 
-\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x30\x3a\x31\0\x09\x73\x74\x72\x75\ -\x63\x74\x20\x62\x70\x66\x5f\x6d\x61\x70\x20\x2a\x6d\x61\x70\x20\x3d\x20\x63\ -\x74\x78\x2d\x3e\x6d\x61\x70\x3b\0\x09\x69\x66\x20\x28\x21\x6d\x61\x70\x29\0\ -\x30\x3a\x32\0\x09\x5f\x5f\x75\x36\x34\x20\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\ -\x20\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\x71\x5f\x6e\x75\x6d\ -\x3b\0\x09\x69\x66\x20\x28\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x3d\x20\x30\x29\ -\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\ -\x71\x2c\x20\x22\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\ -\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x5c\ -\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x6d\x61\x70\0\x69\x64\0\x6e\x61\x6d\x65\0\ -\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\0\x5f\x5f\x75\x33\x32\0\x75\x6e\ -\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x63\x68\x61\x72\0\x5f\x5f\x41\x52\ -\x52\x41\x59\x5f\x53\x49\x5a\x45\x5f\x54\x59\x50\x45\x5f\x5f\0\x09\x42\x50\x46\ -\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x25\ -\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x5c\x6e\x22\x2c\x20\x6d\x61\x70\ -\x2d\x3e\x69\x64\x2c\x20\x6d\x61\x70\x2d\x3e\x6e\x61\x6d\x65\x2c\x20\x6d\x61\ -\x70\x2d\x3e\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x29\x3b\0\x7d\0\x62\ -\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x70\x72\ -\x6f\x67\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x69\x74\x65\ -\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x09\x73\x74\x72\x75\x63\x74\x20\x62\ -\x70\x66\x5f\x70\x72\x6f\x67\x20\x2a\x70\x72\x6f\x67\x20\x3d\x20\x63\x74\x78\ -\x2d\x3e\x70\x72\x6f\x67\x3b\0\x09\x69\x66\x20\x28\x21\x70\x72\x6f\x67\x29\0\ -\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x61\x75\x78\0\x09\x61\x75\x78\x20\x3d\x20\ -\x70\x72\x6f\x67\x2d\x3e\x61\x75\x78\x3b\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\ -\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\ -\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\ -\x74\x61\x63\x68\x65\x64\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x70\x72\x6f\x67\ -\x5f\x61\x75\x78\0\x61\x74\x74\x61\x63\x68\x5f\x66\x75\x6e\x63\x5f\x6e\x61\x6d\ -\x65\0\x64\x73\x74\x5f\x70\x72\x6f\x67\0\x66\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\ -\x62\x74\x66\0\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\ -\x73\x65\x71\x2c\x20\x22\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\ -\x25\x73\x5c\x6e\x22\x2c\x20\x61\x75\x78\x2d\x3e\x69\x64\x2c\0\x30\x3a\x34\0\ -\x30\x3a\x35\0\x09\x69\x66\x20\x28\x21\x62\x74\x66\x29\0\x62\x70\x66\x5f\x66\ -\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x69\x6e\x73\x6e\x5f\x6f\x66\x66\0\x74\x79\ -\x70\x65\x5f\x69\x64\0\x30\0\x73\x74\x72\x69\x6e\x67\x73\0\x74\x79\x70\x65\x73\ -\0\x68\x64\x72\0\x62\x74\x66\x5f\x68\x65\x61\x64\x65\x72\0\x73\x74\x72\x5f\x6c\ -\x65\x6e\0\x09\x74\x79\x70\x65\x73\x20\x3d\x20\x62\x74\x66\x2d\x3e\x74\x79\x70\ -\x65\x73\x3b\0\x09\x62\x70\x66\x5f\x70\x72\x6f\x62\x65\x5f\x72\x65\x61\x64\x5f\ -\x6b\x65\x72\x6e\x65\x6c\x28\x26\x74\x2c\x20\x73\x69\x7a\x65\x6f\x66\x28\x74\ -\x29\x2c\x20\x74\x79\x70\x65\x73\x20\x2b\x20\x62\x74\x66\x5f\x69\x64\x29\x3b\0\ -\x09\x73\x74\x72\x20\x3d\x20\x62\x74\x66\x2d\x3e\x73\x74\x72\x69\x6e\x67\x73\ -\x3b\0\x62\x74\x66\x5f\x74\x79\x70\x65\0\x6e\x61\x6d\x65\x5f\x6f\x66\x66\0\x09\ -\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\x3d\x20\x42\x50\x46\x5f\x43\x4f\x52\x45\ -\x5f\x52\x45\x41\x44\x28\x74\x2c\x20\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x29\x3b\0\ 
-\x30\x3a\x32\x3a\x30\0\x09\x69\x66\x20\x28\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\ -\x3e\x3d\x20\x62\x74\x66\x2d\x3e\x68\x64\x72\x2e\x73\x74\x72\x5f\x6c\x65\x6e\ -\x29\0\x09\x72\x65\x74\x75\x72\x6e\x20\x73\x74\x72\x20\x2b\x20\x6e\x61\x6d\x65\ -\x5f\x6f\x66\x66\x3b\0\x30\x3a\x33\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\ -\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\ -\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\x64\x75\x6d\x70\x5f\x62\x70\x66\ -\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\ -\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x4c\x49\x43\x45\ -\x4e\x53\x45\0\x2e\x72\x6f\x64\x61\x74\x61\0\x6c\x69\x63\x65\x6e\x73\x65\0\x9f\ -\xeb\x01\0\x20\0\0\0\0\0\0\0\x24\0\0\0\x24\0\0\0\x44\x02\0\0\x68\x02\0\0\xa4\ -\x01\0\0\x08\0\0\0\x31\0\0\0\x01\0\0\0\0\0\0\0\x07\0\0\0\x62\x02\0\0\x01\0\0\0\ -\0\0\0\0\x17\0\0\0\x10\0\0\0\x31\0\0\0\x09\0\0\0\0\0\0\0\x42\0\0\0\x87\0\0\0\ -\x1e\x40\x01\0\x08\0\0\0\x42\0\0\0\x87\0\0\0\x24\x40\x01\0\x10\0\0\0\x42\0\0\0\ -\xfe\0\0\0\x1d\x48\x01\0\x18\0\0\0\x42\0\0\0\x1f\x01\0\0\x06\x50\x01\0\x20\0\0\ -\0\x42\0\0\0\x2e\x01\0\0\x1d\x44\x01\0\x28\0\0\0\x42\0\0\0\x53\x01\0\0\x06\x5c\ -\x01\0\x38\0\0\0\x42\0\0\0\x66\x01\0\0\x03\x60\x01\0\x70\0\0\0\x42\0\0\0\xec\ -\x01\0\0\x02\x68\x01\0\xf0\0\0\0\x42\0\0\0\x3a\x02\0\0\x01\x70\x01\0\x62\x02\0\ -\0\x1a\0\0\0\0\0\0\0\x42\0\0\0\x87\0\0\0\x1e\x84\x01\0\x08\0\0\0\x42\0\0\0\x87\ -\0\0\0\x24\x84\x01\0\x10\0\0\0\x42\0\0\0\x70\x02\0\0\x1f\x8c\x01\0\x18\0\0\0\ -\x42\0\0\0\x94\x02\0\0\x06\x98\x01\0\x20\0\0\0\x42\0\0\0\xad\x02\0\0\x0e\xa4\ -\x01\0\x28\0\0\0\x42\0\0\0\x2e\x01\0\0\x1d\x88\x01\0\x30\0\0\0\x42\0\0\0\x53\ -\x01\0\0\x06\xa8\x01\0\x40\0\0\0\x42\0\0\0\xbf\x02\0\0\x03\xac\x01\0\x80\0\0\0\ -\x42\0\0\0\x2f\x03\0\0\x02\xb4\x01\0\xb8\0\0\0\x42\0\0\0\x6a\x03\0\0\x06\x08\ -\x01\0\xd0\0\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\xd8\0\0\0\x42\0\0\0\xbb\x03\0\0\x0f\ -\x14\x01\0\xe0\0\0\0\x42\0\0\0\xd0\x03\0\0\x2d\x18\x01\0\xf0\0\0\0\x42\0\0\0\ -\x07\x04\0\0\x0d\x10\x01\0\0\x01\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x08\x01\0\0\x42\ -\0\0\0\xd0\x03\0\0\x02\x18\x01\0\x20\x01\0\0\x42\0\0\0\x2e\x04\0\0\x0d\x1c\x01\ -\0\x38\x01\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x40\x01\0\0\x42\0\0\0\x2e\x04\0\0\x0d\ -\x1c\x01\0\x58\x01\0\0\x42\0\0\0\x2e\x04\0\0\x0d\x1c\x01\0\x60\x01\0\0\x42\0\0\ -\0\x5c\x04\0\0\x1b\x20\x01\0\x68\x01\0\0\x42\0\0\0\x5c\x04\0\0\x06\x20\x01\0\ -\x70\x01\0\0\x42\0\0\0\x7f\x04\0\0\x0d\x28\x01\0\x78\x01\0\0\x42\0\0\0\0\0\0\0\ -\0\0\0\0\x80\x01\0\0\x42\0\0\0\x2f\x03\0\0\x02\xb4\x01\0\xf8\x01\0\0\x42\0\0\0\ -\x3a\x02\0\0\x01\xc4\x01\0\x10\0\0\0\x31\0\0\0\x07\0\0\0\0\0\0\0\x02\0\0\0\x3e\ -\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\0\0\0\x10\0\0\0\x02\0\0\0\xfa\0\ -\0\0\0\0\0\0\x20\0\0\0\x08\0\0\0\x2a\x01\0\0\0\0\0\0\x70\0\0\0\x0d\0\0\0\x3e\0\ -\0\0\0\0\0\0\x80\0\0\0\x0d\0\0\0\xfa\0\0\0\0\0\0\0\xa0\0\0\0\x0d\0\0\0\x2a\x01\ -\0\0\0\0\0\0\x62\x02\0\0\x12\0\0\0\0\0\0\0\x14\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\ -\0\x08\0\0\0\x3e\0\0\0\0\0\0\0\x10\0\0\0\x14\0\0\0\xfa\0\0\0\0\0\0\0\x20\0\0\0\ -\x18\0\0\0\x3e\0\0\0\0\0\0\0\x28\0\0\0\x08\0\0\0\x2a\x01\0\0\0\0\0\0\x80\0\0\0\ -\x1a\0\0\0\x3e\0\0\0\0\0\0\0\x90\0\0\0\x1a\0\0\0\xfa\0\0\0\0\0\0\0\xa8\0\0\0\ -\x1a\0\0\0\x62\x03\0\0\0\0\0\0\xb0\0\0\0\x1a\0\0\0\x66\x03\0\0\0\0\0\0\xc0\0\0\ -\0\x1f\0\0\0\x94\x03\0\0\0\0\0\0\xd8\0\0\0\x20\0\0\0\xfa\0\0\0\0\0\0\0\xf0\0\0\ -\0\x20\0\0\0\x3e\0\0\0\0\0\0\0\x18\x01\0\0\x24\0\0\0\x3e\0\0\0\0\0\0\0\x50\x01\ -\0\0\x1a\0\0\0\xfa\0\0\0\0\0\0\0\x60\x01\0\0\x20\0\0\0\x56\x04\0\0\0\0\0\0\x88\ 
-\x01\0\0\x1a\0\0\0\x2a\x01\0\0\0\0\0\0\x98\x01\0\0\x1a\0\0\0\x97\x04\0\0\0\0\0\ -\0\xa0\x01\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\x91\0\0\0\x04\0\xf1\xff\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xe6\0\0\ -\0\0\0\x02\0\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd8\0\0\0\0\0\x02\0\xf0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\xdf\0\0\0\0\0\x03\0\x78\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\xd1\0\0\0\0\0\x03\0\x80\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xca\0\0\0\0\0\x03\0\ -\xf8\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x14\0\0\0\x01\0\x04\0\0\0\0\0\0\0\0\0\x23\ -\0\0\0\0\0\0\0\x04\x01\0\0\x01\0\x04\0\x23\0\0\0\0\0\0\0\x0e\0\0\0\0\0\0\0\x28\ -\0\0\0\x01\0\x04\0\x31\0\0\0\0\0\0\0\x20\0\0\0\0\0\0\0\xed\0\0\0\x01\0\x04\0\ -\x51\0\0\0\0\0\0\0\x11\0\0\0\0\0\0\0\0\0\0\0\x03\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\x03\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\ -\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xc2\0\0\0\x11\0\x05\0\0\0\0\0\0\0\0\0\ -\x04\0\0\0\0\0\0\0\x3d\0\0\0\x12\0\x02\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\x5b\ -\0\0\0\x12\0\x03\0\0\0\0\0\0\0\0\0\x08\x02\0\0\0\0\0\0\x48\0\0\0\0\0\0\0\x01\0\ -\0\0\x0d\0\0\0\xc8\0\0\0\0\0\0\0\x01\0\0\0\x0d\0\0\0\x50\0\0\0\0\0\0\0\x01\0\0\ -\0\x0d\0\0\0\xd0\x01\0\0\0\0\0\0\x01\0\0\0\x0d\0\0\0\xf0\x03\0\0\0\0\0\0\x0a\0\ -\0\0\x0d\0\0\0\xfc\x03\0\0\0\0\0\0\x0a\0\0\0\x0d\0\0\0\x08\x04\0\0\0\0\0\0\x0a\ -\0\0\0\x0d\0\0\0\x14\x04\0\0\0\0\0\0\x0a\0\0\0\x0d\0\0\0\x2c\x04\0\0\0\0\0\0\0\ -\0\0\0\x0e\0\0\0\x2c\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x3c\0\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x50\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x60\0\0\0\0\0\0\0\0\0\0\0\x0b\0\ -\0\0\x70\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\ -\x90\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xa0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xb0\0\ -\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xc0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xd0\0\0\0\0\ -\0\0\0\0\0\0\0\x0b\0\0\0\xe8\0\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xf8\0\0\0\0\0\0\0\ -\0\0\0\0\x0c\0\0\0\x08\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x18\x01\0\0\0\0\0\0\0\ -\0\0\0\x0c\0\0\0\x28\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x38\x01\0\0\0\0\0\0\0\0\ -\0\0\x0c\0\0\0\x48\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x58\x01\0\0\0\0\0\0\0\0\0\ -\0\x0c\0\0\0\x68\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x78\x01\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x88\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x98\x01\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\xa8\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xb8\x01\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\xc8\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xd8\x01\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\xe8\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xf8\x01\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x08\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x18\x02\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x28\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x38\x02\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x48\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x58\x02\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x68\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x78\x02\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x94\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xa4\x02\0\0\0\0\0\0\0\0\0\0\ -\x0b\0\0\0\xb4\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xc4\x02\0\0\0\0\0\0\0\0\0\0\ -\x0b\0\0\0\xd4\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xe4\x02\0\0\0\0\0\0\0\0\0\0\ -\x0b\0\0\0\xf4\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x0c\x03\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x1c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x2c\x03\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x3c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x4c\x03\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x5c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x6c\x03\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x7c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x8c\x03\0\0\0\0\0\0\0\0\0\0\ 
-\x0c\0\0\0\x9c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xac\x03\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\xbc\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xcc\x03\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\xdc\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xec\x03\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\xfc\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x0c\x04\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x1c\x04\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x4d\x4e\x40\x41\x42\x43\x4c\0\ -\x2e\x74\x65\x78\x74\0\x2e\x72\x65\x6c\x2e\x42\x54\x46\x2e\x65\x78\x74\0\x64\ -\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\ -\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\ -\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x2e\x72\x65\x6c\x69\x74\x65\ -\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\ -\x72\x6f\x67\0\x2e\x72\x65\x6c\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\ -\x67\0\x2e\x6c\x6c\x76\x6d\x5f\x61\x64\x64\x72\x73\x69\x67\0\x6c\x69\x63\x65\ -\x6e\x73\x65\0\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\x2e\x63\0\ -\x2e\x73\x74\x72\x74\x61\x62\0\x2e\x73\x79\x6d\x74\x61\x62\0\x2e\x72\x6f\x64\ -\x61\x74\x61\0\x2e\x72\x65\x6c\x2e\x42\x54\x46\0\x4c\x49\x43\x45\x4e\x53\x45\0\ -\x4c\x42\x42\x31\x5f\x37\0\x4c\x42\x42\x31\x5f\x36\0\x4c\x42\x42\x30\x5f\x34\0\ -\x4c\x42\x42\x31\x5f\x33\0\x4c\x42\x42\x30\x5f\x33\0\x64\x75\x6d\x70\x5f\x62\ -\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x64\x75\x6d\ -\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x01\0\0\ -\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x4e\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\x6d\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\x40\x01\0\0\0\0\0\0\x08\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\xb1\0\0\0\x01\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x48\x03\0\ -\0\0\0\0\0\x62\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\x89\0\0\0\x01\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xaa\x03\0\0\0\0\0\0\x04\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xbd\0\0\0\x01\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xae\x03\0\0\0\0\0\0\x3d\x09\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x01\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\xeb\x0c\0\0\0\0\0\0\x2c\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa9\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\x18\x11\0\0\0\0\0\0\x98\x01\0\0\0\0\0\0\x0e\0\0\0\x0e\0\0\0\x08\0\0\ -\0\0\0\0\0\x18\0\0\0\0\0\0\0\x4a\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\xb0\x12\0\0\0\0\0\0\x20\0\0\0\0\0\0\0\x08\0\0\0\x02\0\0\0\x08\0\0\0\0\0\0\0\ -\x10\0\0\0\0\0\0\0\x69\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd0\x12\ -\0\0\0\0\0\0\x20\0\0\0\0\0\0\0\x08\0\0\0\x03\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\ -\0\0\0\0\xb9\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xf0\x12\0\0\0\0\0\ -\0\x50\0\0\0\0\0\0\0\x08\0\0\0\x06\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\ -\x07\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\x13\0\0\0\0\0\0\xe0\ -\x03\0\0\0\0\0\0\x08\0\0\0\x07\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x7b\0\ -\0\0\x03\x4c\xff\x6f\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\0\0\x20\x17\0\0\0\0\0\0\x07\ 
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa1\0\0\0\x03\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x27\x17\0\0\0\0\0\0\x1a\x01\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; - - return 0; -err: - bpf_object__destroy_skeleton(s); - return -1; -} - -#endif /* __ITERATORS_BPF_SKEL_H__ */ diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 5e7edf913060..7224691df2ec 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -213,7 +213,7 @@ static void __bpf_tramp_image_put_deferred(struct work_struct *work) im = container_of(work, struct bpf_tramp_image, work); bpf_image_ksym_del(&im->ksym); bpf_jit_free_exec(im->image); - bpf_jit_uncharge_modmem(1); + bpf_jit_uncharge_modmem(PAGE_SIZE); percpu_ref_exit(&im->pcref); kfree_rcu(im, rcu); } @@ -310,7 +310,7 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx) if (!im) goto out; - err = bpf_jit_charge_modmem(1); + err = bpf_jit_charge_modmem(PAGE_SIZE); if (err) goto out_free_im; @@ -332,7 +332,7 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx) out_free_image: bpf_jit_free_exec(im->image); out_uncharge: - bpf_jit_uncharge_modmem(1); + bpf_jit_uncharge_modmem(PAGE_SIZE); out_free_im: kfree(im); out: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dcf065ec2774..bbef86cb4e72 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -536,7 +536,7 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn) static const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type) { - char postfix[16] = {0}, prefix[16] = {0}; + char postfix[16] = {0}, prefix[32] = {0}; static const char * const str[] = { [NOT_INIT] = "?", [SCALAR_VALUE] = "inv", @@ -570,9 +570,11 @@ static const char *reg_type_str(struct bpf_verifier_env *env, } if (type & MEM_RDONLY) - strncpy(prefix, "rdonly_", 16); + strncpy(prefix, "rdonly_", 32); if (type & MEM_ALLOC) - strncpy(prefix, "alloc_", 16); + strncpy(prefix, "alloc_", 32); + if (type & MEM_USER) + strncpy(prefix, "user_", 32); snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", prefix, str[base_type(type)], postfix); @@ -1547,14 +1549,15 @@ static void mark_reg_not_init(struct bpf_verifier_env *env, static void mark_btf_ld_reg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno, enum bpf_reg_type reg_type, - struct btf *btf, u32 btf_id) + struct btf *btf, u32 btf_id, + enum bpf_type_flag flag) { if (reg_type == SCALAR_VALUE) { mark_reg_unknown(env, regs, regno); return; } mark_reg_known_zero(env, regs, regno); - regs[regno].type = PTR_TO_BTF_ID; + regs[regno].type = PTR_TO_BTF_ID | flag; regs[regno].btf = btf; regs[regno].btf_id = btf_id; } @@ -4152,6 +4155,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg = regs + regno; const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id); const char *tname = btf_name_by_offset(reg->btf, t->name_off); + enum bpf_type_flag flag = 0; u32 btf_id; int ret; @@ -4171,9 +4175,16 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, return -EACCES; } + if (reg->type & MEM_USER) { + verbose(env, + "R%d is ptr_%s access user memory: off=%d\n", + regno, tname, off); + return -EACCES; + } + if (env->ops->btf_struct_access) { ret = env->ops->btf_struct_access(&env->log, reg->btf, t, - off, size, atype, &btf_id); + off, size, atype, &btf_id, &flag); } else { if (atype != BPF_READ) { verbose(env, "only read is supported\n"); @@ -4181,14 +4192,14 @@ static int 
check_ptr_to_btf_access(struct bpf_verifier_env *env, } ret = btf_struct_access(&env->log, reg->btf, t, off, size, - atype, &btf_id); + atype, &btf_id, &flag); } if (ret < 0) return ret; if (atype == BPF_READ && value_regno >= 0) - mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id); + mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag); return 0; } @@ -4201,6 +4212,7 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, { struct bpf_reg_state *reg = regs + regno; struct bpf_map *map = reg->map_ptr; + enum bpf_type_flag flag = 0; const struct btf_type *t; const char *tname; u32 btf_id; @@ -4238,12 +4250,12 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, return -EACCES; } - ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id); + ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id, &flag); if (ret < 0) return ret; if (value_regno >= 0) - mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id); + mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag); return 0; } @@ -4444,7 +4456,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (err < 0) return err; - err = check_ctx_access(env, insn_idx, off, size, t, ®_type, &btf, &btf_id); + err = check_ctx_access(env, insn_idx, off, size, t, ®_type, &btf, + &btf_id); if (err) verbose_linfo(env, insn_idx, "; "); if (!err && t == BPF_READ && value_regno >= 0) { @@ -13054,6 +13067,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) prog->jited = 1; prog->bpf_func = func[0]->bpf_func; + prog->jited_len = func[0]->jited_len; prog->aux->func = func; prog->aux->func_cnt = env->subprog_cnt; bpf_prog_jit_attempt_done(prog); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 06a9e220069e..a2024ba32a20 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1235,6 +1235,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_task_stack_proto; case BPF_FUNC_copy_from_user: return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL; + case BPF_FUNC_copy_from_user_task: + return prog->aux->sleepable ? &bpf_copy_from_user_task_proto : NULL; case BPF_FUNC_snprintf_btf: return &bpf_snprintf_btf_proto; case BPF_FUNC_per_cpu_ptr: diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 14b89aa37c5c..1555da672275 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -296,7 +296,7 @@ config DEBUG_INFO_DWARF4 config DEBUG_INFO_DWARF5 bool "Generate DWARF Version 5 debuginfo" depends on !CC_IS_CLANG || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502))) - depends on !DEBUG_INFO_BTF + depends on !DEBUG_INFO_BTF || PAHOLE_VERSION >= 121 help Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc 5.0+ accepts the -gdwarf-5 flag but only had partial support for some @@ -323,7 +323,15 @@ config DEBUG_INFO_BTF DWARF type info into equivalent deduplicated BTF type info. config PAHOLE_HAS_SPLIT_BTF - def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119") + def_bool PAHOLE_VERSION >= 119 + +config PAHOLE_HAS_BTF_TAG + def_bool PAHOLE_VERSION >= 123 + depends on CC_IS_CLANG + help + Decide whether pahole emits btf_tag attributes (btf_type_tag and + btf_decl_tag) or not. Currently only clang compiler implements + these attributes, so make the config depend on CC_IS_CLANG. 
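Taken together, the verifier, helper and Kconfig changes above form the user-pointer tagging support: when the kernel is built with clang and pahole >= 1.23, __user pointers carry a BTF type tag, btf_struct_access() folds that tag into the register type as the new MEM_USER flag, and direct loads through such pointers are rejected in check_ptr_to_btf_access(). Programs have to go through bpf_probe_read_user() instead, or through the new bpf_copy_from_user_task() helper in sleepable programs. A minimal sketch of the annotation side, assuming the BTF_TYPE_TAG() macro shape used by compiler_types.h:

	#if defined(CONFIG_DEBUG_INFO_BTF) && defined(CONFIG_PAHOLE_HAS_BTF_TAG)
	/* clang records the attribute in DWARF; pahole >= 1.23 turns it into a
	 * BTF_KIND_TYPE_TAG entry chained in front of the pointee type.
	 */
	# define BTF_TYPE_TAG(value) __attribute__((btf_type_tag(#value)))
	#else
	# define BTF_TYPE_TAG(value)
	#endif

	/* The tag qualifies the pointee, so it sits before the '*': */
	struct example {
		void BTF_TYPE_TAG(user) *user_buf;	/* hypothetical field */
	};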
config DEBUG_INFO_BTF_MODULES def_bool y diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c index a6789c0c626b..dc7b14aa3431 100644 --- a/lib/ref_tracker.c +++ b/lib/ref_tracker.c @@ -20,6 +20,7 @@ void ref_tracker_dir_exit(struct ref_tracker_dir *dir) unsigned long flags; bool leak = false; + dir->dead = true; spin_lock_irqsave(&dir->lock, flags); list_for_each_entry_safe(tracker, n, &dir->quarantine, head) { list_del(&tracker->head); @@ -37,6 +38,7 @@ void ref_tracker_dir_exit(struct ref_tracker_dir *dir) spin_unlock_irqrestore(&dir->lock, flags); WARN_ON_ONCE(leak); WARN_ON_ONCE(refcount_read(&dir->untracked) != 1); + WARN_ON_ONCE(refcount_read(&dir->no_tracker) != 1); } EXPORT_SYMBOL(ref_tracker_dir_exit); @@ -72,6 +74,12 @@ int ref_tracker_alloc(struct ref_tracker_dir *dir, gfp_t gfp_mask = gfp; unsigned long flags; + WARN_ON_ONCE(dir->dead); + + if (!trackerp) { + refcount_inc(&dir->no_tracker); + return 0; + } if (gfp & __GFP_DIRECT_RECLAIM) gfp_mask |= __GFP_NOFAIL; *trackerp = tracker = kzalloc(sizeof(*tracker), gfp_mask); @@ -81,7 +89,6 @@ int ref_tracker_alloc(struct ref_tracker_dir *dir, return -ENOMEM; } nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1); - nr_entries = filter_irq_stacks(entries, nr_entries); tracker->alloc_stack_handle = stack_depot_save(entries, nr_entries, gfp); spin_lock_irqsave(&dir->lock, flags); @@ -95,17 +102,23 @@ int ref_tracker_free(struct ref_tracker_dir *dir, struct ref_tracker **trackerp) { unsigned long entries[REF_TRACKER_STACK_ENTRIES]; - struct ref_tracker *tracker = *trackerp; depot_stack_handle_t stack_handle; + struct ref_tracker *tracker; unsigned int nr_entries; unsigned long flags; + WARN_ON_ONCE(dir->dead); + + if (!trackerp) { + refcount_dec(&dir->no_tracker); + return 0; + } + tracker = *trackerp; if (!tracker) { refcount_dec(&dir->untracked); return -EEXIST; } nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1); - nr_entries = filter_irq_stacks(entries, nr_entries); stack_handle = stack_depot_save(entries, nr_entries, GFP_ATOMIC); spin_lock_irqsave(&dir->lock, flags); diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c index a068757eabaf..7b3341cef926 100644 --- a/net/6lowpan/core.c +++ b/net/6lowpan/core.c @@ -5,6 +5,7 @@ * (C) 2015 Pengutronix, Alexander Aring <aar@pengutronix.de> */ +#include <linux/if_arp.h> #include <linux/module.h> #include <net/6lowpan.h> diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index f4004cf0ff6f..9f311fddfaf9 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -134,7 +134,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) { struct inet6_dev *in6_dev = __in6_dev_get(dev); - if (in6_dev && in6_dev->cnf.mc_forwarding) + if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding)) return BATADV_NO_FLAGS; else return BATADV_MCAST_WANT_NO_RTR6; diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index fbc896323bec..d0e54e30658a 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -145,7 +145,8 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id) + u32 *next_btf_id, + enum bpf_type_flag *flag) { const struct btf_type *state; s32 type_id; @@ -162,7 +163,8 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log, return -EACCES; } - err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id); + err 
= btf_struct_access(log, btf, t, off, size, atype, next_btf_id, + flag); if (err < 0) return err; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 65b52b4bd6e1..f08034500813 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -154,7 +154,8 @@ static int bpf_test_finish(const union bpf_attr *kattr, goto out; if (sinfo) { - int i, offset = len, data_len; + int i, offset = len; + u32 data_len; for (i = 0; i < sinfo->nr_frags; i++) { skb_frag_t *frag = &sinfo->frags[i]; @@ -164,7 +165,7 @@ static int bpf_test_finish(const union bpf_attr *kattr, break; } - data_len = min_t(int, copy_size - offset, + data_len = min_t(u32, copy_size - offset, skb_frag_size(frag)); if (copy_to_user(data_out + offset, @@ -960,7 +961,12 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, while (size < kattr->test.data_size_in) { struct page *page; skb_frag_t *frag; - int data_len; + u32 data_len; + + if (sinfo->nr_frags == MAX_SKB_FRAGS) { + ret = -ENOMEM; + goto out; + } page = alloc_page(GFP_KERNEL); if (!page) { @@ -971,7 +977,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, frag = &sinfo->frags[sinfo->nr_frags++]; __skb_frag_set_page(frag, page); - data_len = min_t(int, kattr->test.data_size_in - size, + data_len = min_t(u32, kattr->test.data_size_in - size, PAGE_SIZE); skb_frag_size_set(frag, data_len); @@ -1141,7 +1147,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx))) goto out; - if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) { + if (user_ctx->local_port > U16_MAX) { ret = -ERANGE; goto out; } @@ -1149,7 +1155,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat ctx.family = (u16)user_ctx->family; ctx.protocol = (u16)user_ctx->protocol; ctx.dport = (u16)user_ctx->local_port; - ctx.sport = (__force __be16)user_ctx->remote_port; + ctx.sport = user_ctx->remote_port; switch (ctx.family) { case AF_INET: diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 440139706130..52dd0b6835bc 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -268,7 +268,7 @@ static int receive(struct sk_buff *skb, struct net_device *dev, err = caifd->layer.up->receive(caifd->layer.up, pkt); - /* For -EILSEQ the packet is not freed so so it now */ + /* For -EILSEQ the packet is not freed so free it now */ if (err == -EILSEQ) cfpkt_destroy(pkt); diff --git a/net/can/gw.c b/net/can/gw.c index d8861e862f15..24221352e059 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -1239,16 +1239,19 @@ static int __net_init cangw_pernet_init(struct net *net) return 0; } -static void __net_exit cangw_pernet_exit(struct net *net) +static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list) { + struct net *net; + rtnl_lock(); - cgw_remove_all_jobs(net); + list_for_each_entry(net, net_list, exit_list) + cgw_remove_all_jobs(net); rtnl_unlock(); } static struct pernet_operations cangw_pernet_ops = { .init = cangw_pernet_init, - .exit = cangw_pernet_exit, + .exit_batch = cangw_pernet_exit_batch, }; static __init int cgw_module_init(void) diff --git a/net/core/dev.c b/net/core/dev.c index f79744d99413..f5ef51601081 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9683,8 +9683,10 @@ int register_netdevice(struct net_device *dev) linkwatch_init_dev(dev); dev_init_scheduler(dev); - dev_hold(dev); + + dev_hold_track(dev, &dev->dev_registered_tracker, GFP_KERNEL); 
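/* dev_hold_track() above takes the usual device reference and, with
 * refcount tracking enabled (CONFIG_NET_DEV_REFCNT_TRACKER), records a
 * stack trace in the device's ref_tracker directory; the matching
 * dev_put_track() in unregister_netdevice_many() releases it, so refcount
 * leaks can be attributed to a call site. A minimal usage sketch for a
 * caller (hypothetical variable, same pattern as dev_registered_tracker):
 *
 *	netdevice_tracker tracker;
 *
 *	dev_hold_track(dev, &tracker, GFP_KERNEL);
 *	...
 *	dev_put_track(dev, &tracker);
 */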
list_netdevice(dev); + add_device_randomness(dev->dev_addr, dev->addr_len); /* If the device has permanent device address, driver should @@ -10172,7 +10174,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) goto free_dev; - dev_hold(dev); + __dev_hold(dev); #else refcount_set(&dev->dev_refcnt, 1); #endif @@ -10449,7 +10451,7 @@ void unregister_netdevice_many(struct list_head *head) synchronize_net(); list_for_each_entry(dev, head, unreg_list) { - dev_put(dev); + dev_put_track(dev, &dev->dev_registered_tracker); net_set_todo(dev); } @@ -10732,8 +10734,7 @@ static int __net_init netdev_init(struct net *net) BUILD_BUG_ON(GRO_HASH_BUCKETS > 8 * sizeof_field(struct napi_struct, gro_bitmask)); - if (net != &init_net) - INIT_LIST_HEAD(&net->dev_base_head); + INIT_LIST_HEAD(&net->dev_base_head); net->dev_name_head = netdev_create_hash(); if (net->dev_name_head == NULL) @@ -10849,14 +10850,14 @@ static struct pernet_operations __net_initdata netdev_net_ops = { .exit = netdev_exit, }; -static void __net_exit default_device_exit(struct net *net) +static void __net_exit default_device_exit_net(struct net *net) { struct net_device *dev, *aux; /* * Push all migratable network devices back to the * initial network namespace */ - rtnl_lock(); + ASSERT_RTNL(); for_each_netdev_safe(net, dev, aux) { int err; char fb_name[IFNAMSIZ]; @@ -10880,22 +10881,22 @@ static void __net_exit default_device_exit(struct net *net) BUG(); } } - rtnl_unlock(); } static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) { - /* Return with the rtnl_lock held when there are no network + /* Return (with the rtnl_lock held) when there are no network * devices unregistering in any network namespace in net_list. */ - struct net *net; - bool unregistering; DEFINE_WAIT_FUNC(wait, woken_wake_function); + bool unregistering; + struct net *net; + ASSERT_RTNL(); add_wait_queue(&netdev_unregistering_wq, &wait); for (;;) { unregistering = false; - rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) { if (net->dev_unreg_count > 0) { unregistering = true; @@ -10907,6 +10908,7 @@ static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) __rtnl_unlock(); wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); + rtnl_lock(); } remove_wait_queue(&netdev_unregistering_wq, &wait); } @@ -10922,6 +10924,11 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) struct net *net; LIST_HEAD(dev_kill_list); + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) { + default_device_exit_net(net); + cond_resched(); + } /* To prevent network device cleanup code from dereferencing * loopback devices or network devices that have been freed * wait here for all pending unregistrations to complete, @@ -10934,6 +10941,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) * default_device_exit_batch. 
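 * The default_device_exit_net() calls above already ran under this same
 * rtnl_lock, replacing the old per-netns .exit hook that took and dropped
 * the lock once per namespace.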
*/ rtnl_lock_unregistering(net_list); + list_for_each_entry(net, net_list, exit_list) { for_each_netdev_reverse(net, dev) { if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) @@ -10947,7 +10955,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) } static struct pernet_operations __net_initdata default_device_ops = { - .exit = default_device_exit, .exit_batch = default_device_exit_batch, }; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 7b288a121a41..4641126b8a20 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -48,6 +48,19 @@ static int trace_state = TRACE_OFF; static bool monitor_hw; +#undef EM +#undef EMe + +#define EM(a, b) [a] = #b, +#define EMe(a, b) [a] = #b + +/* drop_reasons is used to translate 'enum skb_drop_reason' to string, + * which is reported to user space. + */ +static const char * const drop_reasons[] = { + TRACE_SKB_DROP_REASON +}; + /* net_dm_mutex * * An overall lock guarding every operation coming from userspace. @@ -126,6 +139,7 @@ struct net_dm_skb_cb { struct devlink_trap_metadata *hw_metadata; void *pc; }; + enum skb_drop_reason reason; }; #define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0])) @@ -498,6 +512,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore, { ktime_t tstamp = ktime_get_real(); struct per_cpu_dm_data *data; + struct net_dm_skb_cb *cb; struct sk_buff *nskb; unsigned long flags; @@ -508,7 +523,11 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore, if (!nskb) return; - NET_DM_SKB_CB(nskb)->pc = location; + if ((unsigned int)reason >= SKB_DROP_REASON_MAX) + reason = SKB_DROP_REASON_NOT_SPECIFIED; + cb = NET_DM_SKB_CB(nskb); + cb->reason = reason; + cb->pc = location; /* Override the timestamp because we care about the time when the * packet was dropped. 
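 * The skb_drop_reason stashed in cb->reason just above travels with the
 * cloned skb and is later reported to user space as the new
 * NET_DM_ATTR_REASON string attribute, translated via the drop_reasons[]
 * table.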
*/ @@ -553,7 +572,8 @@ static size_t net_dm_in_port_size(void) #define NET_DM_MAX_SYMBOL_LEN 40 -static size_t net_dm_packet_report_size(size_t payload_len) +static size_t net_dm_packet_report_size(size_t payload_len, + enum skb_drop_reason reason) { size_t size; @@ -574,6 +594,8 @@ static size_t net_dm_packet_report_size(size_t payload_len) nla_total_size(sizeof(u32)) + /* NET_DM_ATTR_PROTO */ nla_total_size(sizeof(u16)) + + /* NET_DM_ATTR_REASON */ + nla_total_size(strlen(drop_reasons[reason]) + 1) + /* NET_DM_ATTR_PAYLOAD */ nla_total_size(payload_len); } @@ -606,7 +628,7 @@ nla_put_failure: static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb, size_t payload_len) { - u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc; + struct net_dm_skb_cb *cb = NET_DM_SKB_CB(skb); char buf[NET_DM_MAX_SYMBOL_LEN]; struct nlattr *attr; void *hdr; @@ -620,10 +642,15 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb, if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW)) goto nla_put_failure; - if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD)) + if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, (u64)(uintptr_t)cb->pc, + NET_DM_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_string(msg, NET_DM_ATTR_REASON, + drop_reasons[cb->reason])) goto nla_put_failure; - snprintf(buf, sizeof(buf), "%pS", NET_DM_SKB_CB(skb)->pc); + snprintf(buf, sizeof(buf), "%pS", cb->pc); if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf)) goto nla_put_failure; @@ -679,7 +706,9 @@ static void net_dm_packet_report(struct sk_buff *skb) if (net_dm_trunc_len) payload_len = min_t(size_t, net_dm_trunc_len, payload_len); - msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL); + msg = nlmsg_new(net_dm_packet_report_size(payload_len, + NET_DM_SKB_CB(skb)->reason), + GFP_KERNEL); if (!msg) goto out; diff --git a/net/core/filter.c b/net/core/filter.c index 9615ae1ab530..818244068c2d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2603,7 +2603,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, * account for the headroom. 
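 * (sg.copy is now a DECLARE_BITMAP() array rather than a single unsigned
 * long, which is why test_bit()/__clear_bit() below take the array itself
 * and not its address.)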
*/ bytes_sg_total = start - offset + bytes; - if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len) + if (!test_bit(i, msg->sg.copy) && bytes_sg_total <= len) goto out; /* At this point we need to linearize multiple scatterlist @@ -2809,7 +2809,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, /* Place newly allocated data buffer */ sk_mem_charge(msg->sk, len); msg->sg.size += len; - __clear_bit(new, &msg->sg.copy); + __clear_bit(new, msg->sg.copy); sg_set_page(&msg->sg.data[new], page, len + copy, 0); if (rsge.length) { get_page(sg_page(&rsge)); @@ -8275,6 +8275,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { const int size_default = sizeof(__u32); + int field_size; if (off < 0 || off >= sizeof(struct bpf_sock)) return false; @@ -8286,7 +8287,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, case offsetof(struct bpf_sock, family): case offsetof(struct bpf_sock, type): case offsetof(struct bpf_sock, protocol): - case offsetof(struct bpf_sock, dst_port): case offsetof(struct bpf_sock, src_port): case offsetof(struct bpf_sock, rx_queue_mapping): case bpf_ctx_range(struct bpf_sock, src_ip4): @@ -8295,6 +8295,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); + case bpf_ctx_range(struct bpf_sock, dst_port): + field_size = size == size_default ? + size_default : sizeof_field(struct bpf_sock, dst_port); + bpf_ctx_record_field_size(info, field_size); + return bpf_ctx_narrow_access_ok(off, size, field_size); + case offsetofend(struct bpf_sock, dst_port) ... + offsetof(struct bpf_sock, dst_ip4) - 1: + return false; } return size == size_default; @@ -10845,7 +10853,8 @@ static bool sk_lookup_is_valid_access(int off, int size, case bpf_ctx_range(struct bpf_sk_lookup, local_ip4): case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]): case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): - case bpf_ctx_range(struct bpf_sk_lookup, remote_port): + case offsetof(struct bpf_sk_lookup, remote_port) ... 
+ offsetof(struct bpf_sk_lookup, local_ip4) - 1: case bpf_ctx_range(struct bpf_sk_lookup, local_port): case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex): bpf_ctx_record_field_size(info, sizeof(__u32)); diff --git a/net/core/gro.c b/net/core/gro.c index a11b286d1495..ee5e7e889d8b 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -459,29 +459,22 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff skb_set_network_header(skb, skb_gro_offset(skb)); skb_reset_mac_len(skb); - NAPI_GRO_CB(skb)->same_flow = 0; + BUILD_BUG_ON(sizeof_field(struct napi_gro_cb, zeroed) != sizeof(u32)); + BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed), + sizeof(u32))); /* Avoid slow unaligned acc */ + *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0; NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb); - NAPI_GRO_CB(skb)->free = 0; - NAPI_GRO_CB(skb)->encap_mark = 0; - NAPI_GRO_CB(skb)->recursion_counter = 0; - NAPI_GRO_CB(skb)->is_fou = 0; NAPI_GRO_CB(skb)->is_atomic = 1; - NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ switch (skb->ip_summed) { case CHECKSUM_COMPLETE: NAPI_GRO_CB(skb)->csum = skb->csum; NAPI_GRO_CB(skb)->csum_valid = 1; - NAPI_GRO_CB(skb)->csum_cnt = 0; break; case CHECKSUM_UNNECESSARY: NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1; - NAPI_GRO_CB(skb)->csum_valid = 0; break; - default: - NAPI_GRO_CB(skb)->csum_cnt = 0; - NAPI_GRO_CB(skb)->csum_valid = 0; } pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive, @@ -634,7 +627,6 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->encapsulation = 0; skb_shinfo(skb)->gso_type = 0; - skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); if (unlikely(skb->slow_gro)) { skb_orphan(skb); skb_ext_reset(skb); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index b0f5344d1185..95098d1a49bd 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -166,10 +166,10 @@ static void linkwatch_do_dev(struct net_device *dev) netdev_state_change(dev); } - /* Note: our callers are responsible for - * calling netdev_tracker_free(). + /* Note: our callers are responsible for calling netdev_tracker_free(). + * This is the reason we use __dev_put() instead of dev_put(). 
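 * __dev_put() drops only the reference count; plain dev_put() would also
 * release a tracker entry, which here still belongs to the caller.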
*/ - dev_put(dev); + __dev_put(dev); } static void __linkwatch_run_queue(int urgent_only) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 8711350085d6..0ec2f5906a27 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -44,13 +44,7 @@ EXPORT_SYMBOL_GPL(net_rwsem); static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) }; #endif -struct net init_net = { - .ns.count = REFCOUNT_INIT(1), - .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), -#ifdef CONFIG_KEYS - .key_domain = &init_net_key_domain, -#endif -}; +struct net init_net; EXPORT_SYMBOL(init_net); static bool init_net_initialized; @@ -1087,7 +1081,7 @@ out: rtnl_set_sk_err(net, RTNLGRP_NSID, err); } -static int __init net_ns_init(void) +void __init net_ns_init(void) { struct net_generic *ng; @@ -1108,6 +1102,9 @@ static int __init net_ns_init(void) rcu_assign_pointer(init_net.gen, ng); +#ifdef CONFIG_KEYS + init_net.key_domain = &init_net_key_domain; +#endif down_write(&pernet_ops_rwsem); if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); @@ -1122,12 +1119,8 @@ static int __init net_ns_init(void) RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, RTNL_FLAG_DOIT_UNLOCKED); - - return 0; } -pure_initcall(net_ns_init); - static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list) { ops_pre_exit_list(ops, net_exit_list); diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c index 99f3af1a9d4d..fe6094e9a2db 100644 --- a/net/hsr/hsr_debugfs.c +++ b/net/hsr/hsr_debugfs.c @@ -17,6 +17,7 @@ #include <linux/module.h> #include <linux/errno.h> #include <linux/debugfs.h> +#include <linux/jhash.h> #include "hsr_main.h" #include "hsr_framereg.h" @@ -28,6 +29,7 @@ hsr_node_table_show(struct seq_file *sfp, void *data) { struct hsr_priv *priv = (struct hsr_priv *)sfp->private; struct hsr_node *node; + int i; seq_printf(sfp, "Node Table entries for (%s) device\n", (priv->prot_version == PRP_V1 ? 
"PRP" : "HSR")); @@ -39,22 +41,28 @@ hsr_node_table_show(struct seq_file *sfp, void *data) seq_puts(sfp, "DAN-H\n"); rcu_read_lock(); - list_for_each_entry_rcu(node, &priv->node_db, mac_list) { - /* skip self node */ - if (hsr_addr_is_self(priv, node->macaddress_A)) - continue; - seq_printf(sfp, "%pM ", &node->macaddress_A[0]); - seq_printf(sfp, "%pM ", &node->macaddress_B[0]); - seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_A]); - seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_B]); - seq_printf(sfp, "%14x, ", node->addr_B_port); - - if (priv->prot_version == PRP_V1) - seq_printf(sfp, "%5x, %5x, %5x\n", - node->san_a, node->san_b, - (node->san_a == 0 && node->san_b == 0)); - else - seq_printf(sfp, "%5x\n", 1); + + for (i = 0 ; i < priv->hash_buckets; i++) { + hlist_for_each_entry_rcu(node, &priv->node_db[i], mac_list) { + /* skip self node */ + if (hsr_addr_is_self(priv, node->macaddress_A)) + continue; + seq_printf(sfp, "%pM ", &node->macaddress_A[0]); + seq_printf(sfp, "%pM ", &node->macaddress_B[0]); + seq_printf(sfp, "%10lx, ", + node->time_in[HSR_PT_SLAVE_A]); + seq_printf(sfp, "%10lx, ", + node->time_in[HSR_PT_SLAVE_B]); + seq_printf(sfp, "%14x, ", node->addr_B_port); + + if (priv->prot_version == PRP_V1) + seq_printf(sfp, "%5x, %5x, %5x\n", + node->san_a, node->san_b, + (node->san_a == 0 && + node->san_b == 0)); + else + seq_printf(sfp, "%5x\n", 1); + } } rcu_read_unlock(); return 0; diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index e57fdad9ef94..7f250216433d 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -485,12 +485,16 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], { bool unregister = false; struct hsr_priv *hsr; - int res; + int res, i; hsr = netdev_priv(hsr_dev); INIT_LIST_HEAD(&hsr->ports); - INIT_LIST_HEAD(&hsr->node_db); - INIT_LIST_HEAD(&hsr->self_node_db); + INIT_HLIST_HEAD(&hsr->self_node_db); + hsr->hash_buckets = HSR_HSIZE; + get_random_bytes(&hsr->hash_seed, sizeof(hsr->hash_seed)); + for (i = 0; i < hsr->hash_buckets; i++) + INIT_HLIST_HEAD(&hsr->node_db[i]); + spin_lock_init(&hsr->list_lock); eth_hw_addr_set(hsr_dev, slave[0]->dev_addr); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index e59cbb4f0cd1..5bf357734b11 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -570,20 +570,23 @@ static int fill_frame_info(struct hsr_frame_info *frame, struct ethhdr *ethhdr; __be16 proto; int ret; + u32 hash; /* Check if skb contains ethhdr */ if (skb->mac_len < sizeof(struct ethhdr)) return -EINVAL; memset(frame, 0, sizeof(*frame)); + + ethhdr = (struct ethhdr *)skb_mac_header(skb); + hash = hsr_mac_hash(port->hsr, ethhdr->h_source); frame->is_supervision = is_supervision_frame(port->hsr, skb); - frame->node_src = hsr_get_node(port, &hsr->node_db, skb, + frame->node_src = hsr_get_node(port, &hsr->node_db[hash], skb, frame->is_supervision, port->type); if (!frame->node_src) return -1; /* Unknown node and !is_supervision, or no mem */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); frame->is_vlan = false; proto = ethhdr->h_proto; diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 0775f0f95dbf..b3c6ffa1894d 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -15,11 +15,28 @@ #include <linux/etherdevice.h> #include <linux/slab.h> #include <linux/rculist.h> +#include <linux/jhash.h> #include "hsr_main.h" #include "hsr_framereg.h" #include "hsr_netlink.h" -/* TODO: use hash lists for mac addresses (linux/jhash.h)? 
*/ +u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr) +{ + u32 hash = jhash(addr, ETH_ALEN, hsr->hash_seed); + + return reciprocal_scale(hash, hsr->hash_buckets); +} + +struct hsr_node *hsr_node_get_first(struct hlist_head *head) +{ + struct hlist_node *first; + + first = rcu_dereference(hlist_first_rcu(head)); + if (first) + return hlist_entry(first, struct hsr_node, mac_list); + + return NULL; +} /* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b, * false otherwise. @@ -42,8 +59,7 @@ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr) { struct hsr_node *node; - node = list_first_or_null_rcu(&hsr->self_node_db, struct hsr_node, - mac_list); + node = hsr_node_get_first(&hsr->self_node_db); if (!node) { WARN_ONCE(1, "HSR: No self node\n"); return false; @@ -59,12 +75,12 @@ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr) /* Search for mac entry. Caller must hold rcu read lock. */ -static struct hsr_node *find_node_by_addr_A(struct list_head *node_db, +static struct hsr_node *find_node_by_addr_A(struct hlist_head *node_db, const unsigned char addr[ETH_ALEN]) { struct hsr_node *node; - list_for_each_entry_rcu(node, node_db, mac_list) { + hlist_for_each_entry_rcu(node, node_db, mac_list) { if (ether_addr_equal(node->macaddress_A, addr)) return node; } @@ -79,7 +95,7 @@ int hsr_create_self_node(struct hsr_priv *hsr, const unsigned char addr_a[ETH_ALEN], const unsigned char addr_b[ETH_ALEN]) { - struct list_head *self_node_db = &hsr->self_node_db; + struct hlist_head *self_node_db = &hsr->self_node_db; struct hsr_node *node, *oldnode; node = kmalloc(sizeof(*node), GFP_KERNEL); @@ -90,14 +106,13 @@ int hsr_create_self_node(struct hsr_priv *hsr, ether_addr_copy(node->macaddress_B, addr_b); spin_lock_bh(&hsr->list_lock); - oldnode = list_first_or_null_rcu(self_node_db, - struct hsr_node, mac_list); + oldnode = hsr_node_get_first(self_node_db); if (oldnode) { - list_replace_rcu(&oldnode->mac_list, &node->mac_list); + hlist_replace_rcu(&oldnode->mac_list, &node->mac_list); spin_unlock_bh(&hsr->list_lock); kfree_rcu(oldnode, rcu_head); } else { - list_add_tail_rcu(&node->mac_list, self_node_db); + hlist_add_tail_rcu(&node->mac_list, self_node_db); spin_unlock_bh(&hsr->list_lock); } @@ -106,25 +121,25 @@ int hsr_create_self_node(struct hsr_priv *hsr, void hsr_del_self_node(struct hsr_priv *hsr) { - struct list_head *self_node_db = &hsr->self_node_db; + struct hlist_head *self_node_db = &hsr->self_node_db; struct hsr_node *node; spin_lock_bh(&hsr->list_lock); - node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list); + node = hsr_node_get_first(self_node_db); if (node) { - list_del_rcu(&node->mac_list); + hlist_del_rcu(&node->mac_list); kfree_rcu(node, rcu_head); } spin_unlock_bh(&hsr->list_lock); } -void hsr_del_nodes(struct list_head *node_db) +void hsr_del_nodes(struct hlist_head *node_db) { struct hsr_node *node; - struct hsr_node *tmp; + struct hlist_node *tmp; - list_for_each_entry_safe(node, tmp, node_db, mac_list) - kfree(node); + hlist_for_each_entry_safe(node, tmp, node_db, mac_list) + kfree_rcu(node, rcu_head); } void prp_handle_san_frame(bool san, enum hsr_port_type port, @@ -145,7 +160,7 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port, * originating from the newly added node. 
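 * Note that node_db now refers to a single jhash-selected bucket of the
 * node table (see hsr_mac_hash() above), not to one global list.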
*/ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, - struct list_head *node_db, + struct hlist_head *node_db, unsigned char addr[], u16 seq_out, bool san, enum hsr_port_type rx_port) @@ -175,14 +190,14 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, hsr->proto_ops->handle_san_frame(san, rx_port, new_node); spin_lock_bh(&hsr->list_lock); - list_for_each_entry_rcu(node, node_db, mac_list, - lockdep_is_held(&hsr->list_lock)) { + hlist_for_each_entry_rcu(node, node_db, mac_list, + lockdep_is_held(&hsr->list_lock)) { if (ether_addr_equal(node->macaddress_A, addr)) goto out; if (ether_addr_equal(node->macaddress_B, addr)) goto out; } - list_add_tail_rcu(&new_node->mac_list, node_db); + hlist_add_tail_rcu(&new_node->mac_list, node_db); spin_unlock_bh(&hsr->list_lock); return new_node; out: @@ -202,7 +217,7 @@ void prp_update_san_info(struct hsr_node *node, bool is_sup) /* Get the hsr_node from which 'skb' was sent. */ -struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, +struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db, struct sk_buff *skb, bool is_sup, enum hsr_port_type rx_port) { @@ -218,7 +233,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, ethhdr = (struct ethhdr *)skb_mac_header(skb); - list_for_each_entry_rcu(node, node_db, mac_list) { + hlist_for_each_entry_rcu(node, node_db, mac_list) { if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) { if (hsr->proto_ops->update_san_info) hsr->proto_ops->update_san_info(node, is_sup); @@ -268,11 +283,12 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) struct hsr_sup_tlv *hsr_sup_tlv; struct hsr_node *node_real; struct sk_buff *skb = NULL; - struct list_head *node_db; + struct hlist_head *node_db; struct ethhdr *ethhdr; int i; unsigned int pull_size = 0; unsigned int total_pull_size = 0; + u32 hash; /* Here either frame->skb_hsr or frame->skb_prp should be * valid as supervision frame always will have protocol @@ -310,11 +326,13 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) hsr_sp = (struct hsr_sup_payload *)skb->data; /* Merge node_curr (registered on macaddress_B) into node_real */ - node_db = &port_rcv->hsr->node_db; - node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A); + node_db = port_rcv->hsr->node_db; + hash = hsr_mac_hash(hsr, hsr_sp->macaddress_A); + node_real = find_node_by_addr_A(&node_db[hash], hsr_sp->macaddress_A); if (!node_real) /* No frame received from AddrA of this node yet */ - node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A, + node_real = hsr_add_node(hsr, &node_db[hash], + hsr_sp->macaddress_A, HSR_SEQNR_START - 1, true, port_rcv->type); if (!node_real) @@ -348,7 +366,8 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) hsr_sp = (struct hsr_sup_payload *)skb->data; /* Check if redbox mac and node mac are equal. */ - if (!ether_addr_equal(node_real->macaddress_A, hsr_sp->macaddress_A)) { + if (!ether_addr_equal(node_real->macaddress_A, + hsr_sp->macaddress_A)) { /* This is a redbox supervision frame for a VDAN! 
*/ goto done; } @@ -368,7 +387,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) node_real->addr_B_port = port_rcv->type; spin_lock_bh(&hsr->list_lock); - list_del_rcu(&node_curr->mac_list); + hlist_del_rcu(&node_curr->mac_list); spin_unlock_bh(&hsr->list_lock); kfree_rcu(node_curr, rcu_head); @@ -406,6 +425,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, struct hsr_port *port) { struct hsr_node *node_dst; + u32 hash; if (!skb_mac_header_was_set(skb)) { WARN_ONCE(1, "%s: Mac header not set\n", __func__); @@ -415,7 +435,8 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, if (!is_unicast_ether_addr(eth_hdr(skb)->h_dest)) return; - node_dst = find_node_by_addr_A(&port->hsr->node_db, + hash = hsr_mac_hash(port->hsr, eth_hdr(skb)->h_dest); + node_dst = find_node_by_addr_A(&port->hsr->node_db[hash], eth_hdr(skb)->h_dest); if (!node_dst) { if (net_ratelimit()) @@ -491,59 +512,73 @@ static struct hsr_port *get_late_port(struct hsr_priv *hsr, void hsr_prune_nodes(struct timer_list *t) { struct hsr_priv *hsr = from_timer(hsr, t, prune_timer); + struct hlist_node *tmp; struct hsr_node *node; - struct hsr_node *tmp; struct hsr_port *port; unsigned long timestamp; unsigned long time_a, time_b; + int i; spin_lock_bh(&hsr->list_lock); - list_for_each_entry_safe(node, tmp, &hsr->node_db, mac_list) { - /* Don't prune own node. Neither time_in[HSR_PT_SLAVE_A] - * nor time_in[HSR_PT_SLAVE_B], will ever be updated for - * the master port. Thus the master node will be repeatedly - * pruned leading to packet loss. - */ - if (hsr_addr_is_self(hsr, node->macaddress_A)) - continue; - - /* Shorthand */ - time_a = node->time_in[HSR_PT_SLAVE_A]; - time_b = node->time_in[HSR_PT_SLAVE_B]; - - /* Check for timestamps old enough to risk wrap-around */ - if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2)) - node->time_in_stale[HSR_PT_SLAVE_A] = true; - if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2)) - node->time_in_stale[HSR_PT_SLAVE_B] = true; - - /* Get age of newest frame from node. - * At least one time_in is OK here; nodes get pruned long - * before both time_ins can get stale - */ - timestamp = time_a; - if (node->time_in_stale[HSR_PT_SLAVE_A] || - (!node->time_in_stale[HSR_PT_SLAVE_B] && - time_after(time_b, time_a))) - timestamp = time_b; - - /* Warn of ring error only as long as we get frames at all */ - if (time_is_after_jiffies(timestamp + - msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) { - rcu_read_lock(); - port = get_late_port(hsr, node); - if (port) - hsr_nl_ringerror(hsr, node->macaddress_A, port); - rcu_read_unlock(); - } - /* Prune old entries */ - if (time_is_before_jiffies(timestamp + - msecs_to_jiffies(HSR_NODE_FORGET_TIME))) { - hsr_nl_nodedown(hsr, node->macaddress_A); - list_del_rcu(&node->mac_list); - /* Note that we need to free this entry later: */ - kfree_rcu(node, rcu_head); + for (i = 0; i < hsr->hash_buckets; i++) { + hlist_for_each_entry_safe(node, tmp, &hsr->node_db[i], + mac_list) { + /* Don't prune own node. + * Neither time_in[HSR_PT_SLAVE_A] + * nor time_in[HSR_PT_SLAVE_B], will ever be updated + * for the master port. Thus the master node will be + * repeatedly pruned leading to packet loss. 
+ */ + if (hsr_addr_is_self(hsr, node->macaddress_A)) + continue; + + /* Shorthand */ + time_a = node->time_in[HSR_PT_SLAVE_A]; + time_b = node->time_in[HSR_PT_SLAVE_B]; + + /* Check for timestamps old enough to + * risk wrap-around + */ + if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2)) + node->time_in_stale[HSR_PT_SLAVE_A] = true; + if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2)) + node->time_in_stale[HSR_PT_SLAVE_B] = true; + + /* Get age of newest frame from node. + * At least one time_in is OK here; nodes get pruned + * long before both time_ins can get stale + */ + timestamp = time_a; + if (node->time_in_stale[HSR_PT_SLAVE_A] || + (!node->time_in_stale[HSR_PT_SLAVE_B] && + time_after(time_b, time_a))) + timestamp = time_b; + + /* Warn of ring error only as long as we get + * frames at all + */ + if (time_is_after_jiffies(timestamp + + msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) { + rcu_read_lock(); + port = get_late_port(hsr, node); + if (port) + hsr_nl_ringerror(hsr, + node->macaddress_A, + port); + rcu_read_unlock(); + } + + /* Prune old entries */ + if (time_is_before_jiffies(timestamp + + msecs_to_jiffies(HSR_NODE_FORGET_TIME))) { + hsr_nl_nodedown(hsr, node->macaddress_A); + hlist_del_rcu(&node->mac_list); + /* Note that we need to free this + * entry later: + */ + kfree_rcu(node, rcu_head); + } } } spin_unlock_bh(&hsr->list_lock); @@ -557,17 +592,19 @@ void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos, unsigned char addr[ETH_ALEN]) { struct hsr_node *node; + u32 hash; + + hash = hsr_mac_hash(hsr, addr); if (!_pos) { - node = list_first_or_null_rcu(&hsr->node_db, - struct hsr_node, mac_list); + node = hsr_node_get_first(&hsr->node_db[hash]); if (node) ether_addr_copy(addr, node->macaddress_A); return node; } node = _pos; - list_for_each_entry_continue_rcu(node, &hsr->node_db, mac_list) { + hlist_for_each_entry_continue_rcu(node, mac_list) { ether_addr_copy(addr, node->macaddress_A); return node; } @@ -587,8 +624,11 @@ int hsr_get_node_data(struct hsr_priv *hsr, struct hsr_node *node; struct hsr_port *port; unsigned long tdiff; + u32 hash; + + hash = hsr_mac_hash(hsr, addr); - node = find_node_by_addr_A(&hsr->node_db, addr); + node = find_node_by_addr_A(&hsr->node_db[hash], addr); if (!node) return -ENOENT; diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index bdbb8c822ba1..d7cce6b161e3 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -28,9 +28,11 @@ struct hsr_frame_info { bool is_from_san; }; +u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr); +struct hsr_node *hsr_node_get_first(struct hlist_head *head); void hsr_del_self_node(struct hsr_priv *hsr); -void hsr_del_nodes(struct list_head *node_db); -struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, +void hsr_del_nodes(struct hlist_head *node_db); +struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db, struct sk_buff *skb, bool is_sup, enum hsr_port_type rx_port); void hsr_handle_sup_frame(struct hsr_frame_info *frame); @@ -68,7 +70,7 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port, void prp_update_san_info(struct hsr_node *node, bool is_sup); struct hsr_node { - struct list_head mac_list; + struct hlist_node mac_list; unsigned char macaddress_A[ETH_ALEN]; unsigned char macaddress_B[ETH_ALEN]; /* Local slave through which AddrB frames are received from this node */ diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index ff9ec7634218..ca556bda3467 100644 --- a/net/hsr/hsr_main.h +++ 
b/net/hsr/hsr_main.h @@ -63,6 +63,9 @@ struct hsr_tag { #define HSR_V1_SUP_LSDUSIZE 52 +#define HSR_HSIZE_SHIFT 8 +#define HSR_HSIZE BIT(HSR_HSIZE_SHIFT) + /* The helper functions below assumes that 'path' occupies the 4 most * significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or * equivalently, the 4 most significant bits of HSR tag byte 14). @@ -201,8 +204,8 @@ struct hsr_proto_ops { struct hsr_priv { struct rcu_head rcu_head; struct list_head ports; - struct list_head node_db; /* Known HSR nodes */ - struct list_head self_node_db; /* MACs of slaves */ + struct hlist_head node_db[HSR_HSIZE]; /* Known HSR nodes */ + struct hlist_head self_node_db; /* MACs of slaves */ struct timer_list announce_timer; /* Supervision frame dispatch */ struct timer_list prune_timer; int announce_count; @@ -212,6 +215,8 @@ struct hsr_priv { spinlock_t seqnr_lock; /* locking for sequence_nr */ spinlock_t list_lock; /* locking for node list */ struct hsr_proto_ops *proto_ops; + u32 hash_buckets; + u32 hash_seed; #define PRP_LAN_ID 0x5 /* 0x1010 for A and 0x1011 for B. Bit 0 is set * based on SLAVE_A or SLAVE_B */ diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index f3c8f91dbe2c..1405c037cf7a 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -105,6 +105,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, static void hsr_dellink(struct net_device *dev, struct list_head *head) { struct hsr_priv *hsr = netdev_priv(dev); + int i; del_timer_sync(&hsr->prune_timer); del_timer_sync(&hsr->announce_timer); @@ -113,7 +114,8 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head) hsr_del_ports(hsr); hsr_del_self_node(hsr); - hsr_del_nodes(&hsr->node_db); + for (i = 0; i < hsr->hash_buckets; i++) + hsr_del_nodes(&hsr->node_db[i]); unregister_netdevice_queue(dev, head); } diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 2cf62718a282..2c087b7f17c5 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -47,6 +47,7 @@ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/ieee802154.h> +#include <linux/if_arp.h> #include <net/ipv6.h> diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index b60c9fd7147e..f79ab942f03b 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -96,12 +96,14 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id) + u32 *next_btf_id, + enum bpf_type_flag *flag) { size_t end; if (atype == BPF_READ) - return btf_struct_access(log, btf, t, off, size, atype, next_btf_id); + return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, + flag); if (t != tcp_sock_type) { bpf_log(log, "only read is supported\n"); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4d61ddd8a0ec..54811728d906 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -32,6 +32,7 @@ #include <linux/list.h> #include <linux/slab.h> +#include <net/inet_dscp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> @@ -735,8 +736,16 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, memset(cfg, 0, sizeof(*cfg)); rtm = nlmsg_data(nlh); + + if (!inet_validate_dscp(rtm->rtm_tos)) { + NL_SET_ERR_MSG(extack, + "Invalid dsfield (tos): ECN bits must be 0"); + err = -EINVAL; + goto errout; + } + cfg->fc_dscp = inet_dsfield_to_dscp(rtm->rtm_tos); + cfg->fc_dst_len 
= rtm->rtm_dst_len; - cfg->fc_tos = rtm->rtm_tos; cfg->fc_table = rtm->rtm_table; cfg->fc_protocol = rtm->rtm_protocol; cfg->fc_scope = rtm->rtm_scope; @@ -1547,7 +1556,7 @@ static void ip_fib_net_exit(struct net *net) { int i; - rtnl_lock(); + ASSERT_RTNL(); #ifdef CONFIG_IP_MULTIPLE_TABLES RCU_INIT_POINTER(net->ipv4.fib_main, NULL); RCU_INIT_POINTER(net->ipv4.fib_default, NULL); @@ -1572,7 +1581,7 @@ static void ip_fib_net_exit(struct net *net) #ifdef CONFIG_IP_MULTIPLE_TABLES fib4_rules_exit(net); #endif - rtnl_unlock(); + kfree(net->ipv4.fib_table_hash); fib4_notifier_exit(net); } @@ -1599,7 +1608,9 @@ out: out_proc: nl_fib_lookup_exit(net); out_nlfl: + rtnl_lock(); ip_fib_net_exit(net); + rtnl_unlock(); goto out; } @@ -1607,12 +1618,23 @@ static void __net_exit fib_net_exit(struct net *net) { fib_proc_exit(net); nl_fib_lookup_exit(net); - ip_fib_net_exit(net); +} + +static void __net_exit fib_net_exit_batch(struct list_head *net_list) +{ + struct net *net; + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) + ip_fib_net_exit(net); + + rtnl_unlock(); } static struct pernet_operations fib_net_ops = { .init = fib_net_init, .exit = fib_net_exit, + .exit_batch = fib_net_exit_batch, }; void __init ip_fib_init(void) diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index e184bcb19943..a63014b54809 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -4,13 +4,14 @@ #include <linux/types.h> #include <linux/list.h> +#include <net/inet_dscp.h> #include <net/ip_fib.h> #include <net/nexthop.h> struct fib_alias { struct hlist_node fa_list; struct fib_info *fa_info; - u8 fa_tos; + dscp_t fa_dscp; u8 fa_type; u8 fa_state; u8 fa_slen; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index e0b6c8b6de57..117c48571cf0 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -23,6 +23,7 @@ #include <linux/list.h> #include <linux/rcupdate.h> #include <linux/export.h> +#include <net/inet_dscp.h> #include <net/ip.h> #include <net/route.h> #include <net/tcp.h> @@ -35,7 +36,7 @@ struct fib4_rule { struct fib_rule common; u8 dst_len; u8 src_len; - u8 tos; + dscp_t dscp; __be32 src; __be32 srcmask; __be32 dst; @@ -49,7 +50,7 @@ static bool fib4_rule_matchall(const struct fib_rule *rule) { struct fib4_rule *r = container_of(rule, struct fib4_rule, common); - if (r->dst_len || r->src_len || r->tos) + if (r->dst_len || r->src_len || r->dscp) return false; return fib_rule_matchall(rule); } @@ -185,7 +186,7 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule, ((daddr ^ r->dst) & r->dstmask)) return 0; - if (r->tos && (r->tos != fl4->flowi4_tos)) + if (r->dscp && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos)) return 0; if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto)) @@ -225,10 +226,12 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, int err = -EINVAL; struct fib4_rule *rule4 = (struct fib4_rule *) rule; - if (frh->tos & ~IPTOS_TOS_MASK) { - NL_SET_ERR_MSG(extack, "Invalid tos"); + if (!inet_validate_dscp(frh->tos)) { + NL_SET_ERR_MSG(extack, + "Invalid dsfield (tos): ECN bits must be 0"); goto errout; } + rule4->dscp = inet_dsfield_to_dscp(frh->tos); /* split local/main if they are not already split */ err = fib_unmerge(net); @@ -270,7 +273,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->srcmask = inet_make_mask(rule4->src_len); rule4->dst_len = frh->dst_len; rule4->dstmask = inet_make_mask(rule4->dst_len); - rule4->tos = frh->tos; net->ipv4.fib_has_custom_rules = true; 
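The fib_rules, fib_frontend and fib_trie hunks in this series all follow the same dscp_t pattern: a user-supplied dsfield is validated once (the two ECN bits must be zero), stored as the Sparse-checked dscp_t type, and converted back to a plain dsfield only at comparison and netlink-fill points. A condensed sketch of the helpers from include/net/inet_dscp.h, reproduced from memory, so treat the exact definitions as approximate:

	typedef u8 __bitwise dscp_t;

	#define INET_DSCP_MASK 0xfc	/* six DSCP bits, ECN bits clear */

	/* A dsfield encodes a pure DSCP value iff its ECN bits are zero. */
	static inline bool inet_validate_dscp(__u8 dsfield)
	{
		return !(dsfield & ~INET_DSCP_MASK);
	}

	static inline dscp_t inet_dsfield_to_dscp(__u8 dsfield)
	{
		return (__force dscp_t)(dsfield & INET_DSCP_MASK);
	}

	static inline __u8 inet_dscp_to_dsfield(dscp_t dscp)
	{
		return (__force __u8)dscp;
	}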
@@ -313,7 +315,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, if (frh->dst_len && (rule4->dst_len != frh->dst_len)) return 0; - if (frh->tos && (rule4->tos != frh->tos)) + if (frh->tos && inet_dscp_to_dsfield(rule4->dscp) != frh->tos) return 0; #ifdef CONFIG_IP_ROUTE_CLASSID @@ -337,7 +339,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->dst_len = rule4->dst_len; frh->src_len = rule4->src_len; - frh->tos = rule4->tos; + frh->tos = inet_dscp_to_dsfield(rule4->dscp); if ((rule4->dst_len && nla_put_in_addr(skb, FRA_DST, rule4->dst)) || diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 4c5399450682..c9c4f2f66b38 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -32,6 +32,7 @@ #include <linux/hash.h> #include <net/arp.h> +#include <net/inet_dscp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> @@ -523,7 +524,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, fri.tb_id = tb_id; fri.dst = key; fri.dst_len = dst_len; - fri.tos = fa->fa_tos; + fri.tos = inet_dscp_to_dsfield(fa->fa_dscp); fri.type = fa->fa_type; fri.offload = fa->offload; fri.trap = fa->trap; @@ -2039,7 +2040,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) int order = -1, last_idx = -1; struct fib_alias *fa, *fa1 = NULL; u32 last_prio = res->fi->fib_priority; - u8 last_tos = 0; + dscp_t last_dscp = 0; hlist_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; @@ -2047,19 +2048,20 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) if (fa->fa_slen != slen) continue; - if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) + if (fa->fa_dscp && + fa->fa_dscp != inet_dsfield_to_dscp(flp->flowi4_tos)) continue; if (fa->tb_id != tb->tb_id) continue; if (next_fi->fib_priority > last_prio && - fa->fa_tos == last_tos) { - if (last_tos) + fa->fa_dscp == last_dscp) { + if (last_dscp) continue; break; } if (next_fi->fib_flags & RTNH_F_DEAD) continue; - last_tos = fa->fa_tos; + last_dscp = fa->fa_dscp; last_prio = next_fi->fib_priority; if (next_fi->fib_scope != res->scope || diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 8060524f4256..c05cd105e95e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -61,6 +61,7 @@ #include <linux/vmalloc.h> #include <linux/notifier.h> #include <net/net_namespace.h> +#include <net/inet_dscp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> @@ -81,7 +82,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, .dst = dst, .dst_len = dst_len, .fi = fa->fa_info, - .tos = fa->fa_tos, + .tos = inet_dscp_to_dsfield(fa->fa_dscp), .type = fa->fa_type, .tb_id = fa->tb_id, }; @@ -98,7 +99,7 @@ static int call_fib_entry_notifiers(struct net *net, .dst = dst, .dst_len = dst_len, .fi = fa->fa_info, - .tos = fa->fa_tos, + .tos = inet_dscp_to_dsfield(fa->fa_dscp), .type = fa->fa_type, .tb_id = fa->tb_id, }; @@ -973,13 +974,13 @@ static struct key_vector *fib_find_node(struct trie *t, return n; } -/* Return the first fib alias matching TOS with +/* Return the first fib alias matching DSCP with * priority less than or equal to PRIO. * If 'find_first' is set, return the first matching - * fib alias, regardless of TOS and priority. + * fib alias, regardless of DSCP and priority. 
*/ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen, - u8 tos, u32 prio, u32 tb_id, + dscp_t dscp, u32 prio, u32 tb_id, bool find_first) { struct fib_alias *fa; @@ -988,6 +989,10 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen, return NULL; hlist_for_each_entry(fa, fah, fa_list) { + /* Avoid Sparse warning when using dscp_t in inequalities */ + u8 __fa_dscp = inet_dscp_to_dsfield(fa->fa_dscp); + u8 __dscp = inet_dscp_to_dsfield(dscp); + if (fa->fa_slen < slen) continue; if (fa->fa_slen != slen) @@ -998,9 +1003,9 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen, break; if (find_first) return fa; - if (fa->fa_tos > tos) + if (__fa_dscp > __dscp) continue; - if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) + if (fa->fa_info->fib_priority >= prio || __fa_dscp < __dscp) return fa; } @@ -1027,8 +1032,8 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri) hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { if (fa->fa_slen == slen && fa->tb_id == fri->tb_id && - fa->fa_tos == fri->tos && fa->fa_info == fri->fi && - fa->fa_type == fri->type) + fa->fa_dscp == inet_dsfield_to_dscp(fri->tos) && + fa->fa_info == fri->fi && fa->fa_type == fri->type) return fa; } @@ -1210,7 +1215,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, struct fib_info *fi; u8 plen = cfg->fc_dst_len; u8 slen = KEYLENGTH - plen; - u8 tos = cfg->fc_tos; + dscp_t dscp; u32 key; int err; @@ -1227,12 +1232,13 @@ int fib_table_insert(struct net *net, struct fib_table *tb, goto err; } + dscp = cfg->fc_dscp; l = fib_find_node(t, &tp, key); - fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority, + fa = l ? fib_find_alias(&l->leaf, slen, dscp, fi->fib_priority, tb->tb_id, false) : NULL; /* Now fa, if non-NULL, points to the first fib alias - * with the same keys [prefix,tos,priority], if such key already + * with the same keys [prefix,dscp,priority], if such key already * exists or to the node before which we will insert new one. * * If fa is NULL, we will need to allocate a new one and @@ -1240,7 +1246,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, * of the new alias. 
*/ - if (fa && fa->fa_tos == tos && + if (fa && fa->fa_dscp == dscp && fa->fa_info->fib_priority == fi->fib_priority) { struct fib_alias *fa_first, *fa_match; @@ -1260,7 +1266,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, hlist_for_each_entry_from(fa, fa_list) { if ((fa->fa_slen != slen) || (fa->tb_id != tb->tb_id) || - (fa->fa_tos != tos)) + (fa->fa_dscp != dscp)) break; if (fa->fa_info->fib_priority != fi->fib_priority) break; @@ -1288,7 +1294,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, goto out; fi_drop = fa->fa_info; - new_fa->fa_tos = fa->fa_tos; + new_fa->fa_dscp = fa->fa_dscp; new_fa->fa_info = fi; new_fa->fa_type = cfg->fc_type; state = fa->fa_state; @@ -1351,7 +1357,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, goto out; new_fa->fa_info = fi; - new_fa->fa_tos = tos; + new_fa->fa_dscp = dscp; new_fa->fa_type = cfg->fc_type; new_fa->fa_state = 0; new_fa->fa_slen = slen; @@ -1567,7 +1573,8 @@ found: if (index >= (1ul << fa->fa_slen)) continue; } - if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) + if (fa->fa_dscp && + inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos) continue; if (fi->fib_dead) continue; @@ -1703,7 +1710,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb, struct key_vector *l, *tp; u8 plen = cfg->fc_dst_len; u8 slen = KEYLENGTH - plen; - u8 tos = cfg->fc_tos; + dscp_t dscp; u32 key; key = ntohl(cfg->fc_dst); @@ -1715,11 +1722,13 @@ int fib_table_delete(struct net *net, struct fib_table *tb, if (!l) return -ESRCH; - fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id, false); + dscp = cfg->fc_dscp; + fa = fib_find_alias(&l->leaf, slen, dscp, 0, tb->tb_id, false); if (!fa) return -ESRCH; - pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); + pr_debug("Deleting %08x/%d dsfield=0x%02x t=%p\n", key, plen, + inet_dscp_to_dsfield(dscp), t); fa_to_delete = NULL; hlist_for_each_entry_from(fa, fa_list) { @@ -1727,7 +1736,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb, if ((fa->fa_slen != slen) || (fa->tb_id != tb->tb_id) || - (fa->fa_tos != tos)) + (fa->fa_dscp != dscp)) break; if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && @@ -2295,7 +2304,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, fri.tb_id = tb->tb_id; fri.dst = xkey; fri.dst_len = KEYLENGTH - fa->fa_slen; - fri.tos = fa->fa_tos; + fri.tos = inet_dscp_to_dsfield(fa->fa_dscp); fri.type = fa->fa_type; fri.offload = fa->offload; fri.trap = fa->trap; @@ -2807,8 +2816,9 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) fa->fa_info->fib_scope), rtn_type(buf2, sizeof(buf2), fa->fa_type)); - if (fa->fa_tos) - seq_printf(seq, " tos=%d", fa->fa_tos); + if (fa->fa_dscp) + seq_printf(seq, " tos=%d", + inet_dscp_to_dsfield(fa->fa_dscp)); seq_putc(seq, '\n'); } } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 30ab717ff1b8..17440840a791 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -637,7 +637,9 @@ int __inet_hash(struct sock *sk, struct sock *osk) int err = 0; if (sk->sk_state != TCP_LISTEN) { + local_bh_disable(); inet_ehash_nolisten(sk, osk, NULL); + local_bh_enable(); return 0; } WARN_ON(!sk_unhashed(sk)); @@ -669,45 +671,54 @@ int inet_hash(struct sock *sk) { int err = 0; - if (sk->sk_state != TCP_CLOSE) { - local_bh_disable(); + if (sk->sk_state != TCP_CLOSE) err = __inet_hash(sk, NULL); - local_bh_enable(); - } return err; } EXPORT_SYMBOL_GPL(inet_hash); -void inet_unhash(struct sock *sk) +static void 
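/* Note on the fib_find_alias() change above: dscp_t is a __bitwise type,
 * so Sparse accepts equality tests between dscp_t values but warns when
 * they appear in relational comparisons. The ordering checks therefore go
 * through plain u8 temporaries while exact matches stay typed:
 *
 *	u8 __fa_dscp = inet_dscp_to_dsfield(fa->fa_dscp);
 *	u8 __dscp = inet_dscp_to_dsfield(dscp);
 *
 *	if (__fa_dscp > __dscp)		// ordering needs raw u8 values
 *		continue;
 *	...
 *	if (fa->fa_dscp == dscp)	// equality is fine on dscp_t
 *		...
 */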
__inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct inet_listen_hashbucket *ilb = NULL; - spinlock_t *lock; - if (sk_unhashed(sk)) return; - if (sk->sk_state == TCP_LISTEN) { - ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &ilb->lock; - } else { - lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - } - spin_lock_bh(lock); - if (sk_unhashed(sk)) - goto unlock; - if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_stop_listen_sock(sk); if (ilb) { + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + inet_unhash2(hashinfo, sk); ilb->count--; } __sk_nulls_del_node_init_rcu(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); -unlock: - spin_unlock_bh(lock); +} + +void inet_unhash(struct sock *sk) +{ + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + + if (sk_unhashed(sk)) + return; + + if (sk->sk_state == TCP_LISTEN) { + struct inet_listen_hashbucket *ilb; + + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + /* Don't disable bottom halves while acquiring the lock to + * avoid circular locking dependency on PREEMPT_RT. + */ + spin_lock(&ilb->lock); + __inet_unhash(sk, ilb); + spin_unlock(&ilb->lock); + } else { + spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + + spin_lock_bh(lock); + __inet_unhash(sk, NULL); + spin_unlock_bh(lock); + } } EXPORT_SYMBOL_GPL(inet_unhash); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 3a025c011971..d94f9f7e60c3 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -196,7 +196,8 @@ resubmit: if (ipprot) { if (!ipprot->no_policy) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - kfree_skb(skb); + kfree_skb_reason(skb, + SKB_DROP_REASON_XFRM_POLICY); return; } nf_reset_ct(skb); @@ -215,7 +216,7 @@ resubmit: icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); } - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IP_NOPROTO); } else { __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); consume_skb(skb); @@ -318,8 +319,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, { const struct iphdr *iph = ip_hdr(skb); int (*edemux)(struct sk_buff *skb); + int err, drop_reason; struct rtable *rt; - int err; + + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (ip_can_use_hint(skb, iph, hint)) { err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos, @@ -396,19 +399,23 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, * so-called "hole-196" attack) so do it for both. */ if (in_dev && - IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) + IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) { + drop_reason = SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST; goto drop; + } } return NET_RX_SUCCESS; drop: - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return NET_RX_DROP; drop_error: - if (err == -EXDEV) + if (err == -EXDEV) { + drop_reason = SKB_DROP_REASON_IP_RPFILTER; __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); + } goto drop; } @@ -436,13 +443,16 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) { const struct iphdr *iph; + int drop_reason; u32 len; /* When the interface is in promisc. mode, drop all the crap * that it receives, do not try to analyse it. 
*/ - if (skb->pkt_type == PACKET_OTHERHOST) + if (skb->pkt_type == PACKET_OTHERHOST) { + drop_reason = SKB_DROP_REASON_OTHERHOST; goto drop; + } __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len); @@ -452,6 +462,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) goto out; } + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto inhdr_error; @@ -488,6 +499,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) len = ntohs(iph->tot_len); if (skb->len < len) { + drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) @@ -516,11 +528,14 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) return skb; csum_error: + drop_reason = SKB_DROP_REASON_IP_CSUM; __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); inhdr_error: + if (drop_reason == SKB_DROP_REASON_NOT_SPECIFIED) + drop_reason = SKB_DROP_REASON_IP_INHDR; __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); drop: - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); out: return NULL; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 07274619b9ea..4a55a620e526 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -266,13 +266,12 @@ static void __net_exit ipmr_rules_exit(struct net *net) { struct mr_table *mrt, *next; - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { list_del(&mrt->list); ipmr_free_table(mrt); } fib_rules_unregister(net->ipv4.mr_rules_ops); - rtnl_unlock(); } static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, @@ -328,10 +327,9 @@ static int __net_init ipmr_rules_init(struct net *net) static void __net_exit ipmr_rules_exit(struct net *net) { - rtnl_lock(); + ASSERT_RTNL(); ipmr_free_table(net->ipv4.mrt); net->ipv4.mrt = NULL; - rtnl_unlock(); } static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, @@ -3075,7 +3073,9 @@ static int __net_init ipmr_net_init(struct net *net) proc_cache_fail: remove_proc_entry("ip_mr_vif", net->proc_net); proc_vif_fail: + rtnl_lock(); ipmr_rules_exit(net); + rtnl_unlock(); #endif ipmr_rules_fail: ipmr_notifier_exit(net); @@ -3090,12 +3090,22 @@ static void __net_exit ipmr_net_exit(struct net *net) remove_proc_entry("ip_mr_vif", net->proc_net); #endif ipmr_notifier_exit(net); - ipmr_rules_exit(net); +} + +static void __net_exit ipmr_net_exit_batch(struct list_head *net_list) +{ + struct net *net; + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) + ipmr_rules_exit(net); + rtnl_unlock(); } static struct pernet_operations ipmr_net_ops = { .init = ipmr_net_init, .exit = ipmr_net_exit, + .exit_batch = ipmr_net_exit_batch, }; int __init ip_mr_init(void) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index eeafeccebb8d..e459a391e607 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3733,12 +3733,16 @@ out: } EXPORT_SYMBOL(nexthop_res_grp_activity_update); -static void __net_exit nexthop_net_exit(struct net *net) +static void __net_exit nexthop_net_exit_batch(struct list_head *net_list) { + struct net *net; + rtnl_lock(); - flush_all_nexthops(net); + list_for_each_entry(net, net_list, exit_list) { + flush_all_nexthops(net); + kfree(net->nexthop.devhash); + } rtnl_unlock(); - kfree(net->nexthop.devhash); } static int __net_init nexthop_net_init(struct net *net) @@ -3756,7 +3760,7 @@ static int __net_init nexthop_net_init(struct net *net) static struct pernet_operations nexthop_net_ops = { .init = 
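/* The ipmr and nexthop changes above share one pattern: per-netns exit
 * hooks that used to take rtnl_lock() themselves become ->exit_batch()
 * hooks, so a single RTNL acquisition covers every namespace being torn
 * down, and the inner helpers merely ASSERT_RTNL(). A sketch of the
 * pattern, with a hypothetical foo_rules_exit() as the per-netns helper:
 *
 *	static void __net_exit foo_net_exit_batch(struct list_head *net_list)
 *	{
 *		struct net *net;
 *
 *		rtnl_lock();
 *		list_for_each_entry(net, net_list, exit_list)
 *			foo_rules_exit(net);	// ASSERT_RTNL() inside
 *		rtnl_unlock();
 *	}
 *
 * The error paths in the ->init() hooks keep calling the helper directly
 * and so must wrap it in rtnl_lock()/rtnl_unlock() themselves.
 */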
nexthop_net_init, - .exit = nexthop_net_exit, + .exit_batch = nexthop_net_exit_batch, }; static int __init nexthop_init(void) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8b35075088e1..634766e6c7cc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -84,6 +84,7 @@ #include <linux/jhash.h> #include <net/dst.h> #include <net/dst_metadata.h> +#include <net/inet_dscp.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/route.h> @@ -3391,7 +3392,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (fa->fa_slen == slen && fa->tb_id == fri.tb_id && - fa->fa_tos == fri.tos && + fa->fa_dscp == inet_dsfield_to_dscp(fri.tos) && fa->fa_info == res.fi && fa->fa_type == fri.type) { fri.offload = fa->offload; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 090360939401..6b4d8361560f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2093,16 +2093,20 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); + int drop_reason; /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) + if (rc == -ENOMEM) { UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); - else + drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF; + } else { UDP_INC_STATS(sock_net(sk), UDP_MIB_MEMERRORS, is_udplite); + drop_reason = SKB_DROP_REASON_PROTO_MEM; + } UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); trace_udp_fail_queue_rcv_skb(rc, sk); return -1; } @@ -2120,14 +2124,17 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) */ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { + int drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); /* * Charge it to the socket, dropping if the queue is full. 
*/ - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { + drop_reason = SKB_DROP_REASON_XFRM_POLICY; goto drop; + } nf_reset_ct(skb); if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) { @@ -2204,8 +2211,10 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) + if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) { + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto drop; + } udp_csum_pull_header(skb); @@ -2213,11 +2222,12 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) return __udp_queue_rcv_skb(sk, skb); csum_error: + drop_reason = SKB_DROP_REASON_UDP_CSUM; __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return -1; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f927c199a93c..4f402bc38f05 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -146,18 +146,11 @@ static int ipv6_generate_stable_address(struct in6_addr *addr, #define IN6_ADDR_HSIZE_SHIFT 8 #define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT) -/* - * Configured unicast address hash table - */ -static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE]; -static DEFINE_SPINLOCK(addrconf_hash_lock); -static void addrconf_verify(void); -static void addrconf_verify_rtnl(void); -static void addrconf_verify_work(struct work_struct *); +static void addrconf_verify(struct net *net); +static void addrconf_verify_rtnl(struct net *net); static struct workqueue_struct *addrconf_wq; -static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work); static void addrconf_join_anycast(struct inet6_ifaddr *ifp); static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); @@ -554,7 +547,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, #ifdef CONFIG_IPV6_MROUTE if ((all || type == NETCONFA_MC_FORWARDING) && nla_put_s32(skb, NETCONFA_MC_FORWARDING, - devconf->mc_forwarding) < 0) + atomic_read(&devconf->mc_forwarding)) < 0) goto nla_put_failure; #endif if ((all || type == NETCONFA_PROXY_NEIGH) && @@ -1011,9 +1004,7 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, { struct inet6_ifaddr *ifp; - hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; + hlist_for_each_entry(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr)) { if (!dev || ifp->idev->dev == dev) return true; @@ -1024,20 +1015,21 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa) { - unsigned int hash = inet6_addr_hash(dev_net(dev), &ifa->addr); + struct net *net = dev_net(dev); + unsigned int hash = inet6_addr_hash(net, &ifa->addr); int err = 0; - spin_lock(&addrconf_hash_lock); + spin_lock(&net->ipv6.addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ - if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) { + if (ipv6_chk_same_addr(net, &ifa->addr, dev, hash)) { netdev_dbg(dev, "ipv6_add_addr: already assigned\n"); err = -EEXIST; } else { - hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); + hlist_add_head_rcu(&ifa->addr_lst, &net->ipv6.inet6_addr_lst[hash]); } - 
spin_unlock(&addrconf_hash_lock); + spin_unlock(&net->ipv6.addrconf_hash_lock); return err; } @@ -1261,9 +1253,10 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, static void ipv6_del_addr(struct inet6_ifaddr *ifp) { - int state; enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP; + struct net *net = dev_net(ifp->idev->dev); unsigned long expires; + int state; ASSERT_RTNL(); @@ -1275,9 +1268,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if (state == INET6_IFADDR_STATE_DEAD) goto out; - spin_lock_bh(&addrconf_hash_lock); + spin_lock_bh(&net->ipv6.addrconf_hash_lock); hlist_del_init_rcu(&ifp->addr_lst); - spin_unlock_bh(&addrconf_hash_lock); + spin_unlock_bh(&net->ipv6.addrconf_hash_lock); write_lock_bh(&ifp->idev->lock); @@ -1920,10 +1913,8 @@ __ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, if (skip_dev_check) dev = NULL; - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { ndev = ifp->idev->dev; - if (!net_eq(dev_net(ndev), net)) - continue; if (l3mdev_master_dev_rcu(ndev) != l3mdev) continue; @@ -2027,9 +2018,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add struct inet6_ifaddr *ifp, *result = NULL; rcu_read_lock(); - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; + hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr)) { if (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { @@ -2096,7 +2085,7 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp) void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; - struct net *net = dev_net(ifp->idev->dev); + struct net *net = dev_net(idev->dev); if (addrconf_dad_end(ifp)) { in6_ifa_put(ifp); @@ -2675,7 +2664,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, create, now); in6_ifa_put(ifp); - addrconf_verify(); + addrconf_verify(net); } return 0; @@ -2987,7 +2976,7 @@ static int inet6_addr_add(struct net *net, int ifindex, manage_tempaddrs(idev, ifp, cfg->valid_lft, cfg->preferred_lft, true, jiffies); in6_ifa_put(ifp); - addrconf_verify_rtnl(); + addrconf_verify_rtnl(net); return 0; } else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) { ipv6_mc_config(net->ipv6.mc_autojoin_sk, false, @@ -3027,7 +3016,7 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, manage_tempaddrs(idev, ifp, 0, 0, false, jiffies); ipv6_del_addr(ifp); - addrconf_verify_rtnl(); + addrconf_verify_rtnl(net); if (ipv6_addr_is_multicast(pfx)) { ipv6_mc_config(net->ipv6.mc_autojoin_sk, false, pfx, dev->ifindex); @@ -3772,9 +3761,9 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { - struct hlist_head *h = &inet6_addr_lst[i]; + struct hlist_head *h = &net->ipv6.inet6_addr_lst[i]; - spin_lock_bh(&addrconf_hash_lock); + spin_lock_bh(&net->ipv6.addrconf_hash_lock); restart: hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { @@ -3790,7 +3779,7 @@ restart: } } } - spin_unlock_bh(&addrconf_hash_lock); + spin_unlock_bh(&net->ipv6.addrconf_hash_lock); } write_lock_bh(&idev->lock); @@ -4246,7 +4235,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, * before this temporary address becomes deprecated. 
*/ if (ifp->flags & IFA_F_TEMPORARY) - addrconf_verify_rtnl(); + addrconf_verify_rtnl(dev_net(dev)); } static void addrconf_dad_run(struct inet6_dev *idev, bool restart) @@ -4288,10 +4277,8 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) } for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { - hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket], + hlist_for_each_entry_rcu(ifa, &net->ipv6.inet6_addr_lst[state->bucket], addr_lst) { - if (!net_eq(dev_net(ifa->idev->dev), net)) - continue; /* sync with offset */ if (p < state->offset) { p++; @@ -4314,8 +4301,6 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct net *net = seq_file_net(seq); hlist_for_each_entry_continue_rcu(ifa, addr_lst) { - if (!net_eq(dev_net(ifa->idev->dev), net)) - continue; state->offset++; return ifa; } @@ -4323,9 +4308,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, state->offset = 0; while (++state->bucket < IN6_ADDR_HSIZE) { hlist_for_each_entry_rcu(ifa, - &inet6_addr_lst[state->bucket], addr_lst) { - if (!net_eq(dev_net(ifa->idev->dev), net)) - continue; + &net->ipv6.inet6_addr_lst[state->bucket], addr_lst) { return ifa; } } @@ -4413,9 +4396,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) int ret = 0; rcu_read_lock(); - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; + hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr) && (ifp->flags & IFA_F_HOMEADDRESS)) { ret = 1; @@ -4453,9 +4434,7 @@ int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs, hash = inet6_addr_hash(net, addr); hash_found = false; - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; + hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr)) { hash_found = true; @@ -4484,7 +4463,7 @@ int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs, * Periodic address status verification */ -static void addrconf_verify_rtnl(void) +static void addrconf_verify_rtnl(struct net *net) { unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; @@ -4496,11 +4475,11 @@ static void addrconf_verify_rtnl(void) now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); - cancel_delayed_work(&addr_chk_work); + cancel_delayed_work(&net->ipv6.addr_chk_work); for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &net->ipv6.inet6_addr_lst[i], addr_lst) { unsigned long age; /* When setting preferred_lft to a value not zero or @@ -4599,20 +4578,23 @@ restart: pr_debug("now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", now, next, next_sec, next_sched); - mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now); + mod_delayed_work(addrconf_wq, &net->ipv6.addr_chk_work, next_sched - now); rcu_read_unlock_bh(); } static void addrconf_verify_work(struct work_struct *w) { + struct net *net = container_of(to_delayed_work(w), struct net, + ipv6.addr_chk_work); + rtnl_lock(); - addrconf_verify_rtnl(); + addrconf_verify_rtnl(net); rtnl_unlock(); } -static void addrconf_verify(void) +static void addrconf_verify(struct net *net) { - mod_delayed_work(addrconf_wq, &addr_chk_work, 0); + mod_delayed_work(addrconf_wq, &net->ipv6.addr_chk_work, 0); } static 
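/* With the verification work now embedded per-netns, addrconf_verify_work()
 * above recovers its namespace by pointer arithmetic alone:
 *
 *	struct net *net = container_of(to_delayed_work(w),
 *				       struct net, ipv6.addr_chk_work);
 *
 * to_delayed_work() maps the work_struct callback argument back to the
 * enclosing struct delayed_work, and container_of() walks from that member
 * up to struct net. This is valid precisely because addr_chk_work is
 * embedded in struct net rather than allocated separately, which is also
 * what lets each namespace cancel or reschedule only its own timer.
 */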
struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local, @@ -4708,7 +4690,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, return 0; } -static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) +static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp, + struct ifa6_config *cfg) { u32 flags; clock_t expires; @@ -4822,7 +4805,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) jiffies); } - addrconf_verify_rtnl(); + addrconf_verify_rtnl(net); return 0; } @@ -4909,7 +4892,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, !(nlh->nlmsg_flags & NLM_F_REPLACE)) err = -EEXIST; else - err = inet6_addr_modify(ifa, &cfg); + err = inet6_addr_modify(net, ifa, &cfg); in6_ifa_put(ifa); @@ -5533,7 +5516,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE - array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; + array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding); #endif array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; @@ -5794,7 +5777,7 @@ update_lft: write_unlock_bh(&idev->lock); inet6_ifinfo_notify(RTM_NEWLINK, idev); - addrconf_verify_rtnl(); + addrconf_verify_rtnl(dev_net(dev)); return 0; } @@ -7111,6 +7094,14 @@ static int __net_init addrconf_init_net(struct net *net) int err = -ENOMEM; struct ipv6_devconf *all, *dflt; + spin_lock_init(&net->ipv6.addrconf_hash_lock); + INIT_DEFERRABLE_WORK(&net->ipv6.addr_chk_work, addrconf_verify_work); + net->ipv6.inet6_addr_lst = kcalloc(IN6_ADDR_HSIZE, + sizeof(struct hlist_head), + GFP_KERNEL); + if (!net->ipv6.inet6_addr_lst) + goto err_alloc_addr; + all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL); if (!all) goto err_alloc_all; @@ -7172,11 +7163,15 @@ err_reg_all: err_alloc_dflt: kfree(all); err_alloc_all: + kfree(net->ipv6.inet6_addr_lst); +err_alloc_addr: return err; } static void __net_exit addrconf_exit_net(struct net *net) { + int i; + #ifdef CONFIG_SYSCTL __addrconf_sysctl_unregister(net, net->ipv6.devconf_dflt, NETCONFA_IFINDEX_DEFAULT); @@ -7184,7 +7179,19 @@ static void __net_exit addrconf_exit_net(struct net *net) NETCONFA_IFINDEX_ALL); #endif kfree(net->ipv6.devconf_dflt); + net->ipv6.devconf_dflt = NULL; kfree(net->ipv6.devconf_all); + net->ipv6.devconf_all = NULL; + + cancel_delayed_work(&net->ipv6.addr_chk_work); + /* + * Check hash table, then free it. + */ + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON_ONCE(!hlist_empty(&net->ipv6.inet6_addr_lst[i])); + + kfree(net->ipv6.inet6_addr_lst); + net->ipv6.inet6_addr_lst = NULL; } static struct pernet_operations addrconf_ops = { @@ -7207,7 +7214,7 @@ static struct rtnl_af_ops inet6_ops __read_mostly = { int __init addrconf_init(void) { struct inet6_dev *idev; - int i, err; + int err; err = ipv6_addr_label_init(); if (err < 0) { @@ -7254,12 +7261,9 @@ int __init addrconf_init(void) ip6_route_init_special_entries(); - for (i = 0; i < IN6_ADDR_HSIZE; i++) - INIT_HLIST_HEAD(&inet6_addr_lst[i]); - register_netdevice_notifier(&ipv6_dev_notf); - addrconf_verify(); + addrconf_verify(&init_net); rtnl_af_register(&inet6_ops); @@ -7317,7 +7321,6 @@ out: void addrconf_cleanup(void) { struct net_device *dev; - int i; unregister_netdevice_notifier(&ipv6_dev_notf); unregister_pernet_subsys(&addrconf_ops); @@ -7335,14 +7338,6 @@ void addrconf_cleanup(void) } addrconf_ifdown(init_net.loopback_dev, true); - /* - * Check hash table. 
- */ - spin_lock_bh(&addrconf_hash_lock); - for (i = 0; i < IN6_ADDR_HSIZE; i++) - WARN_ON(!hlist_empty(&inet6_addr_lst[i])); - spin_unlock_bh(&addrconf_hash_lock); - cancel_delayed_work(&addr_chk_work); rtnl_unlock(); destroy_workqueue(addrconf_wq); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index ec029c86ae06..7c2003833010 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -16,6 +16,7 @@ #include <linux/indirect_call_wrapper.h> #include <net/fib_rules.h> +#include <net/inet_dscp.h> #include <net/ipv6.h> #include <net/addrconf.h> #include <net/ip6_route.h> @@ -25,14 +26,14 @@ struct fib6_rule { struct fib_rule common; struct rt6key src; struct rt6key dst; - u8 tclass; + dscp_t dscp; }; static bool fib6_rule_matchall(const struct fib_rule *rule) { struct fib6_rule *r = container_of(rule, struct fib6_rule, common); - if (r->dst.plen || r->src.plen || r->tclass) + if (r->dst.plen || r->src.plen || r->dscp) return false; return fib_rule_matchall(rule); } @@ -323,7 +324,7 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule, return 0; } - if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel)) + if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel)) return 0; if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto)) @@ -349,6 +350,13 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct net *net = sock_net(skb->sk); struct fib6_rule *rule6 = (struct fib6_rule *) rule; + if (!inet_validate_dscp(frh->tos)) { + NL_SET_ERR_MSG(extack, + "Invalid dsfield (tos): ECN bits must be 0"); + goto errout; + } + rule6->dscp = inet_dsfield_to_dscp(frh->tos); + if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) { if (rule->table == RT6_TABLE_UNSPEC) { NL_SET_ERR_MSG(extack, "Invalid table"); @@ -369,7 +377,6 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule6->src.plen = frh->src_len; rule6->dst.plen = frh->dst_len; - rule6->tclass = frh->tos; if (fib_rule_requires_fldissect(rule)) net->ipv6.fib6_rules_require_fldissect++; @@ -402,7 +409,7 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, if (frh->dst_len && (rule6->dst.plen != frh->dst_len)) return 0; - if (frh->tos && (rule6->tclass != frh->tos)) + if (frh->tos && inet_dscp_to_dsfield(rule6->dscp) != frh->tos) return 0; if (frh->src_len && @@ -423,7 +430,7 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->dst_len = rule6->dst.plen; frh->src_len = rule6->src.plen; - frh->tos = rule6->tclass; + frh->tos = inet_dscp_to_dsfield(rule6->dscp); if ((rule6->dst.plen && nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) || @@ -486,16 +493,21 @@ out_fib6_rules_ops: goto out; } -static void __net_exit fib6_rules_net_exit(struct net *net) +static void __net_exit fib6_rules_net_exit_batch(struct list_head *net_list) { + struct net *net; + rtnl_lock(); - fib_rules_unregister(net->ipv6.fib6_rules_ops); + list_for_each_entry(net, net_list, exit_list) { + fib_rules_unregister(net->ipv6.fib6_rules_ops); + cond_resched(); + } rtnl_unlock(); } static struct pernet_operations fib6_rules_net_ops = { .init = fib6_rules_net_init, - .exit = fib6_rules_net_exit, + .exit_batch = fib6_rules_net_exit_batch, }; int __init fib6_rules_init(void) diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 4514444e96c8..4740afecf7c6 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk) { int err = 0; - if (sk->sk_state != TCP_CLOSE) 
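/* inet6_hash() sheds its local_bh_disable()/enable() pair for the same
 * reason inet_hash() did earlier in this patch: __inet_hash() now disables
 * BHs itself around inet_ehash_nolisten() for non-listening sockets, and
 * the listening path takes ilb->lock without disabling BHs, matching the
 * PREEMPT_RT rationale spelled out in inet_unhash(). Both wrappers reduce
 * to the bare state check:
 *
 *	if (sk->sk_state != TCP_CLOSE)
 *		err = __inet_hash(sk, NULL);
 */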
{ - local_bh_disable(); + if (sk->sk_state != TCP_CLOSE) err = __inet_hash(sk, NULL); - local_bh_enable(); - } return err; } diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index f90a87389fcc..f6f5b83dd954 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -32,13 +32,25 @@ struct ioam6_lwt_encap { struct ioam6_trace_hdr traceh; } __packed; +struct ioam6_lwt_freq { + u32 k; + u32 n; +}; + struct ioam6_lwt { struct dst_cache cache; + struct ioam6_lwt_freq freq; + atomic_t pkt_cnt; u8 mode; struct in6_addr tundst; struct ioam6_lwt_encap tuninfo; }; +static struct netlink_range_validation freq_range = { + .min = IOAM6_IPTUNNEL_FREQ_MIN, + .max = IOAM6_IPTUNNEL_FREQ_MAX, +}; + static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt) { return (struct ioam6_lwt *)lwt->data; @@ -55,6 +67,8 @@ static struct ioam6_trace_hdr *ioam6_lwt_trace(struct lwtunnel_state *lwt) } static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = { + [IOAM6_IPTUNNEL_FREQ_K] = NLA_POLICY_FULL_RANGE(NLA_U32, &freq_range), + [IOAM6_IPTUNNEL_FREQ_N] = NLA_POLICY_FULL_RANGE(NLA_U32, &freq_range), [IOAM6_IPTUNNEL_MODE] = NLA_POLICY_RANGE(NLA_U8, IOAM6_IPTUNNEL_MODE_MIN, IOAM6_IPTUNNEL_MODE_MAX), @@ -96,6 +110,7 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, struct lwtunnel_state *lwt; struct ioam6_lwt *ilwt; int len_aligned, err; + u32 freq_k, freq_n; u8 mode; if (family != AF_INET6) @@ -106,6 +121,23 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, if (err < 0) return err; + if ((!tb[IOAM6_IPTUNNEL_FREQ_K] && tb[IOAM6_IPTUNNEL_FREQ_N]) || + (tb[IOAM6_IPTUNNEL_FREQ_K] && !tb[IOAM6_IPTUNNEL_FREQ_N])) { + NL_SET_ERR_MSG(extack, "freq: missing parameter"); + return -EINVAL; + } else if (!tb[IOAM6_IPTUNNEL_FREQ_K] && !tb[IOAM6_IPTUNNEL_FREQ_N]) { + freq_k = IOAM6_IPTUNNEL_FREQ_MIN; + freq_n = IOAM6_IPTUNNEL_FREQ_MIN; + } else { + freq_k = nla_get_u32(tb[IOAM6_IPTUNNEL_FREQ_K]); + freq_n = nla_get_u32(tb[IOAM6_IPTUNNEL_FREQ_N]); + + if (freq_k > freq_n) { + NL_SET_ERR_MSG(extack, "freq: k > n is forbidden"); + return -EINVAL; + } + } + if (!tb[IOAM6_IPTUNNEL_MODE]) mode = IOAM6_IPTUNNEL_MODE_INLINE; else @@ -140,6 +172,10 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, return err; } + atomic_set(&ilwt->pkt_cnt, 0); + ilwt->freq.k = freq_k; + ilwt->freq.n = freq_n; + ilwt->mode = mode; if (tb[IOAM6_IPTUNNEL_DST]) ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]); @@ -263,11 +299,18 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) struct in6_addr orig_daddr; struct ioam6_lwt *ilwt; int err = -EINVAL; + u32 pkt_cnt; if (skb->protocol != htons(ETH_P_IPV6)) goto drop; ilwt = ioam6_lwt_state(dst->lwtstate); + + /* Check for insertion frequency (i.e., "k over n" insertions) */ + pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt); + if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k) + goto out; + orig_daddr = ipv6_hdr(skb)->daddr; switch (ilwt->mode) { @@ -358,6 +401,14 @@ static int ioam6_fill_encap_info(struct sk_buff *skb, struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate); int err; + err = nla_put_u32(skb, IOAM6_IPTUNNEL_FREQ_K, ilwt->freq.k); + if (err) + goto ret; + + err = nla_put_u32(skb, IOAM6_IPTUNNEL_FREQ_N, ilwt->freq.n); + if (err) + goto ret; + err = nla_put_u8(skb, IOAM6_IPTUNNEL_MODE, ilwt->mode); if (err) goto ret; @@ -379,7 +430,9 @@ static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate) struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate); int nlsize; - 
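/* The frequency check added to ioam6_output() above implements "k over n"
 * sampling: of every n consecutive packets, only the first k receive IOAM
 * data. atomic_fetch_inc() returns the counter's value before the
 * increment, so pkt_cnt % ilwt->freq.n is the packet's position within the
 * current window. Worked example with k = 2, n = 5: positions 0 and 1
 * satisfy pkt_cnt % 5 < 2 and are encapsulated; positions 2, 3 and 4 hit
 * the "goto out" path and are forwarded untouched. The build_state default
 * of k = n = IOAM6_IPTUNNEL_FREQ_MIN makes the condition always false,
 * i.e. insert on every packet.
 */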
nlsize = nla_total_size(sizeof(ilwt->mode)) + + nlsize = nla_total_size(sizeof(ilwt->freq.k)) + + nla_total_size(sizeof(ilwt->freq.n)) + + nla_total_size(sizeof(ilwt->mode)) + nla_total_size(sizeof(ilwt->tuninfo.traceh)); if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) @@ -395,7 +448,9 @@ static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) struct ioam6_lwt *ilwt_a = ioam6_lwt_state(a); struct ioam6_lwt *ilwt_b = ioam6_lwt_state(b); - return (ilwt_a->mode != ilwt_b->mode || + return (ilwt_a->freq.k != ilwt_b->freq.k || + ilwt_a->freq.n != ilwt_b->freq.n || + ilwt_a->mode != ilwt_b->mode || (ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE && !ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) || trace_a->namespace_id != trace_b->namespace_id); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 80256717868e..d4b1e2c5aa76 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -508,7 +508,7 @@ int ip6_mc_input(struct sk_buff *skb) /* * IPv6 multicast router mode is now supported ;) */ - if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && + if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) && !(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b47ffc81f9e5..53f632a560ec 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1122,7 +1122,10 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); } else if (skb->protocol == htons(ETH_P_IP)) { - struct rtable *rt = skb_rtable(skb); + const struct rtable *rt = skb_rtable(skb); + + if (!rt) + goto tx_err_link_failure; if (rt->rt_gw_family == AF_INET6) memcpy(&fl6->daddr, &rt->rt_gw6, sizeof(fl6->daddr)); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 7cf73e60e619..881fe6b50307 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -253,13 +253,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net) { struct mr_table *mrt, *next; - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { list_del(&mrt->list); ip6mr_free_table(mrt); } fib_rules_unregister(net->ipv6.mr6_rules_ops); - rtnl_unlock(); } static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, @@ -316,10 +315,9 @@ static int __net_init ip6mr_rules_init(struct net *net) static void __net_exit ip6mr_rules_exit(struct net *net) { - rtnl_lock(); + ASSERT_RTNL(); ip6mr_free_table(net->ipv6.mrt6); net->ipv6.mrt6 = NULL; - rtnl_unlock(); } static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, @@ -732,7 +730,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, in6_dev = __in6_dev_get(dev); if (in6_dev) { - in6_dev->cnf.mc_forwarding--; + atomic_dec(&in6_dev->cnf.mc_forwarding); inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); @@ -900,7 +898,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt, in6_dev = __in6_dev_get(dev); if (in6_dev) { - in6_dev->cnf.mc_forwarding++; + atomic_inc(&in6_dev->cnf.mc_forwarding); inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); @@ -1323,7 +1321,9 @@ static int __net_init ip6mr_net_init(struct net *net) proc_cache_fail: remove_proc_entry("ip6_mr_vif", net->proc_net); proc_vif_fail: + rtnl_lock(); ip6mr_rules_exit(net); + 
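/* The mc_forwarding conversion running through ip6_input.c and ip6mr.c
 * turns a plain int into an atomic_t: writers (mif6_add(), mif6_delete(),
 * ip6mr_sk_init()/ip6mr_sk_done()) update it under a lock, but readers
 * such as ip6_mc_input() and the netconf dump path check it locklessly,
 * which makes unannotated loads and stores data races. The shape after
 * conversion:
 *
 *	atomic_inc(&in6_dev->cnf.mc_forwarding);	// writer, lock held
 *
 *	if (atomic_read(&devconf->mc_forwarding))	// lockless reader
 *		...
 *
 * ip6mr_sk_done() additionally bails out early when the counter is already
 * zero, avoiding an RTNL round trip for sockets that never enabled
 * forwarding.
 */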
rtnl_unlock(); #endif ip6mr_rules_fail: ip6mr_notifier_exit(net); @@ -1336,13 +1336,23 @@ static void __net_exit ip6mr_net_exit(struct net *net) remove_proc_entry("ip6_mr_cache", net->proc_net); remove_proc_entry("ip6_mr_vif", net->proc_net); #endif - ip6mr_rules_exit(net); ip6mr_notifier_exit(net); } +static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list) +{ + struct net *net; + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) + ip6mr_rules_exit(net); + rtnl_unlock(); +} + static struct pernet_operations ip6mr_net_ops = { .init = ip6mr_net_init, .exit = ip6mr_net_exit, + .exit_batch = ip6mr_net_exit_batch, }; int __init ip6_mr_init(void) @@ -1551,7 +1561,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) } else { rcu_assign_pointer(mrt->mroute_sk, sk); sock_set_flag(sk, SOCK_RCU_FREE); - net->ipv6.devconf_all->mc_forwarding++; + atomic_inc(&net->ipv6.devconf_all->mc_forwarding); } write_unlock_bh(&mrt_lock); @@ -1567,14 +1577,19 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) int ip6mr_sk_done(struct sock *sk) { - int err = -EACCES; struct net *net = sock_net(sk); + struct ipv6_devconf *devconf; struct mr_table *mrt; + int err = -EACCES; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return err; + devconf = net->ipv6.devconf_all; + if (!devconf || !atomic_read(&devconf->mc_forwarding)) + return err; + rtnl_lock(); ip6mr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { @@ -1584,7 +1599,7 @@ int ip6mr_sk_done(struct sock *sk) * so the RCU grace period before sk freeing * is guaranteed by sk_destruct() */ - net->ipv6.devconf_all->mc_forwarding--; + atomic_dec(&devconf->mc_forwarding); write_unlock_bh(&mrt_lock); inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index c921de63b494..f0702d920d8d 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -6,6 +6,7 @@ * Copyright (c) 2021 Google */ +#include <linux/compat.h> #include <linux/if_arp.h> #include <linux/net.h> #include <linux/mctp.h> @@ -21,6 +22,8 @@ /* socket implementation */ +static void mctp_sk_expire_keys(struct timer_list *timer); + static int mctp_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -99,13 +102,20 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) struct sk_buff *skb; if (addr) { + const u8 tagbits = MCTP_TAG_MASK | MCTP_TAG_OWNER | + MCTP_TAG_PREALLOC; + if (addrlen < sizeof(struct sockaddr_mctp)) return -EINVAL; if (addr->smctp_family != AF_MCTP) return -EINVAL; if (!mctp_sockaddr_is_ok(addr)) return -EINVAL; - if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER)) + if (addr->smctp_tag & ~tagbits) + return -EINVAL; + /* can't preallocate a non-owned tag */ + if (addr->smctp_tag & MCTP_TAG_PREALLOC && + !(addr->smctp_tag & MCTP_TAG_OWNER)) return -EINVAL; } else { @@ -248,6 +258,32 @@ out_free: return rc; } +/* We're done with the key; invalidate, stop reassembly, and remove from lists. 
+ */ +static void __mctp_key_remove(struct mctp_sk_key *key, struct net *net, + unsigned long flags, unsigned long reason) +__releases(&key->lock) +__must_hold(&net->mctp.keys_lock) +{ + struct sk_buff *skb; + + trace_mctp_key_release(key, reason); + skb = key->reasm_head; + key->reasm_head = NULL; + key->reasm_dead = true; + key->valid = false; + mctp_dev_release_key(key->dev, key); + spin_unlock_irqrestore(&key->lock, flags); + + hlist_del(&key->hlist); + hlist_del(&key->sklist); + + /* unref for the lists */ + mctp_key_unref(key); + + kfree_skb(skb); +} + static int mctp_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { @@ -293,6 +329,115 @@ static int mctp_getsockopt(struct socket *sock, int level, int optname, return -EINVAL; } +static int mctp_ioctl_alloctag(struct mctp_sock *msk, unsigned long arg) +{ + struct net *net = sock_net(&msk->sk); + struct mctp_sk_key *key = NULL; + struct mctp_ioc_tag_ctl ctl; + unsigned long flags; + u8 tag; + + if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl))) + return -EFAULT; + + if (ctl.tag) + return -EINVAL; + + if (ctl.flags) + return -EINVAL; + + key = mctp_alloc_local_tag(msk, ctl.peer_addr, MCTP_ADDR_ANY, + true, &tag); + if (IS_ERR(key)) + return PTR_ERR(key); + + ctl.tag = tag | MCTP_TAG_OWNER | MCTP_TAG_PREALLOC; + if (copy_to_user((void __user *)arg, &ctl, sizeof(ctl))) { + spin_lock_irqsave(&key->lock, flags); + __mctp_key_remove(key, net, flags, MCTP_TRACE_KEY_DROPPED); + mctp_key_unref(key); + return -EFAULT; + } + + mctp_key_unref(key); + return 0; +} + +static int mctp_ioctl_droptag(struct mctp_sock *msk, unsigned long arg) +{ + struct net *net = sock_net(&msk->sk); + struct mctp_ioc_tag_ctl ctl; + unsigned long flags, fl2; + struct mctp_sk_key *key; + struct hlist_node *tmp; + int rc; + u8 tag; + + if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl))) + return -EFAULT; + + if (ctl.flags) + return -EINVAL; + + /* Must be a local tag, TO set, preallocated */ + if ((ctl.tag & ~MCTP_TAG_MASK) != (MCTP_TAG_OWNER | MCTP_TAG_PREALLOC)) + return -EINVAL; + + tag = ctl.tag & MCTP_TAG_MASK; + rc = -EINVAL; + + spin_lock_irqsave(&net->mctp.keys_lock, flags); + hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) { + /* we do an irqsave here, even though we know the irq state, + * so we have the flags to pass to __mctp_key_remove + */ + spin_lock_irqsave(&key->lock, fl2); + if (key->manual_alloc && + ctl.peer_addr == key->peer_addr && + tag == key->tag) { + __mctp_key_remove(key, net, fl2, + MCTP_TRACE_KEY_DROPPED); + rc = 0; + } else { + spin_unlock_irqrestore(&key->lock, fl2); + } + } + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); + + return rc; +} + +static int mctp_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk); + + switch (cmd) { + case SIOCMCTPALLOCTAG: + return mctp_ioctl_alloctag(msk, arg); + case SIOCMCTPDROPTAG: + return mctp_ioctl_droptag(msk, arg); + } + + return -EINVAL; +} + +#ifdef CONFIG_COMPAT +static int mctp_compat_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + void __user *argp = compat_ptr(arg); + + switch (cmd) { + /* These have compatible ptr layouts */ + case SIOCMCTPALLOCTAG: + case SIOCMCTPDROPTAG: + return mctp_ioctl(sock, cmd, (unsigned long)argp); + } + + return -ENOIOCTLCMD; +} +#endif + static const struct proto_ops mctp_dgram_ops = { .family = PF_MCTP, .release = mctp_release, @@ -302,7 +447,7 @@ static const struct proto_ops 
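/* Sketch of the expected userspace flow for the two new ioctls (error
 * handling elided; field semantics as enforced by the handlers above,
 * peer address chosen arbitrarily):
 *
 *	struct mctp_ioc_tag_ctl ctl = { .peer_addr = 9 };
 *
 *	// tag and flags must be zero on entry for SIOCMCTPALLOCTAG
 *	ioctl(sd, SIOCMCTPALLOCTAG, &ctl);
 *
 *	// ctl.tag now carries tag | MCTP_TAG_OWNER | MCTP_TAG_PREALLOC
 *	// and can be passed as smctp_tag in subsequent sendmsg() calls
 *
 *	// the same ctl, with OWNER | PREALLOC still set, releases the key
 *	ioctl(sd, SIOCMCTPDROPTAG, &ctl);
 *
 * Dropping requires an exact (peer_addr, tag) match on a manual_alloc
 * key; anything else returns -EINVAL.
 */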
mctp_dgram_ops = { .accept = sock_no_accept, .getname = sock_no_getname, .poll = datagram_poll, - .ioctl = sock_no_ioctl, + .ioctl = mctp_ioctl, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -312,6 +457,9 @@ static const struct proto_ops mctp_dgram_ops = { .recvmsg = mctp_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_ioctl = mctp_compat_ioctl, +#endif }; static void mctp_sk_expire_keys(struct timer_list *timer) @@ -319,7 +467,7 @@ static void mctp_sk_expire_keys(struct timer_list *timer) struct mctp_sock *msk = container_of(timer, struct mctp_sock, key_expiry); struct net *net = sock_net(&msk->sk); - unsigned long next_expiry, flags; + unsigned long next_expiry, flags, fl2; struct mctp_sk_key *key; struct hlist_node *tmp; bool next_expiry_valid = false; @@ -327,15 +475,16 @@ static void mctp_sk_expire_keys(struct timer_list *timer) spin_lock_irqsave(&net->mctp.keys_lock, flags); hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) { - spin_lock(&key->lock); + /* don't expire. manual_alloc is immutable, no locking + * required. + */ + if (key->manual_alloc) + continue; + spin_lock_irqsave(&key->lock, fl2); if (!time_after_eq(key->expiry, jiffies)) { - trace_mctp_key_release(key, MCTP_TRACE_KEY_TIMEOUT); - key->valid = false; - hlist_del_rcu(&key->hlist); - hlist_del_rcu(&key->sklist); - spin_unlock(&key->lock); - mctp_key_unref(key); + __mctp_key_remove(key, net, fl2, + MCTP_TRACE_KEY_TIMEOUT); continue; } @@ -346,7 +495,7 @@ static void mctp_sk_expire_keys(struct timer_list *timer) next_expiry = key->expiry; next_expiry_valid = true; } - spin_unlock(&key->lock); + spin_unlock_irqrestore(&key->lock, fl2); } spin_unlock_irqrestore(&net->mctp.keys_lock, flags); @@ -387,9 +536,9 @@ static void mctp_sk_unhash(struct sock *sk) { struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); struct net *net = sock_net(sk); + unsigned long flags, fl2; struct mctp_sk_key *key; struct hlist_node *tmp; - unsigned long flags; /* remove from any type-based binds */ mutex_lock(&net->mctp.bind_lock); @@ -399,20 +548,8 @@ static void mctp_sk_unhash(struct sock *sk) /* remove tag allocations */ spin_lock_irqsave(&net->mctp.keys_lock, flags); hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) { - hlist_del(&key->sklist); - hlist_del(&key->hlist); - - trace_mctp_key_release(key, MCTP_TRACE_KEY_CLOSED); - - spin_lock(&key->lock); - kfree_skb(key->reasm_head); - key->reasm_head = NULL; - key->reasm_dead = true; - key->valid = false; - spin_unlock(&key->lock); - - /* key is no longer on the lookup lists, unref */ - mctp_key_unref(key); + spin_lock_irqsave(&key->lock, fl2); + __mctp_key_remove(key, net, fl2, MCTP_TRACE_KEY_CLOSED); } spin_unlock_irqrestore(&net->mctp.keys_lock, flags); } diff --git a/net/mctp/device.c b/net/mctp/device.c index ef2755f82f87..02ddc0f1bd3e 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -6,6 +6,7 @@ * Copyright (c) 2021 Google */ +#include <linux/if_arp.h> #include <linux/if_link.h> #include <linux/mctp.h> #include <linux/netdevice.h> diff --git a/net/mctp/route.c b/net/mctp/route.c index 8d9f4ff3e285..17e3482aa770 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -64,8 +64,7 @@ static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb) if (msk->bind_type != type) continue; - if (msk->bind_addr != MCTP_ADDR_ANY && - msk->bind_addr != mh->dest) + if (!mctp_address_matches(msk->bind_addr, mh->dest)) continue; return msk; @@ -77,7 +76,7 @@ static 
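/* Both teardown paths rewritten above (key expiry and socket unhash) now
 * funnel into __mctp_key_remove(), whose locking contract matches its
 * annotations: enter with net->mctp.keys_lock held (__must_hold) and
 * key->lock held with saved irq flags, and it drops key->lock itself
 * (__releases) before unlinking the key. Callers therefore take the form:
 *
 *	spin_lock_irqsave(&net->mctp.keys_lock, flags);
 *	hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
 *		spin_lock_irqsave(&key->lock, fl2);
 *		__mctp_key_remove(key, net, fl2, reason);  // drops key->lock
 *	}
 *	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
 *
 * The inner irqsave is taken even where the irq state is already known,
 * purely so there are flags to hand to __mctp_key_remove(), as the comment
 * in mctp_ioctl_droptag() notes.
 */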
struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb) static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local, mctp_eid_t peer, u8 tag) { - if (key->local_addr != local) + if (!mctp_address_matches(key->local_addr, local)) return false; if (key->peer_addr != peer) @@ -204,29 +203,38 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) return rc; } -/* We're done with the key; unset valid and remove from lists. There may still - * be outstanding refs on the key though... +/* Helper for mctp_route_input(). + * We're done with the key; unlock and unref the key. + * For the usual case of automatic expiry we remove the key from lists. + * In the case that manual allocation is set on a key we release the lock + * and local ref, reset reassembly, but don't remove from lists. */ -static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net, - unsigned long flags) - __releases(&key->lock) +static void __mctp_key_done_in(struct mctp_sk_key *key, struct net *net, + unsigned long flags, unsigned long reason) +__releases(&key->lock) { struct sk_buff *skb; + trace_mctp_key_release(key, reason); skb = key->reasm_head; key->reasm_head = NULL; - key->reasm_dead = true; - key->valid = false; - mctp_dev_release_key(key->dev, key); + + if (!key->manual_alloc) { + key->reasm_dead = true; + key->valid = false; + mctp_dev_release_key(key->dev, key); + } spin_unlock_irqrestore(&key->lock, flags); - spin_lock_irqsave(&net->mctp.keys_lock, flags); - hlist_del(&key->hlist); - hlist_del(&key->sklist); - spin_unlock_irqrestore(&net->mctp.keys_lock, flags); + if (!key->manual_alloc) { + spin_lock_irqsave(&net->mctp.keys_lock, flags); + hlist_del(&key->hlist); + hlist_del(&key->sklist); + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); - /* one unref for the lists */ - mctp_key_unref(key); + /* unref for the lists */ + mctp_key_unref(key); + } /* and one for the local reference */ mctp_key_unref(key); @@ -380,9 +388,8 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) /* we've hit a pending reassembly; not much we * can do but drop it */ - trace_mctp_key_release(key, - MCTP_TRACE_KEY_REPLIED); - __mctp_key_unlock_drop(key, net, f); + __mctp_key_done_in(key, net, f, + MCTP_TRACE_KEY_REPLIED); key = NULL; } rc = 0; @@ -424,9 +431,8 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) } else { if (key->reasm_head || key->reasm_dead) { /* duplicate start? 
drop everything */ - trace_mctp_key_release(key, - MCTP_TRACE_KEY_INVALIDATED); - __mctp_key_unlock_drop(key, net, f); + __mctp_key_done_in(key, net, f, + MCTP_TRACE_KEY_INVALIDATED); rc = -EEXIST; key = NULL; } else { @@ -449,10 +455,10 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * the reassembly/response key */ if (!rc && flags & MCTP_HDR_FLAG_EOM) { + msk = container_of(key->sk, struct mctp_sock, sk); sock_queue_rcv_skb(key->sk, key->reasm_head); key->reasm_head = NULL; - trace_mctp_key_release(key, MCTP_TRACE_KEY_REPLIED); - __mctp_key_unlock_drop(key, net, f); + __mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED); key = NULL; } @@ -580,9 +586,9 @@ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key, /* Allocate a locally-owned tag value for (saddr, daddr), and reserve * it for the socket msk */ -static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, - mctp_eid_t saddr, - mctp_eid_t daddr, u8 *tagp) +struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, + mctp_eid_t daddr, mctp_eid_t saddr, + bool manual, u8 *tagp) { struct net *net = sock_net(&msk->sk); struct netns_mctp *mns = &net->mctp; @@ -616,9 +622,8 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, if (tmp->tag & MCTP_HDR_FLAG_TO) continue; - if (!((tmp->peer_addr == daddr || - tmp->peer_addr == MCTP_ADDR_ANY) && - tmp->local_addr == saddr)) + if (!(mctp_address_matches(tmp->peer_addr, daddr) && + mctp_address_matches(tmp->local_addr, saddr))) continue; spin_lock(&tmp->lock); @@ -638,6 +643,7 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, mctp_reserve_tag(net, key, msk); trace_mctp_key_acquire(key); + key->manual_alloc = manual; *tagp = key->tag; } @@ -651,6 +657,50 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, return key; } +static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk, + mctp_eid_t daddr, + u8 req_tag, u8 *tagp) +{ + struct net *net = sock_net(&msk->sk); + struct netns_mctp *mns = &net->mctp; + struct mctp_sk_key *key, *tmp; + unsigned long flags; + + req_tag &= ~(MCTP_TAG_PREALLOC | MCTP_TAG_OWNER); + key = NULL; + + spin_lock_irqsave(&mns->keys_lock, flags); + + hlist_for_each_entry(tmp, &mns->keys, hlist) { + if (tmp->tag != req_tag) + continue; + + if (!mctp_address_matches(tmp->peer_addr, daddr)) + continue; + + if (!tmp->manual_alloc) + continue; + + spin_lock(&tmp->lock); + if (tmp->valid) { + key = tmp; + refcount_inc(&key->refs); + spin_unlock(&tmp->lock); + break; + } + spin_unlock(&tmp->lock); + } + spin_unlock_irqrestore(&mns->keys_lock, flags); + + if (!key) + return ERR_PTR(-ENOENT); + + if (tagp) + *tagp = key->tag; + + return key; +} + /* routing lookups */ static bool mctp_rt_match_eid(struct mctp_route *rt, unsigned int net, mctp_eid_t eid) @@ -845,8 +895,14 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, if (rc) goto out_release; - if (req_tag & MCTP_HDR_FLAG_TO) { - key = mctp_alloc_local_tag(msk, saddr, daddr, &tag); + if (req_tag & MCTP_TAG_OWNER) { + if (req_tag & MCTP_TAG_PREALLOC) + key = mctp_lookup_prealloc_tag(msk, daddr, + req_tag, &tag); + else + key = mctp_alloc_local_tag(msk, daddr, saddr, + false, &tag); + if (IS_ERR(key)) { rc = PTR_ERR(key); goto out_release; @@ -857,7 +913,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, tag |= MCTP_HDR_FLAG_TO; } else { key = NULL; - tag = req_tag; + tag = req_tag & MCTP_TAG_MASK; } skb->protocol = htons(ETH_P_MCTP); diff --git 
a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 750f9f9b4daf..61205cf40074 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -369,14 +369,15 @@ static void mctp_test_route_input_sk(struct kunit *test) #define FL_S (MCTP_HDR_FLAG_SOM) #define FL_E (MCTP_HDR_FLAG_EOM) -#define FL_T (MCTP_HDR_FLAG_TO) +#define FL_TO (MCTP_HDR_FLAG_TO) +#define FL_T(t) ((t) & MCTP_HDR_TAG_MASK) static const struct mctp_route_input_sk_test mctp_route_input_sk_tests[] = { - { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T), .type = 0, .deliver = true }, - { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T), .type = 1, .deliver = false }, + { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_TO), .type = 0, .deliver = true }, + { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_TO), .type = 1, .deliver = false }, { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E), .type = 0, .deliver = false }, - { .hdr = RX_HDR(1, 10, 8, FL_E | FL_T), .type = 0, .deliver = false }, - { .hdr = RX_HDR(1, 10, 8, FL_T), .type = 0, .deliver = false }, + { .hdr = RX_HDR(1, 10, 8, FL_E | FL_TO), .type = 0, .deliver = false }, + { .hdr = RX_HDR(1, 10, 8, FL_TO), .type = 0, .deliver = false }, { .hdr = RX_HDR(1, 10, 8, 0), .type = 0, .deliver = false }, }; @@ -436,7 +437,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test) __mctp_route_test_fini(test, dev, rt, sock); } -#define RX_FRAG(f, s) RX_HDR(1, 10, 8, FL_T | (f) | ((s) << MCTP_HDR_SEQ_SHIFT)) +#define RX_FRAG(f, s) RX_HDR(1, 10, 8, FL_TO | (f) | ((s) << MCTP_HDR_SEQ_SHIFT)) static const struct mctp_route_input_sk_reasm_test mctp_route_input_sk_reasm_tests[] = { { @@ -522,12 +523,156 @@ static void mctp_route_input_sk_reasm_to_desc( KUNIT_ARRAY_PARAM(mctp_route_input_sk_reasm, mctp_route_input_sk_reasm_tests, mctp_route_input_sk_reasm_to_desc); +struct mctp_route_input_sk_keys_test { + const char *name; + mctp_eid_t key_peer_addr; + mctp_eid_t key_local_addr; + u8 key_tag; + struct mctp_hdr hdr; + bool deliver; +}; + +/* test packet rx in the presence of various key configurations */ +static void mctp_test_route_input_sk_keys(struct kunit *test) +{ + const struct mctp_route_input_sk_keys_test *params; + struct mctp_test_route *rt; + struct sk_buff *skb, *skb2; + struct mctp_test_dev *dev; + struct mctp_sk_key *key; + struct netns_mctp *mns; + struct mctp_sock *msk; + struct socket *sock; + unsigned long flags; + int rc; + u8 c; + + params = test->param_value; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); + + rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); + KUNIT_ASSERT_EQ(test, rc, 0); + + msk = container_of(sock->sk, struct mctp_sock, sk); + mns = &sock_net(sock->sk)->mctp; + + /* set the incoming tag according to test params */ + key = mctp_key_alloc(msk, params->key_local_addr, params->key_peer_addr, + params->key_tag, GFP_KERNEL); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, key); + + spin_lock_irqsave(&mns->keys_lock, flags); + mctp_reserve_tag(&init_net, key, msk); + spin_unlock_irqrestore(&mns->keys_lock, flags); + + /* create packet and route */ + c = 0; + skb = mctp_test_create_skb_data(¶ms->hdr, &c); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); + + skb->dev = dev->ndev; + __mctp_cb(skb); + + rc = mctp_route_input(&rt->rt, skb); + + /* (potentially) receive message */ + skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + + if (params->deliver) + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); + else + 
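/* [editor's note] the manual_alloc / MCTP_TAG_PREALLOC plumbing exercised by
 * these tests pairs with the mctp_lookup_prealloc_tag() path above: a sender
 * holding a preallocated tag presumably passes it back via sockaddr_mctp.
 * Illustrative userspace sketch only; addresses, type and the exact flag
 * contract are assumptions:
 *
 *      struct sockaddr_mctp addr = {
 *              .smctp_family = AF_MCTP,
 *              .smctp_network = MCTP_NET_ANY,
 *              .smctp_addr.s_addr = 9,
 *              .smctp_type = 1,
 *              .smctp_tag = tag | MCTP_TAG_OWNER | MCTP_TAG_PREALLOC,
 *      };
 *      sendto(sd, buf, len, 0, (struct sockaddr *)&addr, sizeof(addr));
 */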
KUNIT_EXPECT_PTR_EQ(test, skb2, NULL); + + if (skb2) + skb_free_datagram(sock->sk, skb2); + + mctp_key_unref(key); + __mctp_route_test_fini(test, dev, rt, sock); +} + +static const struct mctp_route_input_sk_keys_test mctp_route_input_sk_keys_tests[] = { + { + .name = "direct match", + .key_peer_addr = 9, + .key_local_addr = 8, + .key_tag = 1, + .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1)), + .deliver = true, + }, + { + .name = "flipped src/dest", + .key_peer_addr = 8, + .key_local_addr = 9, + .key_tag = 1, + .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1)), + .deliver = false, + }, + { + .name = "peer addr mismatch", + .key_peer_addr = 9, + .key_local_addr = 8, + .key_tag = 1, + .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T(1)), + .deliver = false, + }, + { + .name = "tag value mismatch", + .key_peer_addr = 9, + .key_local_addr = 8, + .key_tag = 1, + .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(2)), + .deliver = false, + }, + { + .name = "TO mismatch", + .key_peer_addr = 9, + .key_local_addr = 8, + .key_tag = 1, + .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1) | FL_TO), + .deliver = false, + }, + { + .name = "broadcast response", + .key_peer_addr = MCTP_ADDR_ANY, + .key_local_addr = 8, + .key_tag = 1, + .hdr = RX_HDR(1, 11, 8, FL_S | FL_E | FL_T(1)), + .deliver = true, + }, + { + .name = "any local match", + .key_peer_addr = 12, + .key_local_addr = MCTP_ADDR_ANY, + .key_tag = 1, + .hdr = RX_HDR(1, 12, 8, FL_S | FL_E | FL_T(1)), + .deliver = true, + }, +}; + +static void mctp_route_input_sk_keys_to_desc( + const struct mctp_route_input_sk_keys_test *t, + char *desc) +{ + sprintf(desc, "%s", t->name); +} + +KUNIT_ARRAY_PARAM(mctp_route_input_sk_keys, mctp_route_input_sk_keys_tests, + mctp_route_input_sk_keys_to_desc); + static struct kunit_case mctp_test_cases[] = { KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params), KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params), KUNIT_CASE_PARAM(mctp_test_route_input_sk, mctp_route_input_sk_gen_params), KUNIT_CASE_PARAM(mctp_test_route_input_sk_reasm, mctp_route_input_sk_reasm_gen_params), + KUNIT_CASE_PARAM(mctp_test_route_input_sk_keys, + mctp_route_input_sk_keys_gen_params), {} }; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d47795748ad7..5464c2d268bd 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1174,14 +1174,8 @@ skip_family: if (tb[MPTCP_PM_ADDR_ATTR_FLAGS]) entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]); - if (tb[MPTCP_PM_ADDR_ATTR_PORT]) { - if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { - NL_SET_ERR_MSG_ATTR(info->extack, attr, - "flags must have signal when using port"); - return -EINVAL; - } + if (tb[MPTCP_PM_ADDR_ATTR_PORT]) entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); - } return 0; } @@ -1227,6 +1221,11 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; + if (addr.addr.port && !(addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + GENL_SET_ERR_MSG(info, "flags must have signal when using port"); + return -EINVAL; + } + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { GENL_SET_ERR_MSG(info, "can't allocate addr"); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 354cb472f386..d1c9dfbb11fa 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -621,7 +621,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, case NF_ACCEPT: break; case NF_DROP: - kfree_skb(skb); + kfree_skb_reason(skb, + SKB_DROP_REASON_NETFILTER_DROP); ret = NF_DROP_GETERR(verdict); if 
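/* [editor's note] NF_DROP_GETERR(), used just below, extracts a negative
 * errno that a hook may have packed into the upper bits of its verdict; the
 * -EPERM that follows is only the fallback. A hook requesting a specific
 * error would use the existing NF_DROP_ERR() encoding, e.g.:
 *
 *      return NF_DROP_ERR(-EHOSTUNREACH);
 */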
(ret == 0) ret = -EPERM; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 67ad08320886..7e8a39a35627 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -37,6 +37,7 @@ #include <net/genetlink.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/pkt_cls.h> #include "datapath.h" #include "flow.h" @@ -1601,8 +1602,6 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, dp->user_features = 0; } -DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support); - static int ovs_dp_set_upcall_portids(struct datapath *dp, const struct nlattr *ids) { @@ -1657,7 +1656,7 @@ u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id) static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) { - u32 user_features = 0; + u32 user_features = 0, old_features = dp->user_features; int err; if (a[OVS_DP_ATTR_USER_FEATURES]) { @@ -1696,10 +1695,12 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) return err; } - if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) - static_branch_enable(&tc_recirc_sharing_support); - else - static_branch_disable(&tc_recirc_sharing_support); + if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) && + !(old_features & OVS_DP_F_TC_RECIRC_SHARING)) + tc_skb_ext_tc_enable(); + else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) && + (old_features & OVS_DP_F_TC_RECIRC_SHARING)) + tc_skb_ext_tc_disable(); return 0; } @@ -1839,6 +1840,9 @@ static void __dp_destroy(struct datapath *dp) struct flow_table *table = &dp->table; int i; + if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) + tc_skb_ext_tc_disable(); + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; struct hlist_node *n; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index fcfe6cb46441..0cd29971a907 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -253,8 +253,6 @@ static inline struct datapath *get_dp(struct net *net, int dp_ifindex) extern struct notifier_block ovs_dp_device_notifier; extern struct genl_family dp_vport_genl_family; -DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support); - void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key); void ovs_dp_detach_port(struct vport *); int ovs_dp_upcall(struct datapath *, struct sk_buff *, diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 02096f2ec678..f6cd24fd530c 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -34,6 +34,7 @@ #include <net/mpls.h> #include <net/ndisc.h> #include <net/nsh.h> +#include <net/pkt_cls.h> #include <net/netfilter/nf_conntrack_zones.h> #include "conntrack.h" @@ -895,7 +896,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->mac_proto = res; #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - if (static_branch_unlikely(&tc_recirc_sharing_support)) { + if (tc_skb_ext_tc_enabled()) { tc_ext = skb_ext_find(skb, TC_SKB_EXT); key->recirc_id = tc_ext ? tc_ext->chain : 0; OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5f0f346b576f..ff1e6b474fef 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -49,6 +49,23 @@ static LIST_HEAD(tcf_proto_base); /* Protects list of registered TC modules. It is pure SMP lock. 
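[editor's note: the tc_skb_ext_tc_enable()/tc_skb_ext_tc_disable() helpers
added just below wrap static_branch_inc()/static_branch_dec(), so the key
counts users instead of acting as a boolean. That is the point of replacing
the old static_branch_enable()/disable() calls in openvswitch: with several
datapaths the operations now compose,

        tc_skb_ext_tc_enable();         dp A: count 0 -> 1, branch on
        tc_skb_ext_tc_enable();         dp B: count 1 -> 2
        tc_skb_ext_tc_disable();        dp A gone: count 2 -> 1, still on
        tc_skb_ext_tc_disable();        dp B gone: count 1 -> 0, branch off

whereas a plain disable from dp A would have switched the feature off under
dp B.]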
*/ static DEFINE_RWLOCK(cls_mod_lock); +#ifdef CONFIG_NET_CLS_ACT +DEFINE_STATIC_KEY_FALSE(tc_skb_ext_tc); +EXPORT_SYMBOL(tc_skb_ext_tc); + +void tc_skb_ext_tc_enable(void) +{ + static_branch_inc(&tc_skb_ext_tc); +} +EXPORT_SYMBOL(tc_skb_ext_tc_enable); + +void tc_skb_ext_tc_disable(void) +{ + static_branch_dec(&tc_skb_ext_tc); +} +EXPORT_SYMBOL(tc_skb_ext_tc_disable); +#endif + static u32 destroy_obj_hashfn(const struct tcf_proto *tp) { return jhash_3words(tp->chain->index, tp->prio, @@ -1615,19 +1632,21 @@ int tcf_classify(struct sk_buff *skb, ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, &last_executed_chain); - /* If we missed on some chain */ - if (ret == TC_ACT_UNSPEC && last_executed_chain) { - struct tc_skb_cb *cb = tc_skb_cb(skb); - - ext = tc_skb_ext_alloc(skb); - if (WARN_ON_ONCE(!ext)) - return TC_ACT_SHOT; - ext->chain = last_executed_chain; - ext->mru = cb->mru; - ext->post_ct = cb->post_ct; - ext->post_ct_snat = cb->post_ct_snat; - ext->post_ct_dnat = cb->post_ct_dnat; - ext->zone = cb->zone; + if (tc_skb_ext_tc_enabled()) { + /* If we missed on some chain */ + if (ret == TC_ACT_UNSPEC && last_executed_chain) { + struct tc_skb_cb *cb = tc_skb_cb(skb); + + ext = tc_skb_ext_alloc(skb); + if (WARN_ON_ONCE(!ext)) + return TC_ACT_SHOT; + ext->chain = last_executed_chain; + ext->mru = cb->mru; + ext->post_ct = cb->post_ct; + ext->post_ct_snat = cb->post_ct_snat; + ext->post_ct_dnat = cb->post_ct_dnat; + ext->zone = cb->zone; + } } return ret; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index efc84845bb6b..0024a692f0f8 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1433,7 +1433,8 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, if (*zc && (out_iov || out_sg)) { if (out_iov) - n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1; + n_sgout = 1 + + iov_iter_npages_cap(out_iov, INT_MAX, data_len); else n_sgout = sg_nents(out_sg); n_sgin = skb_nsg(skb, rxm->offset + prot->prepend_size, diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 28ef3f4465ae..2abd64e4d589 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -343,9 +343,9 @@ out: } EXPORT_SYMBOL(xsk_tx_peek_desc); -static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_desc *descs, - u32 max_entries) +static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, u32 max_entries) { + struct xdp_desc *descs = pool->tx_descs; u32 nb_pkts = 0; while (nb_pkts < max_entries && xsk_tx_peek_desc(pool, &descs[nb_pkts])) @@ -355,8 +355,7 @@ static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_d return nb_pkts; } -u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *descs, - u32 max_entries) +u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries) { struct xdp_sock *xs; u32 nb_pkts; @@ -365,7 +364,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc * if (!list_is_singular(&pool->xsk_tx_list)) { /* Fallback to the non-batched version */ rcu_read_unlock(); - return xsk_tx_peek_release_fallback(pool, descs, max_entries); + return xsk_tx_peek_release_fallback(pool, max_entries); } xs = list_first_or_null_rcu(&pool->xsk_tx_list, struct xdp_sock, tx_list); @@ -374,7 +373,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc * goto out; } - nb_pkts = xskq_cons_peek_desc_batch(xs->tx, descs, pool, max_entries); + nb_pkts = xskq_cons_peek_desc_batch(xs->tx, pool, max_entries); if (!nb_pkts) { xs->tx->queue_empty_descs++; goto out; @@ -386,7 +385,7 
@@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc * * packets. This avoids having to implement any buffering in * the Tx path. */ - nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, descs, nb_pkts); + nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, pool->tx_descs, nb_pkts); if (!nb_pkts) goto out; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index fd39bb660ebc..b34fca6ada86 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -37,6 +37,7 @@ void xp_destroy(struct xsk_buff_pool *pool) if (!pool) return; + kvfree(pool->tx_descs); kvfree(pool->heads); kvfree(pool); } @@ -58,6 +59,12 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, if (!pool->heads) goto out; + if (xs->tx) { + pool->tx_descs = kcalloc(xs->tx->nentries, sizeof(*pool->tx_descs), GFP_KERNEL); + if (!pool->tx_descs) + goto out; + } + pool->chunk_mask = ~((u64)umem->chunk_size - 1); pool->addrs_cnt = umem->size; pool->heads_cnt = umem->chunks; diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index e9aa2c236356..801cda5d1938 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -205,11 +205,11 @@ static inline bool xskq_cons_read_desc(struct xsk_queue *q, return false; } -static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, - struct xdp_desc *descs, - struct xsk_buff_pool *pool, u32 max) +static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool, + u32 max) { u32 cached_cons = q->cached_cons, nb_entries = 0; + struct xdp_desc *descs = pool->tx_descs; while (cached_cons != q->cached_prod && nb_entries < max) { struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; @@ -282,12 +282,12 @@ static inline bool xskq_cons_peek_desc(struct xsk_queue *q, return xskq_cons_read_desc(q, desc, pool); } -static inline u32 xskq_cons_peek_desc_batch(struct xsk_queue *q, struct xdp_desc *descs, - struct xsk_buff_pool *pool, u32 max) +static inline u32 xskq_cons_peek_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool, + u32 max) { u32 entries = xskq_cons_nb_entries(q, max); - return xskq_cons_read_desc_batch(q, descs, pool, entries); + return xskq_cons_read_desc_batch(q, pool, entries); } /* To improve performance in the xskq_cons_release functions, only update local state here. 
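[editor's note: with the descriptor array now owned by the pool, the batched
Tx API drops its descs parameter throughout. Driver-side sketch of the
resulting calling convention; hw_post_tx() is a hypothetical device hook:

        u32 i, budget = 64;
        u32 n = xsk_tx_peek_release_desc_batch(pool, budget);

        for (i = 0; i < n; i++)
                hw_post_tx(pool->tx_descs[i].addr, pool->tx_descs[i].len);

callers no longer size or free a scratch array; xp_create_and_assign_umem()
and xp_destroy() above own the pool->tx_descs lifetime.]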
@@ -304,13 +304,6 @@ static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt) q->cached_cons += cnt; } -static inline bool xskq_cons_is_full(struct xsk_queue *q) -{ - /* No barriers needed since data is not accessed */ - return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) == - q->nentries; -} - static inline u32 xskq_cons_present_entries(struct xsk_queue *q) { /* No barriers needed since data is not accessed */ diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 319fd31522f3..e69651a6902f 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -413,7 +413,7 @@ static void fixup_map(struct bpf_object *obj) for (i = 0; i < NR_TESTS; i++) { if (!strcmp(test_map_names[i], name) && (check_test_flags(i))) { - bpf_map__resize(map, num_map_entries); + bpf_map__set_max_entries(map, num_map_entries); continue; } } diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 3ec8ad9c1750..631f0cabe139 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -79,13 +79,11 @@ static void usage(const char *prog) int main(int argc, char **argv) { - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_XDP, - }; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); const char *optstr = "FSN"; int prog_fd, map_fd, opt; + struct bpf_program *prog; struct bpf_object *obj; struct bpf_map *map; char filename[256]; @@ -123,11 +121,19 @@ int main(int argc, char **argv) } snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - prog_load_attr.file = filename; + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) + return 1; + + prog = bpf_object__next_program(obj, NULL); + bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + err = bpf_object__load(obj); + if (err) return 1; + prog_fd = bpf_program__fd(prog); + map = bpf_object__next_map(obj, NULL); if (!map) { printf("finding a map in obj file failed\n"); diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index 6c61d5f570fb..b3f6e49676ed 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -82,15 +82,13 @@ static void usage(const char *cmd) int main(int argc, char **argv) { - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_XDP, - }; unsigned char opt_flags[256] = {}; const char *optstr = "i:T:P:SNFh"; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); unsigned int kill_after_s = 0; int i, prog_fd, map_fd, opt; + struct bpf_program *prog; struct bpf_object *obj; __u32 max_pckt_size = 0; __u32 key = 0; @@ -148,11 +146,20 @@ int main(int argc, char **argv) } snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - prog_load_attr.file = filename; - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) return 1; + prog = bpf_object__next_program(obj, NULL); + bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); + + err = bpf_object__load(obj); + if (err) + return 1; + + prog_fd = bpf_program__fd(prog); + /* static global var 'max_pcktsz' is accessible from .data section */ if (max_pckt_size) { map_fd = bpf_object__find_map_fd_by_name(obj, "xdp_adju.data"); diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c index 79ccd9891924..1828487bae9a 100644 --- a/samples/bpf/xdp_fwd_user.c +++ b/samples/bpf/xdp_fwd_user.c @@ -75,14 +75,11 @@ static 
void usage(const char *prog) int main(int argc, char **argv) { - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_XDP, - }; const char *prog_name = "xdp_fwd"; struct bpf_program *prog = NULL; struct bpf_program *pos; const char *sec_name; - int prog_fd, map_fd = -1; + int prog_fd = -1, map_fd = -1; char filename[PATH_MAX]; struct bpf_object *obj; int opt, i, idx, err; @@ -119,7 +116,6 @@ int main(int argc, char **argv) if (attach) { snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - prog_load_attr.file = filename; if (access(filename, O_RDONLY) < 0) { printf("error accessing file %s: %s\n", @@ -127,7 +123,14 @@ int main(int argc, char **argv) return 1; } - err = bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) + return 1; + + prog = bpf_object__next_program(obj, NULL); + bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); + + err = bpf_object__load(obj); if (err) { printf("Does kernel support devmap lookup?\n"); /* If not, the error message will be: diff --git a/samples/bpf/xdp_redirect_cpu.bpf.c b/samples/bpf/xdp_redirect_cpu.bpf.c index 25e3a405375f..87c54bfdbb70 100644 --- a/samples/bpf/xdp_redirect_cpu.bpf.c +++ b/samples/bpf/xdp_redirect_cpu.bpf.c @@ -491,7 +491,7 @@ int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx) return bpf_redirect_map(&cpu_map, cpu_dest, 0); } -SEC("xdp_cpumap/redirect") +SEC("xdp/cpumap") int xdp_redirect_cpu_devmap(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; @@ -507,19 +507,19 @@ int xdp_redirect_cpu_devmap(struct xdp_md *ctx) return bpf_redirect_map(&tx_port, 0, 0); } -SEC("xdp_cpumap/pass") +SEC("xdp/cpumap") int xdp_redirect_cpu_pass(struct xdp_md *ctx) { return XDP_PASS; } -SEC("xdp_cpumap/drop") +SEC("xdp/cpumap") int xdp_redirect_cpu_drop(struct xdp_md *ctx) { return XDP_DROP; } -SEC("xdp_devmap/egress") +SEC("xdp/devmap") int xdp_redirect_egress_prog(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index a81704d3317b..5f74a70a9021 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -70,7 +70,7 @@ static void print_avail_progs(struct bpf_object *obj) printf(" Programs to be used for -p/--progname:\n"); bpf_object__for_each_program(pos, obj) { - if (bpf_program__is_xdp(pos)) { + if (bpf_program__type(pos) == BPF_PROG_TYPE_XDP) { if (!strncmp(bpf_program__name(pos), "xdp_prognum", sizeof("xdp_prognum") - 1)) printf(" %s\n", bpf_program__name(pos)); diff --git a/samples/bpf/xdp_redirect_map.bpf.c b/samples/bpf/xdp_redirect_map.bpf.c index 59efd656e1b2..415bac1758e3 100644 --- a/samples/bpf/xdp_redirect_map.bpf.c +++ b/samples/bpf/xdp_redirect_map.bpf.c @@ -68,7 +68,7 @@ int xdp_redirect_map_native(struct xdp_md *ctx) return xdp_redirect_map(ctx, &tx_port_native); } -SEC("xdp_devmap/egress") +SEC("xdp/devmap") int xdp_redirect_map_egress(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; diff --git a/samples/bpf/xdp_redirect_map_multi.bpf.c b/samples/bpf/xdp_redirect_map_multi.bpf.c index bb0a5a3bfcf0..8b2fd4ec2c76 100644 --- a/samples/bpf/xdp_redirect_map_multi.bpf.c +++ b/samples/bpf/xdp_redirect_map_multi.bpf.c @@ -53,7 +53,7 @@ int xdp_redirect_map_native(struct xdp_md *ctx) return xdp_redirect_map(ctx, &forward_map_native); } -SEC("xdp_devmap/egress") +SEC("xdp/devmap") int xdp_devmap_prog(struct xdp_md *ctx) { void *data_end = (void 
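/* [editor's note] these SEC() renames track the section_defs[] change in
 * tools/lib/bpf/libbpf.c later in this patch: the attach qualifier now
 * follows a plain "xdp/" prefix, and the old "xdp_devmap/" and "xdp_cpumap/"
 * spellings are marked SEC_DEPRECATED. Minimal program under the new
 * convention:
 *
 *      SEC("xdp/devmap")
 *      int xdp_egress(struct xdp_md *ctx)
 *      {
 *              return XDP_PASS;
 *      }
 */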
*)(long)ctx->data_end; diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index 2d565ba54b8c..6dae87d83e1c 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -640,12 +640,10 @@ static void usage(const char *prog) int main(int ac, char **argv) { - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_XDP, - }; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); const char *optstr = "SF"; + struct bpf_program *prog; struct bpf_object *obj; char filename[256]; char **ifname_list; @@ -653,7 +651,6 @@ int main(int ac, char **argv) int err, i = 1; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - prog_load_attr.file = filename; total_ifindex = ac - 1; ifname_list = (argv + 1); @@ -684,14 +681,20 @@ int main(int ac, char **argv) return 1; } - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) return 1; + prog = bpf_object__next_program(obj, NULL); + bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); + printf("\n******************loading bpf file*********************\n"); - if (!prog_fd) { - printf("bpf_prog_load_xattr: %s\n", strerror(errno)); + err = bpf_object__load(obj); + if (err) { + printf("bpf_object__load(): %s\n", strerror(errno)); return 1; } + prog_fd = bpf_program__fd(prog); lpm_map_fd = bpf_object__find_map_fd_by_name(obj, "lpm_map"); rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index fb2532d13aac..f2d90cba5164 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -450,14 +450,12 @@ static void stats_poll(int interval, int action, __u32 cfg_opt) int main(int argc, char **argv) { __u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */ - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_XDP, - }; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); int prog_fd, map_fd, opt, err; bool use_separators = true; struct config cfg = { 0 }; + struct bpf_program *prog; struct bpf_object *obj; struct bpf_map *map; char filename[256]; @@ -471,11 +469,19 @@ int main(int argc, char **argv) char *action_str = NULL; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - prog_load_attr.file = filename; - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) return EXIT_FAIL; + prog = bpf_object__next_program(obj, NULL); + bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); + + err = bpf_object__load(obj); + if (err) + return EXIT_FAIL; + prog_fd = bpf_program__fd(prog); + map = bpf_object__find_map_by_name(obj, "config_map"); stats_global_map = bpf_object__find_map_by_name(obj, "stats_global_map"); rx_queue_index_map = bpf_object__find_map_by_name(obj, "rx_queue_index_map"); diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c index ae70a7943d85..c4332d068b91 100644 --- a/samples/bpf/xdp_sample_user.c +++ b/samples/bpf/xdp_sample_user.c @@ -1218,7 +1218,7 @@ int sample_setup_maps(struct bpf_map **maps) default: return -EINVAL; } - if (bpf_map__resize(sample_map[i], sample_map_count[i]) < 0) + if (bpf_map__set_max_entries(sample_map[i], sample_map_count[i]) < 0) return -errno; } sample_map[MAP_DEVMAP_XMIT_MULTI] = maps[MAP_DEVMAP_XMIT_MULTI]; diff --git a/samples/bpf/xdp_sample_user.h b/samples/bpf/xdp_sample_user.h index 
5f44b877ecf5..f45051679977 100644 --- a/samples/bpf/xdp_sample_user.h +++ b/samples/bpf/xdp_sample_user.h @@ -61,7 +61,7 @@ static inline char *safe_strncpy(char *dst, const char *src, size_t size) #define __attach_tp(name) \ ({ \ - if (!bpf_program__is_tracing(skel->progs.name)) \ + if (bpf_program__type(skel->progs.name) != BPF_PROG_TYPE_TRACING)\ return -EINVAL; \ skel->links.name = bpf_program__attach(skel->progs.name); \ if (!skel->links.name) \ diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 7370c03c96fc..2e811e4331cc 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -152,9 +152,6 @@ static int parse_ports(const char *port_str, int *min_port, int *max_port) int main(int argc, char **argv) { - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_XDP, - }; int min_port = 0, max_port = 0, vip2tnl_map_fd; const char *optstr = "i:a:p:s:d:m:T:P:FSNh"; unsigned char opt_flags[256] = {}; @@ -162,6 +159,7 @@ int main(int argc, char **argv) __u32 info_len = sizeof(info); unsigned int kill_after_s = 0; struct iptnl_info tnl = {}; + struct bpf_program *prog; struct bpf_object *obj; struct vip vip = {}; char filename[256]; @@ -259,15 +257,20 @@ int main(int argc, char **argv) } snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - prog_load_attr.file = filename; - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) return 1; - if (!prog_fd) { - printf("bpf_prog_load_xattr: %s\n", strerror(errno)); + prog = bpf_object__next_program(obj, NULL); + bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); + + err = bpf_object__load(obj); + if (err) { + printf("bpf_object__load(): %s\n", strerror(errno)); return 1; } + prog_fd = bpf_program__fd(prog); rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); vip2tnl_map_fd = bpf_object__find_map_fd_by_name(obj, "vip2tnl"); diff --git a/scripts/pahole-flags.sh b/scripts/pahole-flags.sh index e6093adf4c06..c293941612e7 100755 --- a/scripts/pahole-flags.sh +++ b/scripts/pahole-flags.sh @@ -7,7 +7,7 @@ if ! [ -x "$(command -v ${PAHOLE})" ]; then exit 0 fi -pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/') +pahole_ver=$($(dirname $0)/pahole-version.sh ${PAHOLE}) if [ "${pahole_ver}" -ge "118" ] && [ "${pahole_ver}" -le "121" ]; then # pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars diff --git a/scripts/pahole-version.sh b/scripts/pahole-version.sh new file mode 100755 index 000000000000..f8a32ab93ad1 --- /dev/null +++ b/scripts/pahole-version.sh @@ -0,0 +1,13 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Usage: $ ./pahole-version.sh pahole +# +# Prints pahole's version in a 3-digit form, such as 119 for v1.19. + +if [ ! 
-x "$(command -v "$@")" ]; then + echo 0 + exit 1 +fi + +"$@" --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/' diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 111dff809c7b..606743c6db41 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -310,7 +310,7 @@ void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, { const char *prog_name = prog_info->name; const struct btf_type *func_type; - const struct bpf_func_info finfo; + const struct bpf_func_info finfo = {}; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); struct btf *prog_btf = NULL; diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index e999159fa28d..9c894b1447de 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -487,17 +487,12 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types, size_t maxlen; bool res; - if (ifindex) - /* Only test offload-able program types */ - switch (prog_type) { - case BPF_PROG_TYPE_SCHED_CLS: - case BPF_PROG_TYPE_XDP: - break; - default: - return; - } + if (ifindex) { + p_info("BPF offload feature probing is not supported"); + return; + } - res = bpf_probe_prog_type(prog_type, ifindex); + res = libbpf_probe_bpf_prog_type(prog_type, NULL); #ifdef USE_LIBCAP /* Probe may succeed even if program load fails, for unprivileged users * check that we did not fail because of insufficient permissions @@ -535,7 +530,12 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix, size_t maxlen; bool res; - res = bpf_probe_map_type(map_type, ifindex); + if (ifindex) { + p_info("BPF offload feature probing is not supported"); + return; + } + + res = libbpf_probe_bpf_map_type(map_type, NULL); /* Probe result depends on the success of map creation, no additional * check required for unprivileged users @@ -567,7 +567,12 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type, bool res = false; if (supported_type) { - res = bpf_probe_helper(id, prog_type, ifindex); + if (ifindex) { + p_info("BPF offload feature probing is not supported"); + return; + } + + res = libbpf_probe_bpf_helper(prog_type, id, NULL); #ifdef USE_LIBCAP /* Probe may succeed even if program load fails, for * unprivileged users check that we did not fail because of diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 43e3f8700ecc..eacfc6a2060d 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -378,13 +378,16 @@ static void codegen_attach_detach(struct bpf_object *obj, const char *obj_name) int prog_fd = skel->progs.%2$s.prog_fd; \n\ ", obj_name, bpf_program__name(prog)); - switch (bpf_program__get_type(prog)) { + switch (bpf_program__type(prog)) { case BPF_PROG_TYPE_RAW_TRACEPOINT: tp_name = strchr(bpf_program__section_name(prog), '/') + 1; - printf("\tint fd = bpf_raw_tracepoint_open(\"%s\", prog_fd);\n", tp_name); + printf("\tint fd = skel_raw_tracepoint_open(\"%s\", prog_fd);\n", tp_name); break; case BPF_PROG_TYPE_TRACING: - printf("\tint fd = bpf_raw_tracepoint_open(NULL, prog_fd);\n"); + if (bpf_program__expected_attach_type(prog) == BPF_TRACE_ITER) + printf("\tint fd = skel_link_create(prog_fd, 0, BPF_TRACE_ITER);\n"); + else + printf("\tint fd = skel_raw_tracepoint_open(NULL, prog_fd);\n"); break; default: printf("\tint fd = ((void)prog_fd, 0); /* auto-attach not supported */\n"); diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 9d01fa9de033..490f7bd54e4c 100644 --- a/tools/bpf/bpftool/main.c +++ 
b/tools/bpf/bpftool/main.c @@ -478,14 +478,11 @@ int main(int argc, char **argv) } if (!legacy_libbpf) { - enum libbpf_strict_mode mode; - /* Allow legacy map definitions for skeleton generation. * It will still be rejected if users use LIBBPF_STRICT_ALL * mode for loading generated skeleton. */ - mode = (__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS; - ret = libbpf_set_strict_mode(mode); + ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS); if (ret) p_err("failed to enable libbpf strict mode: %d", ret); } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index cf935c63e6f5..92a6f679ef7d 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1272,12 +1272,12 @@ static int do_run(int argc, char **argv) { char *data_fname_in = NULL, *data_fname_out = NULL; char *ctx_fname_in = NULL, *ctx_fname_out = NULL; - struct bpf_prog_test_run_attr test_attr = {0}; const unsigned int default_size = SZ_32K; void *data_in = NULL, *data_out = NULL; void *ctx_in = NULL, *ctx_out = NULL; unsigned int repeat = 1; int fd, err; + LIBBPF_OPTS(bpf_test_run_opts, test_attr); if (!REQ_ARGS(4)) return -1; @@ -1395,14 +1395,13 @@ static int do_run(int argc, char **argv) goto free_ctx_in; } - test_attr.prog_fd = fd; test_attr.repeat = repeat; test_attr.data_in = data_in; test_attr.data_out = data_out; test_attr.ctx_in = ctx_in; test_attr.ctx_out = ctx_out; - err = bpf_prog_test_run_xattr(&test_attr); + err = bpf_prog_test_run_opts(fd, &test_attr); if (err) { p_err("failed to run program: %s", strerror(errno)); goto free_ctx_out; @@ -2283,10 +2282,10 @@ static int do_profile(int argc, char **argv) profile_obj->rodata->num_metric = num_metric; /* adjust map sizes */ - bpf_map__resize(profile_obj->maps.events, num_metric * num_cpu); - bpf_map__resize(profile_obj->maps.fentry_readings, num_metric); - bpf_map__resize(profile_obj->maps.accum_readings, num_metric); - bpf_map__resize(profile_obj->maps.counts, 1); + bpf_map__set_max_entries(profile_obj->maps.events, num_metric * num_cpu); + bpf_map__set_max_entries(profile_obj->maps.fentry_readings, num_metric); + bpf_map__set_max_entries(profile_obj->maps.accum_readings, num_metric); + bpf_map__set_max_entries(profile_obj->maps.counts, 1); /* change target name */ profile_tgt_name = profile_target_name(profile_tgt_fd); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 16a7574292a5..afe3d0d7f5f2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5076,6 +5076,16 @@ union bpf_attr { * associated to *xdp_md*, at *offset*. * Return * 0 on success, or a negative error in case of failure. + * + * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags) + * Description + * Read *size* bytes from user space address *user_ptr* in *tsk*'s + * address space, and stores the data in *dst*. *flags* is not + * used yet and is provided for future extensibility. This helper + * can only be used by sleepable programs. + * Return + * 0 on success, or a negative error in case of failure. On error + * *dst* buffer is zeroed out. 
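[editor's note: a minimal sleepable-program sketch for this helper; the
section name relies on the "iter.s/" definition added to libbpf later in
this patch, and user_ptr stands for a userspace address supplied by the
loader:

        const volatile __u64 user_ptr;          (set by the loader)

        SEC("iter.s/task")
        int read_user_buf(struct bpf_iter__task *ctx)
        {
                struct task_struct *task = ctx->task;
                char buf[16];

                if (!task)
                        return 0;
                if (bpf_copy_from_user_task(buf, sizeof(buf),
                                            (const void *)user_ptr, task, 0))
                        return 0;
                bpf_printk("%s", buf);
                return 0;
        }
]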
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5269,6 +5279,7 @@ union bpf_attr { FN(xdp_get_buff_len), \ FN(xdp_load_bytes), \ FN(xdp_store_bytes), \ + FN(copy_from_user_task), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5563,7 +5574,8 @@ struct bpf_sock { __u32 src_ip4; __u32 src_ip6[4]; __u32 src_port; /* host byte order */ - __u32 dst_port; /* network byte order */ + __be16 dst_port; /* network byte order */ + __u16 :16; /* zero padding */ __u32 dst_ip4; __u32 dst_ip6[4]; __u32 state; @@ -6441,7 +6453,8 @@ struct bpf_sk_lookup { __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ __u32 remote_ip4; /* Network byte order */ __u32 remote_ip6[4]; /* Network byte order */ - __u32 remote_port; /* Network byte order */ + __be16 remote_port; /* Network byte order */ + __u16 :16; /* Zero padding */ __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index f947b61b2107..b8b37fe76006 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -131,7 +131,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ sed 's/\[.*\]//' | \ - awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) @@ -194,7 +194,7 @@ check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT) sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ sed 's/\[.*\]//' | \ - awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}'| \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \ sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \ diff -u $(OUTPUT)libbpf_global_syms.tmp \ diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index c2e8327010f9..16b21757b8bf 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -453,12 +453,14 @@ struct bpf_prog_test_run_attr { * out: length of cxt_out */ }; +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead") LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr); /* * bpf_prog_test_run does not check that data_out is large enough. Consider - * using bpf_prog_test_run_xattr instead. + * using bpf_prog_test_run_opts instead. 
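[editor's note: migration sketch for the two deprecations above, mirroring
the bpftool do_run() conversion earlier in this patch; pkt is a caller-built
test packet:

        LIBBPF_OPTS(bpf_test_run_opts, topts,
                .data_in = pkt,
                .data_size_in = sizeof(pkt),
                .repeat = 1,
        );

        err = bpf_prog_test_run_opts(prog_fd, &topts);

buffer sizes are explicit in the opts struct, which is what makes the
"data_out is large enough" caveat checkable.]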
*/ +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead") LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration); diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 90f56b0f585f..e3a8c947e89f 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -76,6 +76,9 @@ #define __PT_RC_REG ax #define __PT_SP_REG sp #define __PT_IP_REG ip +/* syscall uses r10 for PARM4 */ +#define PT_REGS_PARM4_SYSCALL(x) ((x)->r10) +#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(x, r10) #else @@ -105,6 +108,9 @@ #define __PT_RC_REG rax #define __PT_SP_REG rsp #define __PT_IP_REG rip +/* syscall uses r10 for PARM4 */ +#define PT_REGS_PARM4_SYSCALL(x) ((x)->r10) +#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(x, r10) #endif /* __i386__ */ @@ -112,6 +118,10 @@ #elif defined(bpf_target_s390) +struct pt_regs___s390 { + unsigned long orig_gpr2; +}; + /* s390 provides user_pt_regs instead of struct pt_regs to userspace */ #define __PT_REGS_CAST(x) ((const user_pt_regs *)(x)) #define __PT_PARM1_REG gprs[2] @@ -124,6 +134,8 @@ #define __PT_RC_REG gprs[2] #define __PT_SP_REG gprs[15] #define __PT_IP_REG psw.addr +#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; }) +#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___s390 *)(x), orig_gpr2) #elif defined(bpf_target_arm) @@ -140,6 +152,10 @@ #elif defined(bpf_target_arm64) +struct pt_regs___arm64 { + unsigned long orig_x0; +}; + /* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ #define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x)) #define __PT_PARM1_REG regs[0] @@ -152,6 +168,8 @@ #define __PT_RC_REG regs[0] #define __PT_SP_REG sp #define __PT_IP_REG pc +#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; }) +#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___arm64 *)(x), orig_x0) #elif defined(bpf_target_mips) @@ -178,6 +196,8 @@ #define __PT_RC_REG gpr[3] #define __PT_SP_REG sp #define __PT_IP_REG nip +/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER. */ +#define PT_REGS_SYSCALL_REGS(ctx) ctx #elif defined(bpf_target_sparc) @@ -206,10 +226,12 @@ #define __PT_PARM4_REG a3 #define __PT_PARM5_REG a4 #define __PT_RET_REG ra -#define __PT_FP_REG fp +#define __PT_FP_REG s0 #define __PT_RC_REG a5 #define __PT_SP_REG sp -#define __PT_IP_REG epc +#define __PT_IP_REG pc +/* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. 
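[editor's note: on ARCH_HAS_SYSCALL_WRAPPER architectures the syscall body
receives a single pt_regs pointer holding the real arguments, so a kprobe
must dereference one level; on powerpc and riscv the macro collapses to ctx
itself. Hand-rolled sketch using the *_CORE_SYSCALL() accessors defined in
this hunk, x86-64 handler naming assumed:

        SEC("kprobe/__x64_sys_openat")
        int openat_entry(struct pt_regs *ctx)
        {
                struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx);
                const char *filename;
                char buf[64];

                filename = (const char *)PT_REGS_PARM2_CORE_SYSCALL(regs);
                bpf_probe_read_user_str(buf, sizeof(buf), filename);
                return 0;
        }
]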
*/ +#define PT_REGS_SYSCALL_REGS(ctx) ctx #endif @@ -263,6 +285,26 @@ struct pt_regs; #endif +#ifndef PT_REGS_PARM1_SYSCALL +#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1(x) +#endif +#define PT_REGS_PARM2_SYSCALL(x) PT_REGS_PARM2(x) +#define PT_REGS_PARM3_SYSCALL(x) PT_REGS_PARM3(x) +#ifndef PT_REGS_PARM4_SYSCALL +#define PT_REGS_PARM4_SYSCALL(x) PT_REGS_PARM4(x) +#endif +#define PT_REGS_PARM5_SYSCALL(x) PT_REGS_PARM5(x) + +#ifndef PT_REGS_PARM1_CORE_SYSCALL +#define PT_REGS_PARM1_CORE_SYSCALL(x) PT_REGS_PARM1_CORE(x) +#endif +#define PT_REGS_PARM2_CORE_SYSCALL(x) PT_REGS_PARM2_CORE(x) +#define PT_REGS_PARM3_CORE_SYSCALL(x) PT_REGS_PARM3_CORE(x) +#ifndef PT_REGS_PARM4_CORE_SYSCALL +#define PT_REGS_PARM4_CORE_SYSCALL(x) PT_REGS_PARM4_CORE(x) +#endif +#define PT_REGS_PARM5_CORE_SYSCALL(x) PT_REGS_PARM5_CORE(x) + #else /* defined(bpf_target_defined) */ #define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) @@ -290,8 +332,30 @@ struct pt_regs; #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM2_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM3_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM4_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM5_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) + +#define PT_REGS_PARM1_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM2_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM3_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM4_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM5_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) + #endif /* defined(bpf_target_defined) */ +/* + * When invoked from a syscall handler kprobe, returns a pointer to a + * struct pt_regs containing syscall arguments and suitable for passing to + * PT_REGS_PARMn_SYSCALL() and PT_REGS_PARMn_CORE_SYSCALL(). + */ +#ifndef PT_REGS_SYSCALL_REGS +/* By default, assume that the arch selects ARCH_HAS_SYSCALL_WRAPPER. */ +#define PT_REGS_SYSCALL_REGS(ctx) ((struct pt_regs *)PT_REGS_PARM1(ctx)) +#endif + #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b #endif @@ -406,4 +470,39 @@ typeof(name(0)) name(struct pt_regs *ctx) \ } \ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) +#define ___bpf_syscall_args0() ctx +#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs) +#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs) +#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs) +#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs) +#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs) +#define ___bpf_syscall_args(args...) ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args) + +/* + * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for + * tracing syscall functions, like __x64_sys_close. 
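[editor's note: typical use, x86-64 handler naming assumed:

        SEC("kprobe/__x64_sys_close")
        int BPF_KPROBE_SYSCALL(close_entry, int fd)
        {
                bpf_printk("close(%d)", fd);
                return 0;
        }

the macro pulls fd out of the wrapped pt_regs via the *_CORE_SYSCALL()
accessors above.]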
It hides the underlying + * platform-specific low-level way of getting syscall input arguments from + * struct pt_regs, and provides a familiar typed and named function arguments + * syntax and semantics of accessing syscall input parameters. + * + * Original struct pt_regs* context is preserved as 'ctx' argument. This might + * be necessary when using BPF helpers like bpf_perf_event_output(). + * + * This macro relies on BPF CO-RE support. + */ +#define BPF_KPROBE_SYSCALL(name, args...) \ +name(struct pt_regs *ctx); \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args); \ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx); \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_syscall_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args) + #endif diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 51862fdee850..951ac7475794 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -147,11 +147,10 @@ LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API void btf__set_fd(struct btf *btf, int fd); -LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__raw_data() instead") -LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); +LIBBPF_DEPRECATED_SINCE(0, 7, "this API is not necessary when BTF-defined maps are used") LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, __u32 expected_key_size, __u32 expected_value_size, @@ -159,8 +158,7 @@ LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size); LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); -LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, - __u32 *size); +LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size); LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions") int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ext, @@ -171,8 +169,10 @@ int btf_ext__reloc_line_info(const struct btf *btf, const struct btf_ext *btf_ext, const char *sec_name, __u32 insns_cnt, void **line_info, __u32 *cnt); -LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); -LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info is deprecated; write custom func_info parsing to fetch rec_size") +__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info is deprecated; write custom line_info parsing to fetch rec_size") +__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); diff --git 
a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index b9a3260c83cb..07ebe70d3a30 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1861,14 +1861,16 @@ static int btf_dump_array_data(struct btf_dump *d, { const struct btf_array *array = btf_array(t); const struct btf_type *elem_type; - __u32 i, elem_size = 0, elem_type_id; + __u32 i, elem_type_id; + __s64 elem_size; bool is_array_member; elem_type_id = array->type; elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL); elem_size = btf__resolve_size(d->btf, elem_type_id); if (elem_size <= 0) { - pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id); + pr_warn("unexpected elem size %zd for array type [%u]\n", + (ssize_t)elem_size, id); return -EINVAL; } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a8c750373ad5..2262bcdfee92 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -156,14 +156,6 @@ enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE; int libbpf_set_strict_mode(enum libbpf_strict_mode mode) { - /* __LIBBPF_STRICT_LAST is the last power-of-2 value used + 1, so to - * get all possible values we compensate last +1, and then (2*x - 1) - * to get the bit mask - */ - if (mode != LIBBPF_STRICT_ALL - && (mode & ~((__LIBBPF_STRICT_LAST - 1) * 2 - 1))) - return errno = EINVAL, -EINVAL; - libbpf_mode = mode; return 0; } @@ -237,6 +229,8 @@ enum sec_def_flags { SEC_SLOPPY_PFX = 16, /* BPF program support non-linear XDP buffer */ SEC_XDP_FRAGS = 32, + /* deprecated sec definitions not supposed to be used */ + SEC_DEPRECATED = 64, }; struct bpf_sec_def { @@ -4200,9 +4194,12 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) if (!bpf_map__is_internal(map)) { pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n"); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size, def->value_size, &key_type_id, &value_type_id); +#pragma GCC diagnostic pop } else { /* * LLVM annotates global data differently in BTF, that is, @@ -6575,6 +6572,10 @@ static int libbpf_preload_prog(struct bpf_program *prog, if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; + if (def & SEC_DEPRECATED) + pr_warn("SEC(\"%s\") is deprecated, please see https://github.com/libbpf/libbpf/wiki/Libbpf-1.0-migration-guide#bpf-program-sec-annotation-deprecations for details\n", + prog->sec_name); + if ((prog->type == BPF_PROG_TYPE_TRACING || prog->type == BPF_PROG_TYPE_LSM || prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { @@ -7896,10 +7897,8 @@ int bpf_map__set_pin_path(struct bpf_map *map, const char *path) return 0; } -const char *bpf_map__get_pin_path(const struct bpf_map *map) -{ - return map->pin_path; -} +__alias(bpf_map__pin_path) +const char *bpf_map__get_pin_path(const struct bpf_map *map); const char *bpf_map__pin_path(const struct bpf_map *map) { @@ -8464,7 +8463,10 @@ static int bpf_program_nth_fd(const struct bpf_program *prog, int n) return fd; } -enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog) +__alias(bpf_program__type) +enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); + +enum bpf_prog_type bpf_program__type(const struct bpf_program *prog) { return prog->type; } @@ -8508,8 +8510,10 @@ BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS); BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT); 
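/* [editor's note] the __alias() pattern used above for bpf_map__get_pin_path
 * and bpf_program__get_type keeps the old symbol as an ELF alias of its
 * renamed replacement, so already-built callers keep resolving it without a
 * duplicated function body. Generic sketch:
 *
 *      #define __alias(symbol) __attribute__((alias(#symbol)))
 *
 *      int new_api(void) { return 42; }
 *      __alias(new_api) int old_api(void);     same address as new_api()
 */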
BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP); -enum bpf_attach_type -bpf_program__get_expected_attach_type(const struct bpf_program *prog) +__alias(bpf_program__expected_attach_type) +enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); + +enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog) { return prog->expected_attach_type; } @@ -8593,7 +8597,7 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), SEC_DEF("uretprobe/", KPROBE, 0, SEC_NONE), SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), - SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED), SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX), SEC_DEF("tracepoint/", TRACEPOINT, 0, SEC_NONE, attach_tp), SEC_DEF("tp/", TRACEPOINT, 0, SEC_NONE, attach_tp), @@ -8612,11 +8616,14 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("lsm/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), + SEC_DEF("iter.s/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), - SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), + SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), + SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE | SEC_DEPRECATED), SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS), - SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), + SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), + SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE | SEC_DEPRECATED), SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS), SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX), @@ -9459,7 +9466,7 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, open_attr.file = attr->file; open_attr.prog_type = attr->prog_type; - obj = bpf_object__open_xattr(&open_attr); + obj = __bpf_object__open_xattr(&open_attr, 0); err = libbpf_get_error(obj); if (err) return libbpf_err(-ENOENT); @@ -9476,7 +9483,7 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, bpf_program__set_expected_attach_type(prog, attach_type); } - if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) { + if (bpf_program__type(prog) == BPF_PROG_TYPE_UNSPEC) { /* * we haven't guessed from section name and user * didn't provide a fallback type, too bad... 
@@ -9493,7 +9500,7 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, } bpf_object__for_each_map(map, obj) { - if (!bpf_map__is_offload_neutral(map)) + if (map->def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) map->map_ifindex = attr->ifindex; } @@ -10527,7 +10534,7 @@ bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id return libbpf_err_ptr(-ENOMEM); link->detach = &bpf_link__detach_fd; - attach_type = bpf_program__get_expected_attach_type(prog); + attach_type = bpf_program__expected_attach_type(prog); link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts); if (link_fd < 0) { link_fd = -errno; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 94670066de62..c8d8daad212e 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -180,9 +180,11 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts); /* deprecated bpf_object__open variants */ +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open_mem() instead") LIBBPF_API struct bpf_object * bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, const char *name); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open_file() instead") LIBBPF_API struct bpf_object * bpf_object__open_xattr(struct bpf_object_open_attr *attr); @@ -244,8 +246,10 @@ struct bpf_object *bpf_object__next(struct bpf_object *prev); (pos) = (tmp), (tmp) = bpf_object__next(tmp)) typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *); +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv, bpf_object_clear_priv_t clear_priv); +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog); LIBBPF_API int @@ -277,9 +281,10 @@ bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *prog) typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv, bpf_program_clear_priv_t clear_priv); - +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog); LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex); @@ -591,26 +596,39 @@ LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n); /* * Adjust type of BPF program. Default is kprobe. 
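[editor's note: every per-type setter below now carries a deprecation, and
the matching bpf_program__is_*() predicates further down get the same
treatment; the generic replacements are one-liners, as already applied to
the samples and bpftool earlier in this patch:

        bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
        bool is_xdp = bpf_program__type(prog) == BPF_PROG_TYPE_XDP;
]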
*/ +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_lsm(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog); -LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); +LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog); LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type); LIBBPF_API enum bpf_attach_type -bpf_program__get_expected_attach_type(const struct bpf_program *prog); +bpf_program__expected_attach_type(const struct bpf_program *prog); LIBBPF_API void bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type); @@ -631,18 +649,31 @@ LIBBPF_API int bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, const char *attach_func_name); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_lsm(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API 
bool bpf_program__is_xdp(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog); /* @@ -716,6 +747,7 @@ LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type); /* get/set map size (max_entries) */ LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map); LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__set_max_entries() instead") LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); /* get/set map flags */ LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map); @@ -740,8 +772,10 @@ LIBBPF_API __u64 bpf_map__map_extra(const struct bpf_map *map); LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, bpf_map_clear_priv_t clear_priv); +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); @@ -758,7 +792,6 @@ LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); */ LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); -LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index e10f0822845a..aef6253a90c8 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -424,10 +424,12 @@ LIBBPF_0.6.0 { LIBBPF_0.7.0 { global: bpf_btf_load; + bpf_program__expected_attach_type; bpf_program__log_buf; bpf_program__log_level; bpf_program__set_log_buf; bpf_program__set_log_level; + bpf_program__type; bpf_xdp_attach; bpf_xdp_detach; bpf_xdp_query; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 1565679eb432..bc86b82e90d1 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -92,6 +92,9 @@ # define offsetofend(TYPE, FIELD) \ (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD)) #endif +#ifndef __alias +#define __alias(symbol) __attribute__((alias(#symbol))) +#endif /* Check whether a string `str` has prefix `pfx`, regardless if `pfx` is * a string literal known at compilation time or char * pointer known only at diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h index 3c2b281c2bc3..a283cf031665 100644 --- 
a/tools/lib/bpf/libbpf_legacy.h +++ b/tools/lib/bpf/libbpf_legacy.h @@ -86,6 +86,23 @@ LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode); #define DECLARE_LIBBPF_OPTS LIBBPF_OPTS +/* "Discouraged" APIs which don't follow consistent libbpf naming patterns. + * They are normally trivial aliases or wrappers for proper APIs and are + * left to minimize unnecessary disruption for users of libbpf. But they + * shouldn't be used going forward. + */ + +struct bpf_program; +struct bpf_map; +struct btf; +struct btf_ext; + +LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); +LIBBPF_API enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); +LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); +LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); +LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index 0b84d8e6b72a..dcd3336512d4 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -70,19 +70,85 @@ static inline int skel_closenz(int fd) return -EINVAL; } +#ifndef offsetofend +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) +#endif + +static inline int skel_map_create(enum bpf_map_type map_type, + const char *map_name, + __u32 key_size, + __u32 value_size, + __u32 max_entries) +{ + const size_t attr_sz = offsetofend(union bpf_attr, map_extra); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + + attr.map_type = map_type; + strncpy(attr.map_name, map_name, sizeof(attr.map_name)); + attr.key_size = key_size; + attr.value_size = value_size; + attr.max_entries = max_entries; + + return skel_sys_bpf(BPF_MAP_CREATE, &attr, attr_sz); +} + +static inline int skel_map_update_elem(int fd, const void *key, + const void *value, __u64 flags) +{ + const size_t attr_sz = offsetofend(union bpf_attr, flags); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + attr.map_fd = fd; + attr.key = (long) key; + attr.value = (long) value; + attr.flags = flags; + + return skel_sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz); +} + +static inline int skel_raw_tracepoint_open(const char *name, int prog_fd) +{ + const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint.prog_fd); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + attr.raw_tracepoint.name = (long) name; + attr.raw_tracepoint.prog_fd = prog_fd; + + return skel_sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, attr_sz); +} + +static inline int skel_link_create(int prog_fd, int target_fd, + enum bpf_attach_type attach_type) +{ + const size_t attr_sz = offsetofend(union bpf_attr, link_create.iter_info_len); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + attr.link_create.prog_fd = prog_fd; + attr.link_create.target_fd = target_fd; + attr.link_create.attach_type = attach_type; + + return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz); +} + static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) { int map_fd = -1, prog_fd = -1, key = 0, err; union bpf_attr attr; - map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1, NULL); + map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1); if (map_fd < 0) { opts->errstr = "failed to create loader map"; err = -errno; goto out; } - err = bpf_map_update_elem(map_fd, &key, opts->data, 0); 
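Before the converted call sites that follow, note the convention shared by all of the skel_* helpers above: each zeroes union bpf_attr only up to the last field its command consumes (computed with offsetofend) and passes that exact size to the syscall, so the uninitialized tail of the on-stack union is never handed to older kernels whose bpf_attr ends earlier. A hedged sketch of the same pattern applied to one more command (this helper is illustrative, not part of the patch)::

    static inline int skel_map_delete_elem(int fd, const void *key)
    {
            /* BPF_MAP_DELETE_ELEM reads nothing past 'key' */
            const size_t attr_sz = offsetofend(union bpf_attr, key);
            union bpf_attr attr;

            memset(&attr, 0, attr_sz);
            attr.map_fd = fd;
            attr.key = (long) key;

            return skel_sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz);
    }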
+ err = skel_map_update_elem(map_fd, &key, opts->data, 0); if (err < 0) { opts->errstr = "failed to update loader map"; err = -errno; diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index 8ac0a3a457ef..0bc25a56cfef 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -13,7 +13,7 @@ static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) { struct bpf_object *obj; - obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL); + obj = bpf_object__open_mem(obj_buf, obj_buf_sz, NULL); if (libbpf_get_error(obj)) return TEST_FAIL; bpf_object__close(obj); diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index ef2832b4d5cc..3cd5ae200f2c 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -54,6 +54,7 @@ static bool libbpf_initialized; struct bpf_object * bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name) { + LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = name); struct bpf_object *obj; if (!libbpf_initialized) { @@ -61,7 +62,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name) libbpf_initialized = true; } - obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name); + obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts); if (IS_ERR_OR_NULL(obj)) { pr_debug("bpf: failed to load buffer\n"); return ERR_PTR(-EINVAL); @@ -72,6 +73,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name) struct bpf_object *bpf__prepare_load(const char *filename, bool source) { + LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = filename); struct bpf_object *obj; if (!libbpf_initialized) { @@ -94,7 +96,7 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE); } else pr_debug("bpf: successful builtin compilation\n"); - obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename); + obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts); if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj) llvm__dump_obj(filename, obj_buf, obj_buf_sz); @@ -654,11 +656,11 @@ int bpf__probe(struct bpf_object *obj) } if (priv->is_tp) { - bpf_program__set_tracepoint(prog); + bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT); continue; } - bpf_program__set_kprobe(prog); + bpf_program__set_type(prog, BPF_PROG_TYPE_KPROBE); pev = &priv->pev; err = convert_perf_probe_events(pev, 1); diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 945f92d71db3..91ea729990da 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -330,7 +330,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \ test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \ - map_ptr_kern.c core_kern.c + map_ptr_kern.c core_kern.c core_kern_overflow.c # Generate both light skeleton and libbpf skeleton for these LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test_subprog.c SKEL_BLACKLIST += $$(LSKELS) diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 42ef250c7acc..d099d91adc3b 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -206,6 +206,8 @@ btf_tag test and Clang version The btf_tag selftest requires LLVM support to recognize the btf_decl_tag and btf_type_tag attributes. They are introduced in `Clang 14` [0_, 1_]. 
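For context on what the two attributes look like at the source level: btf_decl_tag annotates a declaration, while btf_type_tag annotates the pointed-to type, and each is recorded as the corresponding BTF kind in the object's type information. A short illustration (the macro and field names are invented for this sketch, assuming Clang 14 or newer)::

    #define __decl_tag(x) __attribute__((btf_decl_tag(x)))
    #define __utag        __attribute__((btf_type_tag("user")))

    struct conn_info {
            int queue_id __decl_tag("key_field"); /* -> BTF_KIND_DECL_TAG */
            int __utag *ubuf;                     /* -> BTF_KIND_TYPE_TAG "user" */
    };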
+The subtests ``btf_type_tag_user_{mod1, mod2, vmlinux}`` also require +pahole version ``1.23``. Without them, the btf_tag selftest will be skipped and you will observe: diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c index da8593b3494a..c2554f9695ff 100644 --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -151,7 +151,7 @@ static struct ringbuf_bench *ringbuf_setup_skeleton(void) /* record data + header take 16 bytes */ skel->rodata->wakeup_data_size = args.sample_rate * 16; - bpf_map__resize(skel->maps.ringbuf, args.ringbuf_sz); + bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz); if (ringbuf_bench__load(skel)) { fprintf(stderr, "failed to load skeleton\n"); diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 7f957c55a3ca..0c481de2833d 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -154,7 +154,6 @@ static void *uprobe_producer_without_nop(void *input) static void usetup(bool use_retprobe, bool use_nop) { size_t uprobe_offset; - ssize_t base_addr; struct bpf_link *link; setup_libbpf(); @@ -165,11 +164,10 @@ static void usetup(bool use_retprobe, bool use_nop) exit(1); } - base_addr = get_base_addr(); if (use_nop) - uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop, base_addr); + uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop); else - uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop, base_addr); + uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop); link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe, use_retprobe, diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index bdbacf5adcd2..27d63be47b95 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -13,6 +13,10 @@ #define CREATE_TRACE_POINTS #include "bpf_testmod-events.h" +typedef int (*func_proto_typedef)(long); +typedef int (*func_proto_typedef_nested1)(func_proto_typedef); +typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1); + DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123; noinline void @@ -21,6 +25,27 @@ bpf_testmod_test_mod_kfunc(int i) *(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i; } +struct bpf_testmod_btf_type_tag_1 { + int a; +}; + +struct bpf_testmod_btf_type_tag_2 { + struct bpf_testmod_btf_type_tag_1 __user *p; +}; + +noinline int +bpf_testmod_test_btf_type_tag_user_1(struct bpf_testmod_btf_type_tag_1 __user *arg) { + BTF_TYPE_EMIT(func_proto_typedef); + BTF_TYPE_EMIT(func_proto_typedef_nested1); + BTF_TYPE_EMIT(func_proto_typedef_nested2); + return arg->a; +} + +noinline int +bpf_testmod_test_btf_type_tag_user_2(struct bpf_testmod_btf_type_tag_2 *arg) { + return arg->p->a; +} + noinline int bpf_testmod_loop_test(int n) { int i, sum = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c index 86b7d5d84eec..ab62aba10e2b 100644 --- a/tools/testing/selftests/bpf/prog_tests/atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/atomics.c @@ -7,18 +7,18 @@ static void test_add(struct atomics_lskel *skel) { int err, prog_fd; - __u32 duration = 0, retval; int link_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); link_fd = 
atomics_lskel__add__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(add)")) return; prog_fd = skel->progs.add.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run add", - "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) goto cleanup; ASSERT_EQ(skel->data->add64_value, 3, "add64_value"); @@ -39,19 +39,18 @@ cleanup: static void test_sub(struct atomics_lskel *skel) { int err, prog_fd; - __u32 duration = 0, retval; int link_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); link_fd = atomics_lskel__sub__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(sub)")) return; prog_fd = skel->progs.sub.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run sub", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration)) + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) goto cleanup; ASSERT_EQ(skel->data->sub64_value, -1, "sub64_value"); @@ -72,18 +71,18 @@ cleanup: static void test_and(struct atomics_lskel *skel) { int err, prog_fd; - __u32 duration = 0, retval; int link_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); link_fd = atomics_lskel__and__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(and)")) return; prog_fd = skel->progs.and.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run and", - "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) goto cleanup; ASSERT_EQ(skel->data->and64_value, 0x010ull << 32, "and64_value"); @@ -100,19 +99,18 @@ cleanup: static void test_or(struct atomics_lskel *skel) { int err, prog_fd; - __u32 duration = 0, retval; int link_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); link_fd = atomics_lskel__or__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(or)")) return; prog_fd = skel->progs.or.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run or", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration)) + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) goto cleanup; ASSERT_EQ(skel->data->or64_value, 0x111ull << 32, "or64_value"); @@ -129,18 +127,18 @@ cleanup: static void test_xor(struct atomics_lskel *skel) { int err, prog_fd; - __u32 duration = 0, retval; int link_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); link_fd = atomics_lskel__xor__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(xor)")) return; prog_fd = skel->progs.xor.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run xor", - "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) goto cleanup; ASSERT_EQ(skel->data->xor64_value, 0x101ull 
<< 32, "xor64_value"); @@ -157,18 +155,18 @@ cleanup: static void test_cmpxchg(struct atomics_lskel *skel) { int err, prog_fd; - __u32 duration = 0, retval; int link_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); link_fd = atomics_lskel__cmpxchg__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(cmpxchg)")) return; prog_fd = skel->progs.cmpxchg.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run cmpxchg", - "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) goto cleanup; ASSERT_EQ(skel->data->cmpxchg64_value, 2, "cmpxchg64_value"); @@ -186,18 +184,18 @@ cleanup: static void test_xchg(struct atomics_lskel *skel) { int err, prog_fd; - __u32 duration = 0, retval; int link_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); link_fd = atomics_lskel__xchg__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(xchg)")) return; prog_fd = skel->progs.xchg.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run xchg", - "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) goto cleanup; ASSERT_EQ(skel->data->xchg64_value, 2, "xchg64_value"); diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index d0bd51eb23c8..d48f6e533e1e 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -5,9 +5,10 @@ /* this is how USDT semaphore is actually defined, except volatile modifier */ volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes"))); -/* attach point */ -static void method(void) { - return ; +/* uprobe attach point */ +static void trigger_func(void) +{ + asm volatile (""); } void test_attach_probe(void) @@ -17,8 +18,7 @@ void test_attach_probe(void) struct bpf_link *kprobe_link, *kretprobe_link; struct bpf_link *uprobe_link, *uretprobe_link; struct test_attach_probe* skel; - size_t uprobe_offset; - ssize_t base_addr, ref_ctr_offset; + ssize_t uprobe_offset, ref_ctr_offset; bool legacy; /* Check if new-style kprobe/uprobe API is supported. 
@@ -34,11 +34,9 @@ void test_attach_probe(void) */ legacy = access("/sys/bus/event_source/devices/kprobe/type", F_OK) != 0; - base_addr = get_base_addr(); - if (CHECK(base_addr < 0, "get_base_addr", - "failed to find base addr: %zd", base_addr)) + uprobe_offset = get_uprobe_offset(&trigger_func); + if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset")) return; - uprobe_offset = get_uprobe_offset(&method, base_addr); ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr); if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset")) @@ -103,7 +101,7 @@ void test_attach_probe(void) goto cleanup; /* trigger & validate uprobe & uretprobe */ - method(); + trigger_func(); if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res", "wrong uprobe res: %d\n", skel->bss->uprobe_res)) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 5eea3c3a40fe..cd10df6cd0fc 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -8,6 +8,12 @@ #include <test_progs.h> #include "test_bpf_cookie.skel.h" +/* uprobe attach point */ +static void trigger_func(void) +{ + asm volatile (""); +} + static void kprobe_subtest(struct test_bpf_cookie *skel) { DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); @@ -62,11 +68,11 @@ static void uprobe_subtest(struct test_bpf_cookie *skel) DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); struct bpf_link *link1 = NULL, *link2 = NULL; struct bpf_link *retlink1 = NULL, *retlink2 = NULL; - size_t uprobe_offset; - ssize_t base_addr; + ssize_t uprobe_offset; - base_addr = get_base_addr(); - uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr); + uprobe_offset = get_uprobe_offset(&trigger_func); + if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset")) + goto cleanup; /* attach two uprobes */ opts.bpf_cookie = 0x100; @@ -99,7 +105,7 @@ static void uprobe_subtest(struct test_bpf_cookie *skel) goto cleanup; /* trigger uprobe && uretprobe */ - get_base_addr(); + trigger_func(); ASSERT_EQ(skel->bss->uprobe_res, 0x100 | 0x200, "uprobe_res"); ASSERT_EQ(skel->bss->uretprobe_res, 0x1000 | 0x2000, "uretprobe_res"); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index b84f859b1267..5142a7d130b2 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -138,6 +138,24 @@ static void test_task(void) bpf_iter_task__destroy(skel); } +static void test_task_sleepable(void) +{ + struct bpf_iter_task *skel; + + skel = bpf_iter_task__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load")) + return; + + do_dummy_read(skel->progs.dump_task_sleepable); + + ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0, + "num_expected_failure_copy_from_user_task"); + ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0, + "num_success_copy_from_user_task"); + + bpf_iter_task__destroy(skel); +} + static void test_task_stack(void) { struct bpf_iter_task_stack *skel; @@ -1252,6 +1270,8 @@ void test_bpf_iter(void) test_bpf_map(); if (test__start_subtest("task")) test_task(); + if (test__start_subtest("task_sleepable")) + test_task_sleepable(); if (test__start_subtest("task_stack")) test_task_stack(); if (test__start_subtest("task_file")) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index e3166a81e989..dd30b1e3a67c 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c 
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -11,7 +11,12 @@ enum { void test_bpf_nf_ct(int mode) { struct test_bpf_nf *skel; - int prog_fd, err, retval; + int prog_fd, err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); skel = test_bpf_nf__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load")) @@ -22,8 +27,7 @@ void test_bpf_nf_ct(int mode) else prog_fd = bpf_program__fd(skel->progs.nf_skb_ct_test); - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, - (__u32 *)&retval, NULL); + err = bpf_prog_test_run_opts(prog_fd, &topts); if (!ASSERT_OK(err, "bpf_prog_test_run")) goto end; diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 14f9b6136794..8b652f5ce423 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3939,6 +3939,25 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid component_idx", }, { + .descr = "decl_tag test #15, func, invalid func proto", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_DECL_TAG_ENC(NAME_TBD, 3, 0), /* [2] */ + BTF_FUNC_ENC(NAME_TBD, 8), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, +{ .descr = "type_tag test #1", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ @@ -4561,7 +4580,7 @@ static void do_test_file(unsigned int test_num) btf_ext__free(btf_ext); /* temporary disable LIBBPF_STRICT_MAP_DEFINITIONS to test legacy maps */ - libbpf_set_strict_mode((__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS); + libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS); obj = bpf_object__open(test->file); err = libbpf_get_error(obj); if (CHECK(err, "obj: %d", err)) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_tag.c b/tools/testing/selftests/bpf/prog_tests/btf_tag.c index 88d63e23e35f..f7560b54a6bb 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_tag.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_tag.c @@ -1,19 +1,21 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ #include <test_progs.h> -#include "btf_decl_tag.skel.h" +#include <bpf/btf.h> +#include "test_btf_decl_tag.skel.h" /* struct btf_type_tag_test is referenced in btf_type_tag.skel.h */ struct btf_type_tag_test { int **p; }; #include "btf_type_tag.skel.h" +#include "btf_type_tag_user.skel.h" static void test_btf_decl_tag(void) { - struct btf_decl_tag *skel; + struct test_btf_decl_tag *skel; - skel = btf_decl_tag__open_and_load(); + skel = test_btf_decl_tag__open_and_load(); if (!ASSERT_OK_PTR(skel, "btf_decl_tag")) return; @@ -22,7 +24,7 @@ static void test_btf_decl_tag(void) test__skip(); } - btf_decl_tag__destroy(skel); + test_btf_decl_tag__destroy(skel); } static void test_btf_type_tag(void) @@ -41,10 +43,101 @@ static void test_btf_type_tag(void) btf_type_tag__destroy(skel); } +static void test_btf_type_tag_mod_user(bool load_test_user1) +{ + const char *module_name = "bpf_testmod"; + struct btf *vmlinux_btf, *module_btf; + struct btf_type_tag_user *skel; + __s32 type_id; + int err; + + if (!env.has_testmod) { + test__skip(); + return; + } + + /* skip the test if the module does not 
have __user tags */ + vmlinux_btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF")) + return; + + module_btf = btf__load_module_btf(module_name, vmlinux_btf); + if (!ASSERT_OK_PTR(module_btf, "could not load module BTF")) + goto free_vmlinux_btf; + + type_id = btf__find_by_name_kind(module_btf, "user", BTF_KIND_TYPE_TAG); + if (type_id <= 0) { + printf("%s:SKIP: btf_type_tag attribute not in %s", __func__, module_name); + test__skip(); + goto free_module_btf; + } + + skel = btf_type_tag_user__open(); + if (!ASSERT_OK_PTR(skel, "btf_type_tag_user")) + goto free_module_btf; + + bpf_program__set_autoload(skel->progs.test_sys_getsockname, false); + if (load_test_user1) + bpf_program__set_autoload(skel->progs.test_user2, false); + else + bpf_program__set_autoload(skel->progs.test_user1, false); + + err = btf_type_tag_user__load(skel); + ASSERT_ERR(err, "btf_type_tag_user"); + + btf_type_tag_user__destroy(skel); + +free_module_btf: + btf__free(module_btf); +free_vmlinux_btf: + btf__free(vmlinux_btf); +} + +static void test_btf_type_tag_vmlinux_user(void) +{ + struct btf_type_tag_user *skel; + struct btf *vmlinux_btf; + __s32 type_id; + int err; + + /* skip the test if the vmlinux does not have __user tags */ + vmlinux_btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF")) + return; + + type_id = btf__find_by_name_kind(vmlinux_btf, "user", BTF_KIND_TYPE_TAG); + if (type_id <= 0) { + printf("%s:SKIP: btf_type_tag attribute not in vmlinux btf", __func__); + test__skip(); + goto free_vmlinux_btf; + } + + skel = btf_type_tag_user__open(); + if (!ASSERT_OK_PTR(skel, "btf_type_tag_user")) + goto free_vmlinux_btf; + + bpf_program__set_autoload(skel->progs.test_user2, false); + bpf_program__set_autoload(skel->progs.test_user1, false); + + err = btf_type_tag_user__load(skel); + ASSERT_ERR(err, "btf_type_tag_user"); + + btf_type_tag_user__destroy(skel); + +free_vmlinux_btf: + btf__free(vmlinux_btf); +} + void test_btf_tag(void) { if (test__start_subtest("btf_decl_tag")) test_btf_decl_tag(); if (test__start_subtest("btf_type_tag")) test_btf_type_tag(); + if (test__start_subtest("btf_type_tag_user_mod1")) + test_btf_type_tag_mod_user(true); + if (test__start_subtest("btf_type_tag_user_mod2")) + test_btf_type_tag_mod_user(false); + if (test__start_subtest("btf_type_tag_sys_user_vmlinux")) + test_btf_type_tag_vmlinux_user(); } diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c index f73e6e36b74d..12f4395f18b3 100644 --- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -79,28 +79,21 @@ static void test_check_mtu_run_xdp(struct test_check_mtu *skel, struct bpf_program *prog, __u32 mtu_expect) { - const char *prog_name = bpf_program__name(prog); int retval_expect = XDP_PASS; __u32 mtu_result = 0; char buf[256] = {}; - int err; - struct bpf_prog_test_run_attr tattr = { + int err, prog_fd = bpf_program__fd(prog); + LIBBPF_OPTS(bpf_test_run_opts, topts, .repeat = 1, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), .data_out = buf, .data_size_out = sizeof(buf), - .prog_fd = bpf_program__fd(prog), - }; - - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err != 0, "bpf_prog_test_run", - "prog_name:%s (err %d errno %d retval %d)\n", - prog_name, err, errno, tattr.retval); + ); - CHECK(tattr.retval != retval_expect, "retval", - "progname:%s unexpected retval=%d expected=%d\n", - prog_name, tattr.retval, 
retval_expect); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, retval_expect, "retval"); /* Extract MTU that BPF-prog got */ mtu_result = skel->bss->global_bpf_mtu_xdp; @@ -139,28 +132,21 @@ static void test_check_mtu_run_tc(struct test_check_mtu *skel, struct bpf_program *prog, __u32 mtu_expect) { - const char *prog_name = bpf_program__name(prog); int retval_expect = BPF_OK; __u32 mtu_result = 0; char buf[256] = {}; - int err; - struct bpf_prog_test_run_attr tattr = { - .repeat = 1, + int err, prog_fd = bpf_program__fd(prog); + LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), .data_out = buf, .data_size_out = sizeof(buf), - .prog_fd = bpf_program__fd(prog), - }; - - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err != 0, "bpf_prog_test_run", - "prog_name:%s (err %d errno %d retval %d)\n", - prog_name, err, errno, tattr.retval); + .repeat = 1, + ); - CHECK(tattr.retval != retval_expect, "retval", - "progname:%s unexpected retval=%d expected=%d\n", - prog_name, tattr.retval, retval_expect); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, retval_expect, "retval"); /* Extract MTU that BPF-prog got */ mtu_result = skel->bss->global_bpf_mtu_tc; diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c index e075d03ab630..224f016b0a53 100644 --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -161,7 +161,7 @@ static socklen_t prepare_addr(struct sockaddr_storage *addr, int family) } } -static bool was_decapsulated(struct bpf_prog_test_run_attr *tattr) +static bool was_decapsulated(struct bpf_test_run_opts *tattr) { return tattr->data_size_out < tattr->data_size_in; } @@ -367,12 +367,12 @@ static void close_fds(int *fds, int n) static void test_cls_redirect_common(struct bpf_program *prog) { - struct bpf_prog_test_run_attr tattr = {}; + LIBBPF_OPTS(bpf_test_run_opts, tattr); int families[] = { AF_INET, AF_INET6 }; struct sockaddr_storage ss; struct sockaddr *addr; socklen_t slen; - int i, j, err; + int i, j, err, prog_fd; int servers[__NR_KIND][ARRAY_SIZE(families)] = {}; int conns[__NR_KIND][ARRAY_SIZE(families)] = {}; struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)]; @@ -394,7 +394,7 @@ static void test_cls_redirect_common(struct bpf_program *prog) goto cleanup; } - tattr.prog_fd = bpf_program__fd(prog); + prog_fd = bpf_program__fd(prog); for (i = 0; i < ARRAY_SIZE(tests); i++) { struct test_cfg *test = &tests[i]; @@ -415,7 +415,7 @@ static void test_cls_redirect_common(struct bpf_program *prog) if (CHECK_FAIL(!tattr.data_size_in)) continue; - err = bpf_prog_test_run_xattr(&tattr); + err = bpf_prog_test_run_opts(prog_fd, &tattr); if (CHECK_FAIL(err)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/core_kern.c b/tools/testing/selftests/bpf/prog_tests/core_kern.c index 561c5185d886..6a5a1c019a5d 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_kern.c +++ b/tools/testing/selftests/bpf/prog_tests/core_kern.c @@ -7,8 +7,22 @@ void test_core_kern_lskel(void) { struct core_kern_lskel *skel; + int link_fd; skel = core_kern_lskel__open_and_load(); - ASSERT_OK_PTR(skel, "open_and_load"); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + link_fd = core_kern_lskel__core_relo_proto__attach(skel); + if (!ASSERT_GT(link_fd, 0, "attach(core_relo_proto)")) + goto cleanup; + + /* 
trigger tracepoints */ + usleep(1); + ASSERT_TRUE(skel->bss->proto_out[0], "bpf_core_type_exists"); + ASSERT_FALSE(skel->bss->proto_out[1], "!bpf_core_type_exists"); + ASSERT_TRUE(skel->bss->proto_out[2], "bpf_core_type_exists. nested"); + +cleanup: core_kern_lskel__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c b/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c new file mode 100644 index 000000000000..04cc145bc26a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "test_progs.h" +#include "core_kern_overflow.lskel.h" + +void test_core_kern_overflow_lskel(void) +{ + struct core_kern_overflow_lskel *skel; + + skel = core_kern_overflow_lskel__open_and_load(); + if (!ASSERT_NULL(skel, "open_and_load")) + core_kern_overflow_lskel__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c index cbaa44ffb8c6..5aa52cc31dc2 100644 --- a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c +++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c @@ -26,10 +26,10 @@ static void test_dummy_st_ops_attach(void) static void test_dummy_init_ret_value(void) { __u64 args[1] = {0}; - struct bpf_prog_test_run_attr attr = { - .ctx_size_in = sizeof(args), + LIBBPF_OPTS(bpf_test_run_opts, attr, .ctx_in = args, - }; + .ctx_size_in = sizeof(args), + ); struct dummy_st_ops *skel; int fd, err; @@ -38,8 +38,7 @@ static void test_dummy_init_ret_value(void) return; fd = bpf_program__fd(skel->progs.test_1); - attr.prog_fd = fd; - err = bpf_prog_test_run_xattr(&attr); + err = bpf_prog_test_run_opts(fd, &attr); ASSERT_OK(err, "test_run"); ASSERT_EQ(attr.retval, 0xf2f3f4f5, "test_ret"); @@ -53,10 +52,10 @@ static void test_dummy_init_ptr_arg(void) .val = exp_retval, }; __u64 args[1] = {(unsigned long)&in_state}; - struct bpf_prog_test_run_attr attr = { - .ctx_size_in = sizeof(args), + LIBBPF_OPTS(bpf_test_run_opts, attr, .ctx_in = args, - }; + .ctx_size_in = sizeof(args), + ); struct dummy_st_ops *skel; int fd, err; @@ -65,8 +64,7 @@ static void test_dummy_init_ptr_arg(void) return; fd = bpf_program__fd(skel->progs.test_1); - attr.prog_fd = fd; - err = bpf_prog_test_run_xattr(&attr); + err = bpf_prog_test_run_opts(fd, &attr); ASSERT_OK(err, "test_run"); ASSERT_EQ(in_state.val, 0x5a, "test_ptr_ret"); ASSERT_EQ(attr.retval, exp_retval, "test_ret"); @@ -77,10 +75,10 @@ static void test_dummy_init_ptr_arg(void) static void test_dummy_multiple_args(void) { __u64 args[5] = {0, -100, 0x8a5f, 'c', 0x1234567887654321ULL}; - struct bpf_prog_test_run_attr attr = { - .ctx_size_in = sizeof(args), + LIBBPF_OPTS(bpf_test_run_opts, attr, .ctx_in = args, - }; + .ctx_size_in = sizeof(args), + ); struct dummy_st_ops *skel; int fd, err; size_t i; @@ -91,8 +89,7 @@ static void test_dummy_multiple_args(void) return; fd = bpf_program__fd(skel->progs.test_2); - attr.prog_fd = fd; - err = bpf_prog_test_run_xattr(&attr); + err = bpf_prog_test_run_opts(fd, &attr); ASSERT_OK(err, "test_run"); for (i = 0; i < ARRAY_SIZE(args); i++) { snprintf(name, sizeof(name), "arg %zu", i); diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 4374ac8a8a91..130f5b82d2e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -9,38 +9,34 @@ void test_fentry_fexit(void) struct 
fentry_test_lskel *fentry_skel = NULL; struct fexit_test_lskel *fexit_skel = NULL; __u64 *fentry_res, *fexit_res; - __u32 duration = 0, retval; int err, prog_fd, i; + LIBBPF_OPTS(bpf_test_run_opts, topts); fentry_skel = fentry_test_lskel__open_and_load(); - if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) + if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load")) goto close_prog; fexit_skel = fexit_test_lskel__open_and_load(); - if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) + if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load")) goto close_prog; err = fentry_test_lskel__attach(fentry_skel); - if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err)) + if (!ASSERT_OK(err, "fentry_attach")) goto close_prog; err = fexit_test_lskel__attach(fexit_skel); - if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) + if (!ASSERT_OK(err, "fexit_attach")) goto close_prog; prog_fd = fexit_skel->progs.test1.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); - CHECK(err || retval, "ipv6", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "ipv6 test_run"); + ASSERT_OK(topts.retval, "ipv6 test retval"); fentry_res = (__u64 *)fentry_skel->bss; fexit_res = (__u64 *)fexit_skel->bss; printf("%lld\n", fentry_skel->bss->test1_result); for (i = 0; i < 8; i++) { - CHECK(fentry_res[i] != 1, "result", - "fentry_test%d failed err %lld\n", i + 1, fentry_res[i]); - CHECK(fexit_res[i] != 1, "result", - "fexit_test%d failed err %lld\n", i + 1, fexit_res[i]); + ASSERT_EQ(fentry_res[i], 1, "fentry result"); + ASSERT_EQ(fexit_res[i], 1, "fexit result"); } close_prog: diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index 12921b3850d2..c0d1d61d5f66 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -6,9 +6,9 @@ static int fentry_test(struct fentry_test_lskel *fentry_skel) { int err, prog_fd, i; - __u32 duration = 0, retval; int link_fd; __u64 *result; + LIBBPF_OPTS(bpf_test_run_opts, topts); err = fentry_test_lskel__attach(fentry_skel); if (!ASSERT_OK(err, "fentry_attach")) @@ -20,10 +20,9 @@ static int fentry_test(struct fentry_test_lskel *fentry_skel) return -1; prog_fd = fentry_skel->progs.test1.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); - ASSERT_EQ(retval, 0, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); result = (__u64 *)fentry_skel->bss; for (i = 0; i < sizeof(*fentry_skel->bss) / sizeof(__u64); i++) { diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index c52f99f6a909..d9aad15e0d24 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -58,12 +58,17 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, test_cb cb) { struct bpf_object *obj = NULL, *tgt_obj; - __u32 retval, tgt_prog_id, info_len; + __u32 tgt_prog_id, info_len; struct bpf_prog_info prog_info = {}; struct bpf_program **prog = NULL, *p; struct bpf_link **link = NULL; int err, tgt_fd, i; struct btf *btf; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v6, + .data_size_in = sizeof(pkt_v6), + .repeat = 1, + ); err = 
bpf_prog_test_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, &tgt_obj, &tgt_fd); @@ -132,7 +137,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, &link_info, &info_len); ASSERT_OK(err, "link_fd_get_info"); ASSERT_EQ(link_info.tracing.attach_type, - bpf_program__get_expected_attach_type(prog[i]), + bpf_program__expected_attach_type(prog[i]), "link_attach_type"); ASSERT_EQ(link_info.tracing.target_obj_id, tgt_prog_id, "link_tgt_obj_id"); ASSERT_EQ(link_info.tracing.target_btf_id, btf_id, "link_tgt_btf_id"); @@ -147,10 +152,9 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, if (!run_prog) goto close_prog; - err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6), - NULL, NULL, &retval, NULL); + err = bpf_prog_test_run_opts(tgt_fd, &topts); ASSERT_OK(err, "prog_run"); - ASSERT_EQ(retval, 0, "prog_run_ret"); + ASSERT_EQ(topts.retval, 0, "prog_run_ret"); if (check_data_map(obj, prog_cnt, false)) goto close_prog; @@ -225,29 +229,31 @@ static int test_second_attach(struct bpf_object *obj) const char *tgt_obj_file = "./test_pkt_access.o"; struct bpf_program *prog = NULL; struct bpf_object *tgt_obj; - __u32 duration = 0, retval; struct bpf_link *link; int err = 0, tgt_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v6, + .data_size_in = sizeof(pkt_v6), + .repeat = 1, + ); prog = bpf_object__find_program_by_name(obj, prog_name); - if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name)) + if (!ASSERT_OK_PTR(prog, "find_prog")) return -ENOENT; err = bpf_prog_test_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC, &tgt_obj, &tgt_fd); - if (CHECK(err, "second_prog_load", "file %s err %d errno %d\n", - tgt_obj_file, err, errno)) + if (!ASSERT_OK(err, "second_prog_load")) return err; link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name); if (!ASSERT_OK_PTR(link, "second_link")) goto out; - err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6), - NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "ipv6", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration)) + err = bpf_prog_test_run_opts(tgt_fd, &topts); + if (!ASSERT_OK(err, "ipv6 test_run")) + goto out; + if (!ASSERT_OK(topts.retval, "ipv6 retval")) goto out; err = check_data_map(obj, 1, true); diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index e4cede6b4b2d..3ee2107bbf7a 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -10,9 +10,7 @@ void test_fexit_stress(void) char test_skb[128] = {}; int fexit_fd[CNT] = {}; int link_fd[CNT] = {}; - __u32 duration = 0; char error[4096]; - __u32 prog_ret; int err, i, filter_fd; const struct bpf_insn trace_program[] = { @@ -36,9 +34,15 @@ void test_fexit_stress(void) .log_size = sizeof(error), ); + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = test_skb, + .data_size_in = sizeof(test_skb), + .repeat = 1, + ); + err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1", trace_opts.expected_attach_type); - if (CHECK(err <= 0, "find_vmlinux_btf_id", "failed: %d\n", err)) + if (!ASSERT_GT(err, 0, "find_vmlinux_btf_id")) goto out; trace_opts.attach_btf_id = err; @@ -47,24 +51,20 @@ void test_fexit_stress(void) trace_program, sizeof(trace_program) / sizeof(struct bpf_insn), &trace_opts); - if (CHECK(fexit_fd[i] < 0, "fexit loaded", - "failed: %d errno %d\n", fexit_fd[i], errno)) + if (!ASSERT_GE(fexit_fd[i], 0, "fexit load")) goto out; link_fd[i] = bpf_raw_tracepoint_open(NULL, 
fexit_fd[i]); - if (CHECK(link_fd[i] < 0, "fexit attach failed", - "prog %d failed: %d err %d\n", i, link_fd[i], errno)) + if (!ASSERT_GE(link_fd[i], 0, "fexit attach")) goto out; } filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", skb_program, sizeof(skb_program) / sizeof(struct bpf_insn), &skb_opts); - if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n", - filter_fd, errno)) + if (!ASSERT_GE(filter_fd, 0, "test_program_loaded")) goto out; - err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, - 0, &prog_ret, 0); + err = bpf_prog_test_run_opts(filter_fd, &topts); close(filter_fd); CHECK_FAIL(err); out: diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index d4887d8bb396..101b7343036b 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -6,9 +6,9 @@ static int fexit_test(struct fexit_test_lskel *fexit_skel) { int err, prog_fd, i; - __u32 duration = 0, retval; int link_fd; __u64 *result; + LIBBPF_OPTS(bpf_test_run_opts, topts); err = fexit_test_lskel__attach(fexit_skel); if (!ASSERT_OK(err, "fexit_attach")) @@ -20,10 +20,9 @@ static int fexit_test(struct fexit_test_lskel *fexit_skel) return -1; prog_fd = fexit_skel->progs.test1.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); - ASSERT_EQ(retval, 0, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); result = (__u64 *)fexit_skel->bss; for (i = 0; i < sizeof(*fexit_skel->bss) / sizeof(__u64); i++) { diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index dfafd62df50b..0c1661ea996e 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -13,8 +13,9 @@ #endif #define CHECK_FLOW_KEYS(desc, got, expected) \ - CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0, \ + _CHECK(memcmp(&got, &expected, sizeof(got)) != 0, \ desc, \ + topts.duration, \ "nhoff=%u/%u " \ "thoff=%u/%u " \ "addr_proto=0x%x/0x%x " \ @@ -487,7 +488,7 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys) /* Keep in sync with 'flags' from eth_get_headlen. 
*/ __u32 eth_get_headlen_flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG; - struct bpf_prog_test_run_attr tattr = {}; + LIBBPF_OPTS(bpf_test_run_opts, topts); struct bpf_flow_keys flow_keys = {}; __u32 key = (__u32)(tests[i].keys.sport) << 16 | tests[i].keys.dport; @@ -503,13 +504,12 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys) CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno); err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys); - CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err); + ASSERT_OK(err, "bpf_map_lookup_elem"); - CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err); CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); err = bpf_map_delete_elem(keys_fd, &key); - CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err); + ASSERT_OK(err, "bpf_map_delete_elem"); } } @@ -573,27 +573,24 @@ void test_flow_dissector(void) for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_flow_keys flow_keys; - struct bpf_prog_test_run_attr tattr = { - .prog_fd = prog_fd, + LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &tests[i].pkt, .data_size_in = sizeof(tests[i].pkt), .data_out = &flow_keys, - }; + ); static struct bpf_flow_keys ctx = {}; if (tests[i].flags) { - tattr.ctx_in = &ctx; - tattr.ctx_size_in = sizeof(ctx); + topts.ctx_in = &ctx; + topts.ctx_size_in = sizeof(ctx); ctx.flags = tests[i].flags; } - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(tattr.data_size_out != sizeof(flow_keys) || - err || tattr.retval != 1, - tests[i].name, - "err %d errno %d retval %d duration %d size %u/%zu\n", - err, errno, tattr.retval, tattr.duration, - tattr.data_size_out, sizeof(flow_keys)); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 1, "test_run retval"); + ASSERT_EQ(topts.data_size_out, sizeof(flow_keys), + "test_run data_size_out"); CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); } diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c index 93ac3f28226c..36afb409c25f 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c @@ -5,7 +5,6 @@ void serial_test_flow_dissector_load_bytes(void) { struct bpf_flow_keys flow_keys; - __u32 duration = 0, retval, size; struct bpf_insn prog[] = { // BPF_REG_1 - 1st argument: context // BPF_REG_2 - 2nd argument: offset, start at first byte @@ -27,22 +26,25 @@ void serial_test_flow_dissector_load_bytes(void) BPF_EXIT_INSN(), }; int fd, err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = &flow_keys, + .data_size_out = sizeof(flow_keys), + .repeat = 1, + ); /* make sure bpf_skb_load_bytes is not allowed from skb-less context */ fd = bpf_test_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); - CHECK(fd < 0, - "flow_dissector-bpf_skb_load_bytes-load", - "fd %d errno %d\n", - fd, errno); + ASSERT_GE(fd, 0, "bpf_test_load_program good fd"); - err = bpf_prog_test_run(fd, 1, &pkt_v4, sizeof(pkt_v4), - &flow_keys, &size, &retval, &duration); - CHECK(size != sizeof(flow_keys) || err || retval != 1, - "flow_dissector-bpf_skb_load_bytes", - "err %d errno %d retval %d duration %d size %u/%zu\n", - err, errno, retval, duration, size, sizeof(flow_keys)); + err = bpf_prog_test_run_opts(fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.data_size_out, 
sizeof(flow_keys), + "test_run data_size_out"); + ASSERT_EQ(topts.retval, 1, "test_run retval"); if (fd >= -1) close(fd); diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c index 68eb12a287d4..044df13ee069 100644 --- a/tools/testing/selftests/bpf/prog_tests/for_each.c +++ b/tools/testing/selftests/bpf/prog_tests/for_each.c @@ -12,8 +12,13 @@ static void test_hash_map(void) int i, err, hashmap_fd, max_entries, percpu_map_fd; struct for_each_hash_map_elem *skel; __u64 *percpu_valbuf = NULL; - __u32 key, num_cpus, retval; + __u32 key, num_cpus; __u64 val; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); skel = for_each_hash_map_elem__open_and_load(); if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load")) @@ -42,11 +47,10 @@ static void test_hash_map(void) if (!ASSERT_OK(err, "percpu_map_update")) goto out; - err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access), - 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, - &retval, &duration); - if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n", - err, errno, retval)) + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts); + duration = topts.duration; + if (CHECK(err || topts.retval, "ipv4", "err %d errno %d retval %d\n", + err, errno, topts.retval)) goto out; ASSERT_EQ(skel->bss->hashmap_output, 4, "hashmap_output"); @@ -69,11 +73,16 @@ out: static void test_array_map(void) { - __u32 key, num_cpus, max_entries, retval; + __u32 key, num_cpus, max_entries; int i, arraymap_fd, percpu_map_fd, err; struct for_each_array_map_elem *skel; __u64 *percpu_valbuf = NULL; __u64 val, expected_total; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); skel = for_each_array_map_elem__open_and_load(); if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load")) @@ -106,11 +115,10 @@ static void test_array_map(void) if (!ASSERT_OK(err, "percpu_map_update")) goto out; - err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access), - 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, - &retval, &duration); - if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n", - err, errno, retval)) + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts); + duration = topts.duration; + if (CHECK(err || topts.retval, "ipv4", "err %d errno %d retval %d\n", + err, errno, topts.retval)) goto out; ASSERT_EQ(skel->bss->arraymap_output, expected_total, "array_output"); diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c index 85c427119fe9..28cf63963cb7 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c +++ b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c @@ -5,8 +5,8 @@ void test_get_func_args_test(void) { struct get_func_args_test *skel = NULL; - __u32 duration = 0, retval; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); skel = get_func_args_test__open_and_load(); if (!ASSERT_OK_PTR(skel, "get_func_args_test__open_and_load")) @@ -20,19 +20,17 @@ void test_get_func_args_test(void) * fentry/fexit programs. 
*/ prog_fd = bpf_program__fd(skel->progs.test1); - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); - ASSERT_EQ(retval, 0, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); /* This runs bpf_modify_return_test function and triggers * fmod_ret_test and fexit_test programs. */ prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); - ASSERT_EQ(retval, 1234, "test_run"); + ASSERT_EQ(topts.retval, 1234, "test_run"); ASSERT_EQ(skel->bss->test1_result, 1, "test1_result"); ASSERT_EQ(skel->bss->test2_result, 1, "test2_result"); diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c index 02a465f36d59..938dbd4d7c2f 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c +++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c @@ -5,8 +5,8 @@ void test_get_func_ip_test(void) { struct get_func_ip_test *skel = NULL; - __u32 duration = 0, retval; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); skel = get_func_ip_test__open(); if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open")) @@ -29,14 +29,12 @@ void test_get_func_ip_test(void) goto cleanup; prog_fd = bpf_program__fd(skel->progs.test1); - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); - ASSERT_EQ(retval, 0, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); prog_fd = bpf_program__fd(skel->progs.test5); - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c index 8d5a6023a1bb..5308de1ed478 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c @@ -27,7 +27,7 @@ void test_get_stackid_cannot_attach(void) return; /* override program type */ - bpf_program__set_perf_event(skel->progs.oncpu); + bpf_program__set_type(skel->progs.oncpu, BPF_PROG_TYPE_PERF_EVENT); err = test_stacktrace_build_id__load(skel); if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err)) diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c index 917165e04427..6fb3d3155c35 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data.c @@ -132,24 +132,26 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration) void test_global_data(void) { const char *file = "./test_global_data.o"; - __u32 duration = 0, retval; struct bpf_object *obj; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); - if (CHECK(err, "load program", "error %d loading %s\n", err, file)) + if (!ASSERT_OK(err, "load program")) return; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "pass global data run", - "err %d 
errno %d retval %d duration %d\n", - err, errno, retval, duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "pass global data run err"); + ASSERT_OK(topts.retval, "pass global data run retval"); - test_global_data_number(obj, duration); - test_global_data_string(obj, duration); - test_global_data_struct(obj, duration); - test_global_data_rdonly(obj, duration); + test_global_data_number(obj, topts.duration); + test_global_data_string(obj, topts.duration); + test_global_data_struct(obj, topts.duration); + test_global_data_rdonly(obj, topts.duration); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_args.c b/tools/testing/selftests/bpf/prog_tests/global_func_args.c index 93a2439237b0..29039a36cce5 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_func_args.c +++ b/tools/testing/selftests/bpf/prog_tests/global_func_args.c @@ -40,19 +40,21 @@ static void test_global_func_args0(struct bpf_object *obj) void test_global_func_args(void) { const char *file = "./test_global_func_args.o"; - __u32 retval; struct bpf_object *obj; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); if (CHECK(err, "load program", "error %d loading %s\n", err, file)) return; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "pass global func args run", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_OK(topts.retval, "test_run retval"); test_global_func_args0(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c index ce10d2fc3a6c..1cee6957285e 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -53,24 +53,24 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) void serial_test_kfree_skb(void) { struct __sk_buff skb = {}; - struct bpf_prog_test_run_attr tattr = { + LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v6, .data_size_in = sizeof(pkt_v6), .ctx_in = &skb, .ctx_size_in = sizeof(skb), - }; + ); struct kfree_skb *skel = NULL; struct bpf_link *link; struct bpf_object *obj; struct perf_buffer *pb = NULL; - int err; + int err, prog_fd; bool passed = false; __u32 duration = 0; const int zero = 0; bool test_ok[2]; err = bpf_prog_test_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, - &obj, &tattr.prog_fd); + &obj, &prog_fd); if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) return; @@ -100,11 +100,9 @@ void serial_test_kfree_skb(void) goto close_prog; memcpy(skb.cb, &cb, sizeof(cb)); - err = bpf_prog_test_run_xattr(&tattr); - duration = tattr.duration; - CHECK(err || tattr.retval, "ipv6", - "err %d errno %d retval %d duration %d\n", - err, errno, tattr.retval, duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "ipv6 test_run"); + ASSERT_OK(topts.retval, "ipv6 test_run retval"); /* read perf buffer */ err = perf_buffer__poll(pb, 100); diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index b39a4f09aefd..c00eb974eb85 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ 
b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -9,29 +9,31 @@ static void test_main(void) { struct kfunc_call_test_lskel *skel; - int prog_fd, retval, err; + int prog_fd, err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); skel = kfunc_call_test_lskel__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel")) return; prog_fd = skel->progs.kfunc_call_test1.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, (__u32 *)&retval, NULL); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "bpf_prog_test_run(test1)"); - ASSERT_EQ(retval, 12, "test1-retval"); + ASSERT_EQ(topts.retval, 12, "test1-retval"); prog_fd = skel->progs.kfunc_call_test2.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, (__u32 *)&retval, NULL); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "bpf_prog_test_run(test2)"); - ASSERT_EQ(retval, 3, "test2-retval"); + ASSERT_EQ(topts.retval, 3, "test2-retval"); prog_fd = skel->progs.kfunc_call_test_ref_btf_id.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, (__u32 *)&retval, NULL); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "bpf_prog_test_run(test_ref_btf_id)"); - ASSERT_EQ(retval, 0, "test_ref_btf_id-retval"); + ASSERT_EQ(topts.retval, 0, "test_ref_btf_id-retval"); kfunc_call_test_lskel__destroy(skel); } @@ -39,17 +41,21 @@ static void test_main(void) static void test_subprog(void) { struct kfunc_call_test_subprog *skel; - int prog_fd, retval, err; + int prog_fd, err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); skel = kfunc_call_test_subprog__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel")) return; prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1); - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, (__u32 *)&retval, NULL); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "bpf_prog_test_run(test1)"); - ASSERT_EQ(retval, 10, "test1-retval"); + ASSERT_EQ(topts.retval, 10, "test1-retval"); ASSERT_NEQ(skel->data->active_res, -1, "active_res"); ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res"); @@ -59,17 +65,21 @@ static void test_subprog(void) static void test_subprog_lskel(void) { struct kfunc_call_test_subprog_lskel *skel; - int prog_fd, retval, err; + int prog_fd, err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); skel = kfunc_call_test_subprog_lskel__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel")) return; prog_fd = skel->progs.kfunc_call_test1.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, (__u32 *)&retval, NULL); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "bpf_prog_test_run(test1)"); - ASSERT_EQ(retval, 10, "test1-retval"); + ASSERT_EQ(topts.retval, 10, "test1-retval"); ASSERT_NEQ(skel->data->active_res, -1, "active_res"); ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res"); diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c index d490ad80eccb..a1ebac70ec29 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c @@ -6,11 +6,15 @@ #include "test_ksyms_module.lskel.h" #include "test_ksyms_module.skel.h" -void test_ksyms_module_lskel(void) 
+static void test_ksyms_module_lskel(void) { struct test_ksyms_module_lskel *skel; - int retval; int err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); if (!env.has_testmod) { test__skip(); @@ -20,20 +24,24 @@ void test_ksyms_module_lskel(void) skel = test_ksyms_module_lskel__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_ksyms_module_lskel__open_and_load")) return; - err = bpf_prog_test_run(skel->progs.load.prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, (__u32 *)&retval, NULL); + err = bpf_prog_test_run_opts(skel->progs.load.prog_fd, &topts); if (!ASSERT_OK(err, "bpf_prog_test_run")) goto cleanup; - ASSERT_EQ(retval, 0, "retval"); + ASSERT_EQ(topts.retval, 0, "retval"); ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym"); cleanup: test_ksyms_module_lskel__destroy(skel); } -void test_ksyms_module_libbpf(void) +static void test_ksyms_module_libbpf(void) { struct test_ksyms_module *skel; - int retval, err; + int err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); if (!env.has_testmod) { test__skip(); @@ -43,11 +51,10 @@ void test_ksyms_module_libbpf(void) skel = test_ksyms_module__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open")) return; - err = bpf_prog_test_run(bpf_program__fd(skel->progs.load), 1, &pkt_v4, - sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL); + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.load), &topts); if (!ASSERT_OK(err, "bpf_prog_test_run")) goto cleanup; - ASSERT_EQ(retval, 0, "retval"); + ASSERT_EQ(topts.retval, 0, "retval"); ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym"); cleanup: test_ksyms_module__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c index 540ef28fabff..55f733ff4109 100644 --- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c +++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c @@ -23,12 +23,16 @@ static void test_l4lb(const char *file) __u8 flags; } real_def = {.dst = MAGIC_VAL}; __u32 ch_key = 11, real_num = 3; - __u32 duration, retval, size; int err, i, prog_fd, map_fd; __u64 bytes = 0, pkts = 0; struct bpf_object *obj; char buf[128]; u32 *magic = (u32 *)buf; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = NUM_ITER, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) @@ -49,19 +53,24 @@ static void test_l4lb(const char *file) goto out; bpf_map_update_elem(map_fd, &real_num, &real_def, 0); - err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), - buf, &size, &retval, &duration); - CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 || - *magic != MAGIC_VAL, "ipv4", - "err %d errno %d retval %d size %d magic %x\n", - err, errno, retval, size, *magic); + topts.data_in = &pkt_v4; + topts.data_size_in = sizeof(pkt_v4); - err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), - buf, &size, &retval, &duration); - CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 || - *magic != MAGIC_VAL, "ipv6", - "err %d errno %d retval %d size %d magic %x\n", - err, errno, retval, size, *magic); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 7 /*TC_ACT_REDIRECT*/, "ipv4 test_run retval"); + ASSERT_EQ(topts.data_size_out, 54, "ipv4 test_run data_size_out"); + ASSERT_EQ(*magic, 
MAGIC_VAL, "ipv4 magic"); + + topts.data_in = &pkt_v6; + topts.data_size_in = sizeof(pkt_v6); + topts.data_size_out = sizeof(buf); /* reset out size */ + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 7 /*TC_ACT_REDIRECT*/, "ipv6 test_run retval"); + ASSERT_EQ(topts.data_size_out, 74, "ipv6 test_run data_size_out"); + ASSERT_EQ(*magic, MAGIC_VAL, "ipv6 magic"); map_fd = bpf_find_map(__func__, obj, "stats"); if (map_fd < 0) diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c index e469b023962b..1ef377a7e731 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_buf.c +++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c @@ -202,7 +202,7 @@ static void bpf_btf_load_log_buf(void) const void *raw_btf_data; __u32 raw_btf_size; struct btf *btf; - char *log_buf; + char *log_buf = NULL; int fd = -1; btf = btf__new_empty(); diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c index 23d19e9cf26a..e4e99b37df64 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c @@ -4,14 +4,17 @@ static void *spin_lock_thread(void *arg) { - __u32 duration, retval; int err, prog_fd = *(u32 *) arg; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 10000, + ); + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run_opts err"); + ASSERT_OK(topts.retval, "test_run_opts retval"); - err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); pthread_exit(arg); } diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c index 273725504f11..43e502acf050 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_ptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c @@ -9,10 +9,16 @@ void test_map_ptr(void) { struct map_ptr_kern_lskel *skel; - __u32 duration = 0, retval; char buf[128]; int err; int page_size = getpagesize(); + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); skel = map_ptr_kern_lskel__open(); if (!ASSERT_OK_PTR(skel, "skel_open")) @@ -26,14 +32,12 @@ void test_map_ptr(void) skel->bss->page_size = page_size; - err = bpf_prog_test_run(skel->progs.cg_skb.prog_fd, 1, &pkt_v4, - sizeof(pkt_v4), buf, NULL, &retval, NULL); + err = bpf_prog_test_run_opts(skel->progs.cg_skb.prog_fd, &topts); - if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno)) + if (!ASSERT_OK(err, "test_run")) goto cleanup; - if (CHECK(!retval, "retval", "retval=%d map_type=%u line=%u\n", retval, - skel->bss->g_map_type, skel->bss->g_line)) + if (!ASSERT_NEQ(topts.retval, 0, "test_run retval")) goto cleanup; cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c index b772fe30ce9b..5d9955af6247 100644 --- a/tools/testing/selftests/bpf/prog_tests/modify_return.c +++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c @@ -15,39 +15,31 @@ static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret) { struct modify_return *skel = NULL; int err, prog_fd; - __u32 duration = 0, retval; __u16 side_effect; __s16 
ret; + LIBBPF_OPTS(bpf_test_run_opts, topts); skel = modify_return__open_and_load(); - if (CHECK(!skel, "skel_load", "modify_return skeleton failed\n")) + if (!ASSERT_OK_PTR(skel, "skel_load")) goto cleanup; err = modify_return__attach(skel); - if (CHECK(err, "modify_return", "attach failed: %d\n", err)) + if (!ASSERT_OK(err, "modify_return__attach failed")) goto cleanup; skel->bss->input_retval = input_retval; prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, 0, - &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); - CHECK(err, "test_run", "err %d errno %d\n", err, errno); + side_effect = UPPER(topts.retval); + ret = LOWER(topts.retval); - side_effect = UPPER(retval); - ret = LOWER(retval); - - CHECK(ret != want_ret, "test_run", - "unexpected ret: %d, expected: %d\n", ret, want_ret); - CHECK(side_effect != want_side_effect, "modify_return", - "unexpected side_effect: %d\n", side_effect); - - CHECK(skel->bss->fentry_result != 1, "modify_return", - "fentry failed\n"); - CHECK(skel->bss->fexit_result != 1, "modify_return", - "fexit failed\n"); - CHECK(skel->bss->fmod_ret_result != 1, "modify_return", - "fmod_ret failed\n"); + ASSERT_EQ(ret, want_ret, "test_run ret"); + ASSERT_EQ(side_effect, want_side_effect, "modify_return side_effect"); + ASSERT_EQ(skel->bss->fentry_result, 1, "modify_return fentry_result"); + ASSERT_EQ(skel->bss->fexit_result, 1, "modify_return fexit_result"); + ASSERT_EQ(skel->bss->fmod_ret_result, 1, "modify_return fmod_ret_result"); cleanup: modify_return__destroy(skel); @@ -63,4 +55,3 @@ void serial_test_modify_return(void) 0 /* want_side_effect */, -EINVAL /* want_ret */); } - diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c index 6628710ec3c6..0bcccdc34fbc 100644 --- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c +++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c @@ -6,23 +6,27 @@ void test_pkt_access(void) { const char *file = "./test_pkt_access.o"; struct bpf_object *obj; - __u32 duration, retval; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 100000, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; - err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "ipv4", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "ipv4 test_run_opts err"); + ASSERT_OK(topts.retval, "ipv4 test_run_opts retval"); + + topts.data_in = &pkt_v6; + topts.data_size_in = sizeof(pkt_v6); + topts.data_size_out = 0; /* reset from last call */ + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "ipv6 test_run_opts err"); + ASSERT_OK(topts.retval, "ipv6 test_run_opts retval"); - err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "ipv6", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c index c9d2d6a1bfcc..00ee1dd792aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c +++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c @@ -6,18 
+6,20 @@ void test_pkt_md_access(void) { const char *file = "./test_pkt_md_access.o"; struct bpf_object *obj; - __u32 duration, retval; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 10, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; - err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run_opts err"); + ASSERT_OK(topts.retval, "test_run_opts retval"); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c b/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c new file mode 100644 index 000000000000..1ccd2bdf8fa8 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> + +#include "test_pkt_access.skel.h" + +static const __u32 duration; + +static void check_run_cnt(int prog_fd, __u64 run_cnt) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + int err; + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd)) + return; + + CHECK(run_cnt != info.run_cnt, "run_cnt", + "incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt); +} + +void test_prog_run_opts(void) +{ + struct test_pkt_access *skel; + int err, stats_fd = -1, prog_fd; + char buf[10] = {}; + __u64 run_cnt = 0; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .repeat = 1, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = 5, + ); + + stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME); + if (!ASSERT_GE(stats_fd, 0, "enable_stats good fd")) + return; + + skel = test_pkt_access__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.test_pkt_access); + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_EQ(errno, ENOSPC, "test_run errno"); + ASSERT_ERR(err, "test_run"); + ASSERT_OK(topts.retval, "test_run retval"); + + ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4), "test_run data_size_out"); + ASSERT_EQ(buf[5], 0, "overflow, BPF_PROG_TEST_RUN ignored size hint"); + + run_cnt += topts.repeat; + check_run_cnt(prog_fd, run_cnt); + + topts.data_out = NULL; + topts.data_size_out = 0; + topts.repeat = 2; + errno = 0; + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(errno, "run_no_output errno"); + ASSERT_OK(err, "run_no_output err"); + ASSERT_OK(topts.retval, "run_no_output retval"); + + run_cnt += topts.repeat; + check_run_cnt(prog_fd, run_cnt); + +cleanup: + if (skel) + test_pkt_access__destroy(skel); + if (stats_fd >= 0) + close(stats_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c deleted file mode 100644 index 89fc98faf19e..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <test_progs.h> -#include <network_helpers.h> - -#include "test_pkt_access.skel.h" - -static const __u32 duration; - -static void check_run_cnt(int prog_fd, __u64 run_cnt) -{ - struct bpf_prog_info info = {}; - 
__u32 info_len = sizeof(info); - int err; - - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd)) - return; - - CHECK(run_cnt != info.run_cnt, "run_cnt", - "incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt); -} - -void test_prog_run_xattr(void) -{ - struct test_pkt_access *skel; - int err, stats_fd = -1; - char buf[10] = {}; - __u64 run_cnt = 0; - - struct bpf_prog_test_run_attr tattr = { - .repeat = 1, - .data_in = &pkt_v4, - .data_size_in = sizeof(pkt_v4), - .data_out = buf, - .data_size_out = 5, - }; - - stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME); - if (CHECK_ATTR(stats_fd < 0, "enable_stats", "failed %d\n", errno)) - return; - - skel = test_pkt_access__open_and_load(); - if (CHECK_ATTR(!skel, "open_and_load", "failed\n")) - goto cleanup; - - tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access); - - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err >= 0 || errno != ENOSPC || tattr.retval, "run", - "err %d errno %d retval %d\n", err, errno, tattr.retval); - - CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out", - "incorrect output size, want %zu have %u\n", - sizeof(pkt_v4), tattr.data_size_out); - - CHECK_ATTR(buf[5] != 0, "overflow", - "BPF_PROG_TEST_RUN ignored size hint\n"); - - run_cnt += tattr.repeat; - check_run_cnt(tattr.prog_fd, run_cnt); - - tattr.data_out = NULL; - tattr.data_size_out = 0; - tattr.repeat = 2; - errno = 0; - - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err || errno || tattr.retval, "run_no_output", - "err %d errno %d retval %d\n", err, errno, tattr.retval); - - tattr.data_size_out = 1; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err); - - run_cnt += tattr.repeat; - check_run_cnt(tattr.prog_fd, run_cnt); - -cleanup: - if (skel) - test_pkt_access__destroy(skel); - if (stats_fd >= 0) - close(stats_fd); -} diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c index b9822f914eeb..d2743fc10032 100644 --- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c +++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c @@ -10,11 +10,18 @@ enum { static void test_queue_stack_map_by_type(int type) { const int MAP_SIZE = 32; - __u32 vals[MAP_SIZE], duration, retval, size, val; + __u32 vals[MAP_SIZE], val; int i, err, prog_fd, map_in_fd, map_out_fd; char file[32], buf[128]; struct bpf_object *obj; struct iphdr iph; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); /* Fill test values to be used */ for (i = 0; i < MAP_SIZE; i++) @@ -58,38 +65,37 @@ static void test_queue_stack_map_by_type(int type) pkt_v4.iph.saddr = vals[MAP_SIZE - 1 - i] * 5; } - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - buf, &size, &retval, &duration); - if (err || retval || size != sizeof(pkt_v4)) + topts.data_size_out = sizeof(buf); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (err || topts.retval || + topts.data_size_out != sizeof(pkt_v4)) break; memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph)); if (iph.daddr != val) break; } - CHECK(err || retval || size != sizeof(pkt_v4) || iph.daddr != val, - "bpf_map_pop_elem", - "err %d errno %d retval %d size %d iph->daddr %u\n", - err, errno, retval, size, iph.daddr); + ASSERT_OK(err, "bpf_map_pop_elem"); + 
ASSERT_OK(topts.retval, "bpf_map_pop_elem test retval"); + ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4), + "bpf_map_pop_elem data_size_out"); + ASSERT_EQ(iph.daddr, val, "bpf_map_pop_elem iph.daddr"); /* Queue is empty, program should return TC_ACT_SHOT */ - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - buf, &size, &retval, &duration); - CHECK(err || retval != 2 /* TC_ACT_SHOT */|| size != sizeof(pkt_v4), - "check-queue-stack-map-empty", - "err %d errno %d retval %d size %d\n", - err, errno, retval, size); + topts.data_size_out = sizeof(buf); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "check-queue-stack-map-empty"); + ASSERT_EQ(topts.retval, 2 /* TC_ACT_SHOT */, + "check-queue-stack-map-empty test retval"); + ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4), + "check-queue-stack-map-empty data_size_out"); /* Check that the program pushed elements correctly */ for (i = 0; i < MAP_SIZE; i++) { err = bpf_map_lookup_and_delete_elem(map_out_fd, NULL, &val); - if (err || val != vals[i] * 5) - break; + ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"); + ASSERT_EQ(val, vals[i] * 5, "bpf_map_push_elem val"); } - - CHECK(i != MAP_SIZE && (err || val != vals[i] * 5), - "bpf_map_push_elem", "err %d value %u\n", err, val); - out: pkt_v4.iph.saddr = 0; bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c index 41720a62c4fa..fe5b8fae2c36 100644 --- a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c @@ -5,59 +5,54 @@ #include "bpf/libbpf_internal.h" #include "test_raw_tp_test_run.skel.h" -static int duration; - void test_raw_tp_test_run(void) { - struct bpf_prog_test_run_attr test_attr = {}; int comm_fd = -1, err, nr_online, i, prog_fd; __u64 args[2] = {0x1234ULL, 0x5678ULL}; int expected_retval = 0x1234 + 0x5678; struct test_raw_tp_test_run *skel; char buf[] = "new_name"; bool *online = NULL; - DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, - .ctx_in = args, - .ctx_size_in = sizeof(args), - .flags = BPF_F_TEST_RUN_ON_CPU, - ); + LIBBPF_OPTS(bpf_test_run_opts, opts, + .ctx_in = args, + .ctx_size_in = sizeof(args), + .flags = BPF_F_TEST_RUN_ON_CPU, + ); err = parse_cpu_mask_file("/sys/devices/system/cpu/online", &online, &nr_online); - if (CHECK(err, "parse_cpu_mask_file", "err %d\n", err)) + if (!ASSERT_OK(err, "parse_cpu_mask_file")) return; skel = test_raw_tp_test_run__open_and_load(); - if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + if (!ASSERT_OK_PTR(skel, "skel_open")) goto cleanup; err = test_raw_tp_test_run__attach(skel); - if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + if (!ASSERT_OK(err, "skel_attach")) goto cleanup; comm_fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); - if (CHECK(comm_fd < 0, "open /proc/self/comm", "err %d\n", errno)) + if (!ASSERT_GE(comm_fd, 0, "open /proc/self/comm")) goto cleanup; err = write(comm_fd, buf, sizeof(buf)); - CHECK(err < 0, "task rename", "err %d", errno); + ASSERT_GE(err, 0, "task rename"); - CHECK(skel->bss->count == 0, "check_count", "didn't increase\n"); - CHECK(skel->data->on_cpu != 0xffffffff, "check_on_cpu", "got wrong value\n"); + ASSERT_NEQ(skel->bss->count, 0, "check_count"); + ASSERT_EQ(skel->data->on_cpu, 0xffffffff, "check_on_cpu"); prog_fd = bpf_program__fd(skel->progs.rename); - test_attr.prog_fd = prog_fd; - test_attr.ctx_in = args; - test_attr.ctx_size_in = sizeof(__u64); + opts.ctx_in = args; + 
opts.ctx_size_in = sizeof(__u64); - err = bpf_prog_test_run_xattr(&test_attr); - CHECK(err == 0, "test_run", "should fail for too small ctx\n"); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_NEQ(err, 0, "test_run should fail for too small ctx"); - test_attr.ctx_size_in = sizeof(args); - err = bpf_prog_test_run_xattr(&test_attr); - CHECK(err < 0, "test_run", "err %d\n", errno); - CHECK(test_attr.retval != expected_retval, "check_retval", - "expect 0x%x, got 0x%x\n", expected_retval, test_attr.retval); + opts.ctx_size_in = sizeof(args); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(opts.retval, expected_retval, "check_retval"); for (i = 0; i < nr_online; i++) { if (!online[i]) @@ -66,28 +61,23 @@ void test_raw_tp_test_run(void) opts.cpu = i; opts.retval = 0; err = bpf_prog_test_run_opts(prog_fd, &opts); - CHECK(err < 0, "test_run_opts", "err %d\n", errno); - CHECK(skel->data->on_cpu != i, "check_on_cpu", - "expect %d got %d\n", i, skel->data->on_cpu); - CHECK(opts.retval != expected_retval, - "check_retval", "expect 0x%x, got 0x%x\n", - expected_retval, opts.retval); + ASSERT_OK(err, "test_run_opts"); + ASSERT_EQ(skel->data->on_cpu, i, "check_on_cpu"); + ASSERT_EQ(opts.retval, expected_retval, "check_retval"); } /* invalid cpu ID should fail with ENXIO */ opts.cpu = 0xffffffff; err = bpf_prog_test_run_opts(prog_fd, &opts); - CHECK(err >= 0 || errno != ENXIO, - "test_run_opts_fail", - "should failed with ENXIO\n"); + ASSERT_EQ(errno, ENXIO, "test_run_opts should fail with ENXIO"); + ASSERT_ERR(err, "test_run_opts_fail"); /* non-zero cpu w/o BPF_F_TEST_RUN_ON_CPU should fail with EINVAL */ opts.cpu = 1; opts.flags = 0; err = bpf_prog_test_run_opts(prog_fd, &opts); - CHECK(err >= 0 || errno != EINVAL, - "test_run_opts_fail", - "should failed with EINVAL\n"); + ASSERT_EQ(errno, EINVAL, "test_run_opts should fail with EINVAL"); + ASSERT_ERR(err, "test_run_opts_fail"); cleanup: close(comm_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c index 239baccabccb..f4aa7dab4766 100644 --- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c @@ -56,21 +56,23 @@ void serial_test_raw_tp_writable_test_run(void) 0, }; - __u32 prog_ret; - int err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, - 0, &prog_ret, 0); + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = test_skb, + .data_size_in = sizeof(test_skb), + .repeat = 1, + ); + int err = bpf_prog_test_run_opts(filter_fd, &topts); CHECK(err != 42, "test_run", "tracepoint did not modify return value\n"); - CHECK(prog_ret != 0, "test_run_ret", + CHECK(topts.retval != 0, "test_run_ret", "socket_filter did not return 0\n"); close(tp_fd); - err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, 0, - &prog_ret, 0); + err = bpf_prog_test_run_opts(filter_fd, &topts); CHECK(err != 0, "test_run_notrace", "test_run failed with %d errno %d\n", err, errno); - CHECK(prog_ret != 0, "test_run_ret_notrace", + CHECK(topts.retval != 0, "test_run_ret_notrace", "socket_filter did not return 0\n"); out_filterfd: diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c index aecfe662c070..70b49da5ca0a 100644 --- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c +++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c @@ -13,10 
+13,14 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type) struct itimerval timeo = { .it_value.tv_usec = 100000, /* 100ms */ }; - __u32 duration = 0, retval; int prog_fd; int err; int i; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 0xffffffff, + ); for (i = 0; i < ARRAY_SIZE(prog); i++) prog[i] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0); @@ -24,20 +28,17 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type) prog_fd = bpf_test_load_program(prog_type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); - CHECK(prog_fd < 0, "test-run", "errno %d\n", errno); + ASSERT_GE(prog_fd, 0, "test-run load"); err = sigaction(SIGALRM, &sigalrm_action, NULL); - CHECK(err, "test-run-signal-sigaction", "errno %d\n", errno); + ASSERT_OK(err, "test-run-signal-sigaction"); err = setitimer(ITIMER_REAL, &timeo, NULL); - CHECK(err, "test-run-signal-timer", "errno %d\n", errno); - - err = bpf_prog_test_run(prog_fd, 0xffffffff, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(duration > 500000000, /* 500ms */ - "test-run-signal-duration", - "duration %dns > 500ms\n", - duration); + ASSERT_OK(err, "test-run-signal-timer"); + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_LE(topts.duration, 500000000 /* 500ms */, + "test-run-signal-duration"); signal(SIGALRM, SIG_DFL); } diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c index b5319ba2ee27..ce0e555b5e38 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c @@ -20,97 +20,72 @@ void test_skb_ctx(void) .gso_size = 10, .hwtstamp = 11, }; - struct bpf_prog_test_run_attr tattr = { + LIBBPF_OPTS(bpf_test_run_opts, tattr, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), .ctx_in = &skb, .ctx_size_in = sizeof(skb), .ctx_out = &skb, .ctx_size_out = sizeof(skb), - }; + ); struct bpf_object *obj; - int err; - int i; + int err, prog_fd, i; - err = bpf_prog_test_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj, - &tattr.prog_fd); - if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + err = bpf_prog_test_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (!ASSERT_OK(err, "load")) return; /* ctx_in != NULL, ctx_size_in == 0 */ tattr.ctx_size_in = 0; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err == 0, "ctx_size_in", "err %d errno %d\n", err, errno); + err = bpf_prog_test_run_opts(prog_fd, &tattr); + ASSERT_NEQ(err, 0, "ctx_size_in"); tattr.ctx_size_in = sizeof(skb); /* ctx_out != NULL, ctx_size_out == 0 */ tattr.ctx_size_out = 0; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err == 0, "ctx_size_out", "err %d errno %d\n", err, errno); + err = bpf_prog_test_run_opts(prog_fd, &tattr); + ASSERT_NEQ(err, 0, "ctx_size_out"); tattr.ctx_size_out = sizeof(skb); /* non-zero [len, tc_index] fields should be rejected*/ skb.len = 1; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err == 0, "len", "err %d errno %d\n", err, errno); + err = bpf_prog_test_run_opts(prog_fd, &tattr); + ASSERT_NEQ(err, 0, "len"); skb.len = 0; skb.tc_index = 1; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err == 0, "tc_index", "err %d errno %d\n", err, errno); + err = bpf_prog_test_run_opts(prog_fd, &tattr); + ASSERT_NEQ(err, 0, "tc_index"); skb.tc_index = 0; /* non-zero [hash, sk] fields should be rejected */ skb.hash = 1; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err == 0, "hash", "err %d 
errno %d\n", err, errno); + err = bpf_prog_test_run_opts(prog_fd, &tattr); + ASSERT_NEQ(err, 0, "hash"); skb.hash = 0; skb.sk = (struct bpf_sock *)1; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err == 0, "sk", "err %d errno %d\n", err, errno); + err = bpf_prog_test_run_opts(prog_fd, &tattr); + ASSERT_NEQ(err, 0, "sk"); skb.sk = 0; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err != 0 || tattr.retval, - "run", - "err %d errno %d retval %d\n", - err, errno, tattr.retval); - - CHECK_ATTR(tattr.ctx_size_out != sizeof(skb), - "ctx_size_out", - "incorrect output size, want %zu have %u\n", - sizeof(skb), tattr.ctx_size_out); + err = bpf_prog_test_run_opts(prog_fd, &tattr); + ASSERT_OK(err, "test_run"); + ASSERT_OK(tattr.retval, "test_run retval"); + ASSERT_EQ(tattr.ctx_size_out, sizeof(skb), "ctx_size_out"); for (i = 0; i < 5; i++) - CHECK_ATTR(skb.cb[i] != i + 2, - "ctx_out_cb", - "skb->cb[i] == %d, expected %d\n", - skb.cb[i], i + 2); - CHECK_ATTR(skb.priority != 7, - "ctx_out_priority", - "skb->priority == %d, expected %d\n", - skb.priority, 7); - CHECK_ATTR(skb.ifindex != 1, - "ctx_out_ifindex", - "skb->ifindex == %d, expected %d\n", - skb.ifindex, 1); - CHECK_ATTR(skb.ingress_ifindex != 11, - "ctx_out_ingress_ifindex", - "skb->ingress_ifindex == %d, expected %d\n", - skb.ingress_ifindex, 11); - CHECK_ATTR(skb.tstamp != 8, - "ctx_out_tstamp", - "skb->tstamp == %lld, expected %d\n", - skb.tstamp, 8); - CHECK_ATTR(skb.mark != 10, - "ctx_out_mark", - "skb->mark == %u, expected %d\n", - skb.mark, 10); + ASSERT_EQ(skb.cb[i], i + 2, "ctx_out_cb"); + ASSERT_EQ(skb.priority, 7, "ctx_out_priority"); + ASSERT_EQ(skb.ifindex, 1, "ctx_out_ifindex"); + ASSERT_EQ(skb.ingress_ifindex, 11, "ctx_out_ingress_ifindex"); + ASSERT_EQ(skb.tstamp, 8, "ctx_out_tstamp"); + ASSERT_EQ(skb.mark, 10, "ctx_out_mark"); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c index 6f802a1c0800..97dc8b14be48 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c @@ -9,22 +9,22 @@ void test_skb_helpers(void) .gso_segs = 8, .gso_size = 10, }; - struct bpf_prog_test_run_attr tattr = { + LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), .ctx_in = &skb, .ctx_size_in = sizeof(skb), .ctx_out = &skb, .ctx_size_out = sizeof(skb), - }; + ); struct bpf_object *obj; - int err; + int err, prog_fd; - err = bpf_prog_test_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj, - &tattr.prog_fd); - if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + err = bpf_prog_test_load("./test_skb_helpers.o", + BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + if (!ASSERT_OK(err, "load")) return; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err, "len", "err %d errno %d\n", err, errno); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index 9fc040eaa482..9d211b5c22c4 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -1,9 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ +#define _GNU_SOURCE #include <netinet/in.h> #include <arpa/inet.h> #include <unistd.h> +#include <sched.h> #include <stdlib.h> #include <string.h> #include <errno.h> @@ 
-20,6 +22,7 @@ enum bpf_linum_array_idx { EGRESS_LINUM_IDX, INGRESS_LINUM_IDX, + READ_SK_DST_PORT_LINUM_IDX, __NR_BPF_LINUM_ARRAY_IDX, }; @@ -42,8 +45,16 @@ static __u64 child_cg_id; static int linum_map_fd; static __u32 duration; -static __u32 egress_linum_idx = EGRESS_LINUM_IDX; -static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; +static bool create_netns(void) +{ + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) + return false; + + if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo")) + return false; + + return true; +} static void print_sk(const struct bpf_sock *sk, const char *prefix) { @@ -91,19 +102,24 @@ static void check_result(void) { struct bpf_tcp_sock srv_tp, cli_tp, listen_tp; struct bpf_sock srv_sk, cli_sk, listen_sk; - __u32 ingress_linum, egress_linum; + __u32 idx, ingress_linum, egress_linum, linum; int err; - err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, - &egress_linum); + idx = EGRESS_LINUM_IDX; + err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum); CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)", "err:%d errno:%d\n", err, errno); - err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, - &ingress_linum); + idx = INGRESS_LINUM_IDX; + err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum); CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)", "err:%d errno:%d\n", err, errno); + idx = READ_SK_DST_PORT_LINUM_IDX; + err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum); + ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)"); + ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line"); + memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk)); memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp)); memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk)); @@ -262,7 +278,7 @@ static void test(void) char buf[DATA_LEN]; /* Prepare listen_fd */ - listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0); /* start_server() has logged the error details */ if (CHECK_FAIL(listen_fd == -1)) goto done; @@ -330,8 +346,12 @@ done: void serial_test_sock_fields(void) { - struct bpf_link *egress_link = NULL, *ingress_link = NULL; int parent_cg_fd = -1, child_cg_fd = -1; + struct bpf_link *link; + + /* Use a dedicated netns to have a fixed listen port */ + if (!create_netns()) + return; /* Create a cgroup, get fd, and join it */ parent_cg_fd = test__join_cgroup(PARENT_CGROUP); @@ -352,15 +372,20 @@ void serial_test_sock_fields(void) if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n")) goto done; - egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, - child_cg_fd); - if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)")) + link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd); + if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)")) + goto done; + skel->links.egress_read_sock_fields = link; + + link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd); + if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)")) goto done; + skel->links.ingress_read_sock_fields = link; - ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, - child_cg_fd); - if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)")) + link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd); + if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port")) goto done; + skel->links.read_sk_dst_port = link; linum_map_fd = 
bpf_map__fd(skel->maps.linum_map); sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt); @@ -369,8 +394,7 @@ void serial_test_sock_fields(void) test(); done: - bpf_link__destroy(egress_link); - bpf_link__destroy(ingress_link); + test_sock_fields__detach(skel); test_sock_fields__destroy(skel); if (child_cg_fd >= 0) close(child_cg_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index b97a8f236b3a..cec5c0882372 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -140,12 +140,16 @@ out: static void test_sockmap_update(enum bpf_map_type map_type) { - struct bpf_prog_test_run_attr tattr; int err, prog, src, duration = 0; struct test_sockmap_update *skel; struct bpf_map *dst_map; const __u32 zero = 0; char dummy[14] = {0}; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = dummy, + .data_size_in = sizeof(dummy), + .repeat = 1, + ); __s64 sk; sk = connected_socket_v4(); @@ -167,16 +171,10 @@ static void test_sockmap_update(enum bpf_map_type map_type) if (CHECK(err, "update_elem(src)", "errno=%u\n", errno)) goto out; - tattr = (struct bpf_prog_test_run_attr){ - .prog_fd = prog, - .repeat = 1, - .data_in = dummy, - .data_size_in = sizeof(dummy), - }; - - err = bpf_prog_test_run_xattr(&tattr); - if (CHECK_ATTR(err || !tattr.retval, "bpf_prog_test_run", - "errno=%u retval=%u\n", errno, tattr.retval)) + err = bpf_prog_test_run_opts(prog, &topts); + if (!ASSERT_OK(err, "test_run")) + goto out; + if (!ASSERT_NEQ(topts.retval, 0, "test_run retval")) goto out; compare_cookies(skel->maps.src, dst_map); diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c index 6307f5d2b417..8e329eaee6d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/spinlock.c +++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c @@ -4,14 +4,16 @@ static void *spin_lock_thread(void *arg) { - __u32 duration, retval; int err, prog_fd = *(u32 *) arg; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 10000, + ); - err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_OK(topts.retval, "test_run retval"); pthread_exit(arg); } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index 0a91d8d9954b..f45a1d7b0a28 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -42,7 +42,7 @@ retry: return; /* override program type */ - bpf_program__set_perf_event(skel->progs.oncpu); + bpf_program__set_type(skel->progs.oncpu, BPF_PROG_TYPE_PERF_EVENT); err = test_stacktrace_build_id__load(skel); if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err)) diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c index 81e997a69f7a..f4d40001155a 100644 --- a/tools/testing/selftests/bpf/prog_tests/syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/syscall.c @@ -20,20 +20,20 @@ void test_syscall(void) .log_buf = (uintptr_t) verifier_log, .log_size = sizeof(verifier_log), }; - struct 
bpf_prog_test_run_attr tattr = { + LIBBPF_OPTS(bpf_test_run_opts, tattr, .ctx_in = &ctx, .ctx_size_in = sizeof(ctx), - }; + ); struct syscall *skel = NULL; __u64 key = 12, value = 0; - int err; + int err, prog_fd; skel = syscall__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_load")) goto cleanup; - tattr.prog_fd = bpf_program__fd(skel->progs.bpf_prog); - err = bpf_prog_test_run_xattr(&tattr); + prog_fd = bpf_program__fd(skel->progs.bpf_prog); + err = bpf_prog_test_run_opts(prog_fd, &tattr); ASSERT_EQ(err, 0, "err"); ASSERT_EQ(tattr.retval, 1, "retval"); ASSERT_GT(ctx.map_fd, 0, "ctx.map_fd"); diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 796f231582f8..c4da87ec3ba4 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -12,9 +12,13 @@ static void test_tailcall_1(void) struct bpf_map *prog_array; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; char prog_name[32]; char buff[128] = {}; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); err = bpf_prog_test_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -54,20 +58,18 @@ static void test_tailcall_1(void) } for (i = 0; i < bpf_map__max_entries(prog_array); i++) { - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != i, "tailcall", - "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, i, "tailcall retval"); err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) goto out; } - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 3, "tailcall retval"); for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); @@ -85,10 +87,9 @@ static void test_tailcall_1(void) goto out; } - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_OK(topts.retval, "tailcall retval"); for (i = 0; i < bpf_map__max_entries(prog_array); i++) { j = bpf_map__max_entries(prog_array) - 1 - i; @@ -110,30 +111,27 @@ static void test_tailcall_1(void) for (i = 0; i < bpf_map__max_entries(prog_array); i++) { j = bpf_map__max_entries(prog_array) - 1 - i; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != j, "tailcall", - "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, j, "tailcall retval"); err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) goto out; } - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 3, "tailcall retval"); for (i = 0; i < 
bpf_map__max_entries(prog_array); i++) { err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err >= 0 || errno != ENOENT)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 3, "tailcall", - "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 3, "tailcall retval"); } out: @@ -150,9 +148,13 @@ static void test_tailcall_2(void) struct bpf_map *prog_array; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; char prog_name[32]; char buff[128] = {}; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); err = bpf_prog_test_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -191,30 +193,27 @@ static void test_tailcall_2(void) goto out; } - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 2, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 2, "tailcall retval"); i = 2; err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 1, "tailcall retval"); i = 0; err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 3, "tailcall retval"); out: bpf_object__close(obj); } @@ -225,8 +224,12 @@ static void test_tailcall_count(const char *which) struct bpf_map *prog_array, *data_map; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; char buff[128] = {}; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); err = bpf_prog_test_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -262,10 +265,9 @@ static void test_tailcall_count(const char *which) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 1, "tailcall retval"); data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) @@ -277,18 +279,17 @@ static void test_tailcall_count(const char *which) i = 0; err = bpf_map_lookup_elem(data_fd, &i, &val); - CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n", - err, errno, val); + ASSERT_OK(err, "tailcall count"); + ASSERT_EQ(val, 33, "tailcall count"); i = 0; err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = 
bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_OK(topts.retval, "tailcall retval"); out: bpf_object__close(obj); } @@ -319,10 +320,14 @@ static void test_tailcall_4(void) struct bpf_map *prog_array, *data_map; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; static const int zero = 0; char buff[128] = {}; char prog_name[32]; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); err = bpf_prog_test_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -374,10 +379,9 @@ static void test_tailcall_4(void) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != i, "tailcall", - "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, i, "tailcall retval"); } for (i = 0; i < bpf_map__max_entries(prog_array); i++) { @@ -389,10 +393,9 @@ static void test_tailcall_4(void) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 3, "tailcall", - "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 3, "tailcall retval"); } out: bpf_object__close(obj); @@ -407,10 +410,14 @@ static void test_tailcall_5(void) struct bpf_map *prog_array, *data_map; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; static const int zero = 0; char buff[128] = {}; char prog_name[32]; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); err = bpf_prog_test_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -462,10 +469,9 @@ static void test_tailcall_5(void) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != i, "tailcall", - "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, i, "tailcall retval"); } for (i = 0; i < bpf_map__max_entries(prog_array); i++) { @@ -477,10 +483,9 @@ static void test_tailcall_5(void) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 3, "tailcall", - "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 3, "tailcall retval"); } out: bpf_object__close(obj); @@ -495,8 +500,12 @@ static void test_tailcall_bpf2bpf_1(void) struct bpf_map *prog_array; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; char prog_name[32]; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); err = bpf_prog_test_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -536,10 +545,9 @@ static void test_tailcall_bpf2bpf_1(void) goto out; } - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - 0, &retval, &duration); - CHECK(err || retval != 1, "tailcall", - "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 1, 
"tailcall retval"); /* jmp -> nop, call subprog that will do tailcall */ i = 1; @@ -547,10 +555,9 @@ static void test_tailcall_bpf2bpf_1(void) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - 0, &retval, &duration); - CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_OK(topts.retval, "tailcall retval"); /* make sure that subprog can access ctx and entry prog that * called this subprog can properly return @@ -560,11 +567,9 @@ static void test_tailcall_bpf2bpf_1(void) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - 0, &retval, &duration); - CHECK(err || retval != sizeof(pkt_v4) * 2, - "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 2, "tailcall retval"); out: bpf_object__close(obj); } @@ -579,8 +584,12 @@ static void test_tailcall_bpf2bpf_2(void) struct bpf_map *prog_array, *data_map; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; char buff[128] = {}; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); err = bpf_prog_test_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -616,10 +625,9 @@ static void test_tailcall_bpf2bpf_2(void) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 1, "tailcall retval"); data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) @@ -631,18 +639,17 @@ static void test_tailcall_bpf2bpf_2(void) i = 0; err = bpf_map_lookup_elem(data_fd, &i, &val); - CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n", - err, errno, val); + ASSERT_OK(err, "tailcall count"); + ASSERT_EQ(val, 33, "tailcall count"); i = 0; err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_OK(topts.retval, "tailcall retval"); out: bpf_object__close(obj); } @@ -657,8 +664,12 @@ static void test_tailcall_bpf2bpf_3(void) struct bpf_map *prog_array; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; char prog_name[32]; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); err = bpf_prog_test_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -697,33 +708,27 @@ static void test_tailcall_bpf2bpf_3(void) goto out; } - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - &duration, &retval, NULL); - CHECK(err || retval != sizeof(pkt_v4) * 3, - "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 3, "tailcall retval"); i = 1; err = 
bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - &duration, &retval, NULL); - CHECK(err || retval != sizeof(pkt_v4), - "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, sizeof(pkt_v4), "tailcall retval"); i = 0; err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - &duration, &retval, NULL); - CHECK(err || retval != sizeof(pkt_v4) * 2, - "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 2, "tailcall retval"); out: bpf_object__close(obj); } @@ -754,8 +759,12 @@ static void test_tailcall_bpf2bpf_4(bool noise) struct bpf_map *prog_array, *data_map; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; char prog_name[32]; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); err = bpf_prog_test_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -809,15 +818,14 @@ static void test_tailcall_bpf2bpf_4(bool noise) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - &duration, &retval, NULL); - CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 3, "tailcall retval"); i = 0; err = bpf_map_lookup_elem(data_fd, &i, &val); - CHECK(err || val.count != 31, "tailcall count", "err %d errno %d count %d\n", - err, errno, val.count); + ASSERT_OK(err, "tailcall count"); + ASSERT_EQ(val.count, 31, "tailcall count"); out: bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c index 37c20b5ffa70..61935e7e056a 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c +++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c @@ -3,18 +3,22 @@ #include <test_progs.h> #include "test_task_pt_regs.skel.h" +/* uprobe attach point */ +static void trigger_func(void) +{ + asm volatile (""); +} + void test_task_pt_regs(void) { struct test_task_pt_regs *skel; struct bpf_link *uprobe_link; - size_t uprobe_offset; - ssize_t base_addr; + ssize_t uprobe_offset; bool match; - base_addr = get_base_addr(); - if (!ASSERT_GT(base_addr, 0, "get_base_addr")) + uprobe_offset = get_uprobe_offset(&trigger_func); + if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset")) return; - uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr); skel = test_task_pt_regs__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open")) @@ -32,7 +36,7 @@ void test_task_pt_regs(void) skel->links.handle_uprobe = uprobe_link; /* trigger & validate uprobe */ - get_base_addr(); + trigger_func(); if (!ASSERT_EQ(skel->bss->uprobe_res, 1, "check_uprobe_res")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c new file mode 100644 index 000000000000..c381faaae741 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2022 
Sony Group Corporation */ +#include <sys/prctl.h> +#include <test_progs.h> +#include "bpf_syscall_macro.skel.h" + +void test_bpf_syscall_macro(void) +{ + struct bpf_syscall_macro *skel = NULL; + int err; + int exp_arg1 = 1001; + unsigned long exp_arg2 = 12; + unsigned long exp_arg3 = 13; + unsigned long exp_arg4 = 14; + unsigned long exp_arg5 = 15; + + /* check whether it can open program */ + skel = bpf_syscall_macro__open(); + if (!ASSERT_OK_PTR(skel, "bpf_syscall_macro__open")) + return; + + skel->rodata->filter_pid = getpid(); + + /* check whether it can load program */ + err = bpf_syscall_macro__load(skel); + if (!ASSERT_OK(err, "bpf_syscall_macro__load")) + goto cleanup; + + /* check whether it can attach kprobe */ + err = bpf_syscall_macro__attach(skel); + if (!ASSERT_OK(err, "bpf_syscall_macro__attach")) + goto cleanup; + + /* check whether args of syscall are copied correctly */ + prctl(exp_arg1, exp_arg2, exp_arg3, exp_arg4, exp_arg5); +#if defined(__aarch64__) || defined(__s390__) + ASSERT_NEQ(skel->bss->arg1, exp_arg1, "syscall_arg1"); +#else + ASSERT_EQ(skel->bss->arg1, exp_arg1, "syscall_arg1"); +#endif + ASSERT_EQ(skel->bss->arg2, exp_arg2, "syscall_arg2"); + ASSERT_EQ(skel->bss->arg3, exp_arg3, "syscall_arg3"); + /* it cannot copy arg4 when using PT_REGS_PARM4 on x86_64 */ +#ifdef __x86_64__ + ASSERT_NEQ(skel->bss->arg4_cx, exp_arg4, "syscall_arg4_from_cx"); +#else + ASSERT_EQ(skel->bss->arg4_cx, exp_arg4, "syscall_arg4_from_cx"); +#endif + ASSERT_EQ(skel->bss->arg4, exp_arg4, "syscall_arg4"); + ASSERT_EQ(skel->bss->arg5, exp_arg5, "syscall_arg5"); + + /* check whether args of syscall are copied correctly for CORE variants */ + ASSERT_EQ(skel->bss->arg1_core, exp_arg1, "syscall_arg1_core_variant"); + ASSERT_EQ(skel->bss->arg2_core, exp_arg2, "syscall_arg2_core_variant"); + ASSERT_EQ(skel->bss->arg3_core, exp_arg3, "syscall_arg3_core_variant"); + /* it cannot copy arg4 when using PT_REGS_PARM4_CORE on x86_64 */ +#ifdef __x86_64__ + ASSERT_NEQ(skel->bss->arg4_core_cx, exp_arg4, "syscall_arg4_from_cx_core_variant"); +#else + ASSERT_EQ(skel->bss->arg4_core_cx, exp_arg4, "syscall_arg4_from_cx_core_variant"); +#endif + ASSERT_EQ(skel->bss->arg4_core, exp_arg4, "syscall_arg4_core_variant"); + ASSERT_EQ(skel->bss->arg5_core, exp_arg5, "syscall_arg5_core_variant"); + + ASSERT_EQ(skel->bss->option_syscall, exp_arg1, "BPF_KPROBE_SYSCALL_option"); + ASSERT_EQ(skel->bss->arg2_syscall, exp_arg2, "BPF_KPROBE_SYSCALL_arg2"); + ASSERT_EQ(skel->bss->arg3_syscall, exp_arg3, "BPF_KPROBE_SYSCALL_arg3"); + ASSERT_EQ(skel->bss->arg4_syscall, exp_arg4, "BPF_KPROBE_SYSCALL_arg4"); + ASSERT_EQ(skel->bss->arg5_syscall, exp_arg5, "BPF_KPROBE_SYSCALL_arg5"); + +cleanup: + bpf_syscall_macro__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_profiler.c b/tools/testing/selftests/bpf/prog_tests/test_profiler.c index 4ca275101ee0..de24e8f0e738 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_profiler.c +++ b/tools/testing/selftests/bpf/prog_tests/test_profiler.c @@ -8,20 +8,20 @@ static int sanity_run(struct bpf_program *prog) { - struct bpf_prog_test_run_attr test_attr = {}; + LIBBPF_OPTS(bpf_test_run_opts, test_attr); __u64 args[] = {1, 2, 3}; - __u32 duration = 0; int err, prog_fd; prog_fd = bpf_program__fd(prog); - test_attr.prog_fd = prog_fd; test_attr.ctx_in = args; test_attr.ctx_size_in = sizeof(args); - err = bpf_prog_test_run_xattr(&test_attr); - if (CHECK(err || test_attr.retval, "test_run", - "err %d errno %d retval %d duration %d\n", - err, errno, test_attr.retval, 
duration)) + err = bpf_prog_test_run_opts(prog_fd, &test_attr); + if (!ASSERT_OK(err, "test_run")) + return -1; + + if (!ASSERT_OK(test_attr.retval, "test_run retval")) return -1; + return 0; } diff --git a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c index cf1215531920..ae93411fd582 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c +++ b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c @@ -6,15 +6,18 @@ static int sanity_run(struct bpf_program *prog) { - __u32 duration, retval; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); prog_fd = bpf_program__fd(prog); - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - if (CHECK(err || retval != 123, "test_run", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration)) + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run")) + return -1; + if (!ASSERT_EQ(topts.retval, 123, "test_run retval")) return -1; return 0; } diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 0f4e49e622cd..7eb049214859 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -6,7 +6,7 @@ static int timer(struct timer *timer_skel) { int err, prog_fd; - __u32 duration = 0, retval; + LIBBPF_OPTS(bpf_test_run_opts, topts); err = timer__attach(timer_skel); if (!ASSERT_OK(err, "timer_attach")) @@ -16,10 +16,9 @@ static int timer(struct timer *timer_skel) ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1"); prog_fd = bpf_program__fd(timer_skel->progs.test1); - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); - ASSERT_EQ(retval, 0, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); timer__detach(timer_skel); usleep(50); /* 10 usecs should be enough, but give it extra */ diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c index 949a0617869d..2ee5f5ae11d4 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c @@ -6,19 +6,18 @@ static int timer_mim(struct timer_mim *timer_skel) { - __u32 duration = 0, retval; __u64 cnt1, cnt2; int err, prog_fd, key1 = 1; + LIBBPF_OPTS(bpf_test_run_opts, topts); err = timer_mim__attach(timer_skel); if (!ASSERT_OK(err, "timer_attach")) return err; prog_fd = bpf_program__fd(timer_skel->progs.test1); - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); - ASSERT_EQ(retval, 0, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); timer_mim__detach(timer_skel); /* check that timer_cb[12] are incrementing 'cnt' */ diff --git a/tools/testing/selftests/bpf/prog_tests/trace_ext.c b/tools/testing/selftests/bpf/prog_tests/trace_ext.c index 924441d4362d..aabdff7bea3e 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_ext.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_ext.c @@ -23,8 +23,12 @@ void test_trace_ext(void) int err, pkt_fd, ext_fd; struct bpf_program *prog; char buf[100]; - __u32 retval; __u64 len; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = 
sizeof(pkt_v4), + .repeat = 1, + ); /* open/load/attach test_pkt_md_access */ skel_pkt = test_pkt_md_access__open_and_load(); @@ -77,32 +81,32 @@ void test_trace_ext(void) /* load/attach tracing */ err = test_trace_ext_tracing__load(skel_trace); - if (CHECK(err, "setup", "tracing/test_pkt_md_access_new load failed\n")) { + if (!ASSERT_OK(err, "tracing/test_pkt_md_access_new load")) { libbpf_strerror(err, buf, sizeof(buf)); fprintf(stderr, "%s\n", buf); goto cleanup; } err = test_trace_ext_tracing__attach(skel_trace); - if (CHECK(err, "setup", "tracing/test_pkt_md_access_new attach failed: %d\n", err)) + if (!ASSERT_OK(err, "tracing/test_pkt_md_access_new attach")) goto cleanup; /* trigger the test */ - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "run", "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(pkt_fd, &topts); + ASSERT_OK(err, "test_run_opts err"); + ASSERT_OK(topts.retval, "test_run_opts retval"); bss_ext = skel_ext->bss; bss_trace = skel_trace->bss; len = bss_ext->ext_called; - CHECK(bss_ext->ext_called == 0, - "check", "failed to trigger freplace/test_pkt_md_access\n"); - CHECK(bss_trace->fentry_called != len, - "check", "failed to trigger fentry/test_pkt_md_access_new\n"); - CHECK(bss_trace->fexit_called != len, - "check", "failed to trigger fexit/test_pkt_md_access_new\n"); + ASSERT_NEQ(bss_ext->ext_called, 0, + "failed to trigger freplace/test_pkt_md_access"); + ASSERT_EQ(bss_trace->fentry_called, len, + "failed to trigger fentry/test_pkt_md_access_new"); + ASSERT_EQ(bss_trace->fexit_called, len, + "failed to trigger fexit/test_pkt_md_access_new"); cleanup: test_trace_ext_tracing__destroy(skel_trace); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c index ac65456b7ab8..ec21c53cb1da 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp.c @@ -13,8 +13,14 @@ void test_xdp(void) char buf[128]; struct ipv6hdr iph6; struct iphdr iph; - __u32 duration, retval, size; int err, prog_fd, map_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (CHECK_FAIL(err)) @@ -26,21 +32,23 @@ void test_xdp(void) bpf_map_update_elem(map_fd, &key4, &value4, 0); bpf_map_update_elem(map_fd, &key6, &value6, 0); - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - buf, &size, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph)); - CHECK(err || retval != XDP_TX || size != 74 || - iph.protocol != IPPROTO_IPIP, "ipv4", - "err %d errno %d retval %d size %d\n", - err, errno, retval, size); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, XDP_TX, "ipv4 test_run retval"); + ASSERT_EQ(topts.data_size_out, 74, "ipv4 test_run data_size_out"); + ASSERT_EQ(iph.protocol, IPPROTO_IPIP, "ipv4 test_run iph.protocol"); - err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), - buf, &size, &retval, &duration); + topts.data_in = &pkt_v6; + topts.data_size_in = sizeof(pkt_v6); + topts.data_size_out = sizeof(buf); + + err = bpf_prog_test_run_opts(prog_fd, &topts); memcpy(&iph6, buf + sizeof(struct ethhdr), sizeof(iph6)); - CHECK(err || retval != XDP_TX || size != 114 || - iph6.nexthdr != IPPROTO_IPV6, "ipv6", - "err %d errno %d 
retval %d size %d\n", - err, errno, retval, size); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, XDP_TX, "ipv6 test_run retval"); + ASSERT_EQ(topts.data_size_out, 114, "ipv6 test_run data_size_out"); + ASSERT_EQ(iph6.nexthdr, IPPROTO_IPV6, "ipv6 test_run iph6.nexthdr"); out: bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c index 31c188666e81..d18e6f343c48 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c @@ -2,15 +2,15 @@ #include <test_progs.h> #include <network_helpers.h> -void test_xdp_update_frags(void) +static void test_xdp_update_frags(void) { const char *file = "./test_xdp_update_frags.o"; - __u32 duration, retval, size; struct bpf_program *prog; struct bpf_object *obj; int err, prog_fd; __u32 *offset; __u8 *buf; + LIBBPF_OPTS(bpf_test_run_opts, topts); obj = bpf_object__open(file); if (libbpf_get_error(obj)) @@ -32,12 +32,16 @@ void test_xdp_update_frags(void) buf[*offset] = 0xaa; /* marker at offset 16 (head) */ buf[*offset + 15] = 0xaa; /* marker at offset 31 (head) */ - err = bpf_prog_test_run(prog_fd, 1, buf, 128, - buf, &size, &retval, &duration); + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = 128; + topts.data_size_out = 128; + + err = bpf_prog_test_run_opts(prog_fd, &topts); /* test_xdp_update_frags: buf[16,31]: 0xaa -> 0xbb */ ASSERT_OK(err, "xdp_update_frag"); - ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); + ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval"); ASSERT_EQ(buf[16], 0xbb, "xdp_update_frag buf[16]"); ASSERT_EQ(buf[31], 0xbb, "xdp_update_frag buf[31]"); @@ -53,12 +57,16 @@ void test_xdp_update_frags(void) buf[*offset] = 0xaa; /* marker at offset 5000 (frag0) */ buf[*offset + 15] = 0xaa; /* marker at offset 5015 (frag0) */ - err = bpf_prog_test_run(prog_fd, 1, buf, 9000, - buf, &size, &retval, &duration); + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = 9000; + topts.data_size_out = 9000; + + err = bpf_prog_test_run_opts(prog_fd, &topts); /* test_xdp_update_frags: buf[5000,5015]: 0xaa -> 0xbb */ ASSERT_OK(err, "xdp_update_frag"); - ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); + ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval"); ASSERT_EQ(buf[5000], 0xbb, "xdp_update_frag buf[5000]"); ASSERT_EQ(buf[5015], 0xbb, "xdp_update_frag buf[5015]"); @@ -68,12 +76,11 @@ void test_xdp_update_frags(void) buf[*offset] = 0xaa; /* marker at offset 3510 (head) */ buf[*offset + 15] = 0xaa; /* marker at offset 3525 (frag0) */ - err = bpf_prog_test_run(prog_fd, 1, buf, 9000, - buf, &size, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); /* test_xdp_update_frags: buf[3510,3525]: 0xaa -> 0xbb */ ASSERT_OK(err, "xdp_update_frag"); - ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); + ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval"); ASSERT_EQ(buf[3510], 0xbb, "xdp_update_frag buf[3510]"); ASSERT_EQ(buf[3525], 0xbb, "xdp_update_frag buf[3525]"); @@ -83,12 +90,11 @@ void test_xdp_update_frags(void) buf[*offset] = 0xaa; /* marker at offset 7606 (frag0) */ buf[*offset + 15] = 0xaa; /* marker at offset 7621 (frag1) */ - err = bpf_prog_test_run(prog_fd, 1, buf, 9000, - buf, &size, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); /* test_xdp_update_frags: buf[7606,7621]: 0xaa -> 0xbb */ ASSERT_OK(err, "xdp_update_frag"); - ASSERT_EQ(retval, XDP_PASS, 
"xdp_update_frag retval"); + ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval"); ASSERT_EQ(buf[7606], 0xbb, "xdp_update_frag buf[7606]"); ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]"); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index ccc9e63254a8..21ceac24e174 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -5,26 +5,34 @@ static void test_xdp_adjust_tail_shrink(void) { const char *file = "./test_xdp_adjust_tail_shrink.o"; - __u32 duration, retval, size, expect_sz; + __u32 expect_sz; struct bpf_object *obj; int err, prog_fd; char buf[128]; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (ASSERT_OK(err, "test_xdp_adjust_tail_shrink")) return; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - buf, &size, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "ipv4"); - ASSERT_EQ(retval, XDP_DROP, "ipv4 retval"); + ASSERT_EQ(topts.retval, XDP_DROP, "ipv4 retval"); expect_sz = sizeof(pkt_v6) - 20; /* Test shrink with 20 bytes */ - err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), - buf, &size, &retval, &duration); + topts.data_in = &pkt_v6; + topts.data_size_in = sizeof(pkt_v6); + topts.data_size_out = sizeof(buf); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "ipv6"); - ASSERT_EQ(retval, XDP_TX, "ipv6 retval"); - ASSERT_EQ(size, expect_sz, "ipv6 size"); + ASSERT_EQ(topts.retval, XDP_TX, "ipv6 retval"); + ASSERT_EQ(topts.data_size_out, expect_sz, "ipv6 size"); bpf_object__close(obj); } @@ -34,24 +42,31 @@ static void test_xdp_adjust_tail_grow(void) const char *file = "./test_xdp_adjust_tail_grow.o"; struct bpf_object *obj; char buf[4096]; /* avoid segfault: large buf to hold grow results */ - __u32 duration, retval, size, expect_sz; + __u32 expect_sz; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - buf, &size, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "ipv4"); - ASSERT_EQ(retval, XDP_DROP, "ipv4 retval"); + ASSERT_EQ(topts.retval, XDP_DROP, "ipv4 retval"); expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */ - err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */, - buf, &size, &retval, &duration); + topts.data_in = &pkt_v6; + topts.data_size_in = sizeof(pkt_v6); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "ipv6"); - ASSERT_EQ(retval, XDP_TX, "ipv6 retval"); - ASSERT_EQ(size, expect_sz, "ipv6 size"); + ASSERT_EQ(topts.retval, XDP_TX, "ipv6 retval"); + ASSERT_EQ(topts.data_size_out, expect_sz, "ipv6 size"); bpf_object__close(obj); } @@ -63,17 +78,17 @@ static void test_xdp_adjust_tail_grow2(void) int tailroom = 320; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info))*/; struct bpf_object *obj; int err, cnt, i; - int max_grow; + int max_grow, prog_fd; - struct bpf_prog_test_run_attr tattr = { + LIBBPF_OPTS(bpf_test_run_opts, tattr, .repeat = 1, 
.data_in = &buf, .data_out = &buf, .data_size_in = 0, /* Per test */ .data_size_out = 0, /* Per test */ - }; + ); - err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; @@ -82,7 +97,7 @@ static void test_xdp_adjust_tail_grow2(void) tattr.data_size_in = 64; /* Determine test case via pkt size */ tattr.data_size_out = 128; /* Limit copy_size */ /* Kernel side alloc packet memory area that is zero init */ - err = bpf_prog_test_run_xattr(&tattr); + err = bpf_prog_test_run_opts(prog_fd, &tattr); ASSERT_EQ(errno, ENOSPC, "case-64 errno"); /* Due limit copy_size in bpf_test_finish */ ASSERT_EQ(tattr.retval, XDP_TX, "case-64 retval"); @@ -100,7 +115,7 @@ static void test_xdp_adjust_tail_grow2(void) memset(buf, 2, sizeof(buf)); tattr.data_size_in = 128; /* Determine test case via pkt size */ tattr.data_size_out = sizeof(buf); /* Copy everything */ - err = bpf_prog_test_run_xattr(&tattr); + err = bpf_prog_test_run_opts(prog_fd, &tattr); max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */ ASSERT_OK(err, "case-128"); @@ -118,14 +133,15 @@ static void test_xdp_adjust_tail_grow2(void) bpf_object__close(obj); } -void test_xdp_adjust_frags_tail_shrink(void) +static void test_xdp_adjust_frags_tail_shrink(void) { const char *file = "./test_xdp_adjust_tail_shrink.o"; - __u32 duration, retval, size, exp_size; + __u32 exp_size; struct bpf_program *prog; struct bpf_object *obj; int err, prog_fd; __u8 *buf; + LIBBPF_OPTS(bpf_test_run_opts, topts); /* For the individual test cases, the first byte in the packet * indicates which test will be run. @@ -148,46 +164,51 @@ void test_xdp_adjust_frags_tail_shrink(void) /* Test case removing 10 bytes from last frag, NOT freeing it */ exp_size = 8990; /* 9000 - 10 */ - err = bpf_prog_test_run(prog_fd, 1, buf, 9000, - buf, &size, &retval, &duration); + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = 9000; + topts.data_size_out = 9000; + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "9Kb-10b"); - ASSERT_EQ(retval, XDP_TX, "9Kb-10b retval"); - ASSERT_EQ(size, exp_size, "9Kb-10b size"); + ASSERT_EQ(topts.retval, XDP_TX, "9Kb-10b retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-10b size"); /* Test case removing one of two pages, assuming 4K pages */ buf[0] = 1; exp_size = 4900; /* 9000 - 4100 */ - err = bpf_prog_test_run(prog_fd, 1, buf, 9000, - buf, &size, &retval, &duration); + + topts.data_size_out = 9000; /* reset from previous invocation */ + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "9Kb-4Kb"); - ASSERT_EQ(retval, XDP_TX, "9Kb-4Kb retval"); - ASSERT_EQ(size, exp_size, "9Kb-4Kb size"); + ASSERT_EQ(topts.retval, XDP_TX, "9Kb-4Kb retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-4Kb size"); /* Test case removing two pages resulting in a linear xdp_buff */ buf[0] = 2; exp_size = 800; /* 9000 - 8200 */ - err = bpf_prog_test_run(prog_fd, 1, buf, 9000, - buf, &size, &retval, &duration); + topts.data_size_out = 9000; /* reset from previous invocation */ + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "9Kb-9Kb"); - ASSERT_EQ(retval, XDP_TX, "9Kb-9Kb retval"); - ASSERT_EQ(size, exp_size, "9Kb-9Kb size"); + ASSERT_EQ(topts.retval, XDP_TX, "9Kb-9Kb retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-9Kb size"); free(buf); out: bpf_object__close(obj); } -void test_xdp_adjust_frags_tail_grow(void) +static void 
test_xdp_adjust_frags_tail_grow(void) { const char *file = "./test_xdp_adjust_tail_grow.o"; - __u32 duration, retval, size, exp_size; + __u32 exp_size; struct bpf_program *prog; struct bpf_object *obj; int err, i, prog_fd; __u8 *buf; + LIBBPF_OPTS(bpf_test_run_opts, topts); obj = bpf_object__open(file); if (libbpf_get_error(obj)) @@ -205,14 +226,17 @@ void test_xdp_adjust_frags_tail_grow(void) /* Test case add 10 bytes to last frag */ memset(buf, 1, 16384); - size = 9000; - exp_size = size + 10; - err = bpf_prog_test_run(prog_fd, 1, buf, size, - buf, &size, &retval, &duration); + exp_size = 9000 + 10; + + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = 9000; + topts.data_size_out = 16384; + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "9Kb+10b"); - ASSERT_EQ(retval, XDP_TX, "9Kb+10b retval"); - ASSERT_EQ(size, exp_size, "9Kb+10b size"); + ASSERT_EQ(topts.retval, XDP_TX, "9Kb+10b retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size"); for (i = 0; i < 9000; i++) ASSERT_EQ(buf[i], 1, "9Kb+10b-old"); @@ -225,14 +249,16 @@ void test_xdp_adjust_frags_tail_grow(void) /* Test a too large grow */ memset(buf, 1, 16384); - size = 9001; - exp_size = size; - err = bpf_prog_test_run(prog_fd, 1, buf, size, - buf, &size, &retval, &duration); + exp_size = 9001; + + topts.data_in = topts.data_out = buf; + topts.data_size_in = 9001; + topts.data_size_out = 16384; + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "9Kb+10b"); - ASSERT_EQ(retval, XDP_DROP, "9Kb+10b retval"); - ASSERT_EQ(size, exp_size, "9Kb+10b size"); + ASSERT_EQ(topts.retval, XDP_DROP, "9Kb+10b retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size"); free(buf); out: diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c index c6fa390e3aa1..62aa3edda5e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c @@ -11,8 +11,7 @@ void serial_test_xdp_attach(void) const char *file = "./test_xdp.o"; struct bpf_prog_info info = {}; int err, fd1, fd2, fd3; - DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, - .old_fd = -1); + LIBBPF_OPTS(bpf_xdp_attach_opts, opts); len = sizeof(info); @@ -38,49 +37,47 @@ void serial_test_xdp_attach(void) if (CHECK_FAIL(err)) goto out_2; - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE, - &opts); + err = bpf_xdp_attach(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE, &opts); if (CHECK(err, "load_ok", "initial load failed")) goto out_close; - err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0); if (CHECK(err || id0 != id1, "id1_check", "loaded prog id %u != id1 %u, err %d", id0, id1, err)) goto out_close; - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE, - &opts); + err = bpf_xdp_attach(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE, &opts); if (CHECK(!err, "load_fail", "load with expected id didn't fail")) goto out; - opts.old_fd = fd1; - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, 0, &opts); + opts.old_prog_fd = fd1; + err = bpf_xdp_attach(IFINDEX_LO, fd2, 0, &opts); if (CHECK(err, "replace_ok", "replace valid old_fd failed")) goto out; - err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0); if (CHECK(err || id0 != id2, "id2_check", "loaded prog id %u != id2 %u, err %d", id0, id2, err)) goto out_close; - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd3, 0, &opts); + err = bpf_xdp_attach(IFINDEX_LO, fd3, 0, &opts); if 
(CHECK(!err, "replace_fail", "replace invalid old_fd didn't fail")) goto out; - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts); + err = bpf_xdp_detach(IFINDEX_LO, 0, &opts); if (CHECK(!err, "remove_fail", "remove invalid old_fd didn't fail")) goto out; - opts.old_fd = fd2; - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts); + opts.old_prog_fd = fd2; + err = bpf_xdp_detach(IFINDEX_LO, 0, &opts); if (CHECK(err, "remove_ok", "remove valid old_fd failed")) goto out; - err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0); if (CHECK(err || id0 != 0, "unload_check", "loaded prog id %u != 0, err %d", id0, err)) goto out_close; out: - bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0); + bpf_xdp_detach(IFINDEX_LO, 0, NULL); out_close: bpf_object__close(obj3); out_2: diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c index 9c395ea680c6..76967d8ace9c 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -45,9 +45,9 @@ static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb, struct test_xdp_bpf2bpf *ftrace_skel, int pkt_size) { - __u32 duration = 0, retval, size; __u8 *buf, *buf_in; int err; + LIBBPF_OPTS(bpf_test_run_opts, topts); if (!ASSERT_LE(pkt_size, BUF_SZ, "pkt_size") || !ASSERT_GE(pkt_size, sizeof(pkt_v4), "pkt_size")) @@ -73,12 +73,16 @@ static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb, } /* Run test program */ - err = bpf_prog_test_run(pkt_fd, 1, buf_in, pkt_size, - buf, &size, &retval, &duration); + topts.data_in = buf_in; + topts.data_size_in = pkt_size; + topts.data_out = buf; + topts.data_size_out = BUF_SZ; + + err = bpf_prog_test_run_opts(pkt_fd, &topts); ASSERT_OK(err, "ipv4"); - ASSERT_EQ(retval, XDP_PASS, "ipv4 retval"); - ASSERT_EQ(size, pkt_size, "ipv4 size"); + ASSERT_EQ(topts.retval, XDP_PASS, "ipv4 retval"); + ASSERT_EQ(topts.data_size_out, pkt_size, "ipv4 size"); /* Make sure bpf_xdp_output() was triggered and it sent the expected * data to the perf ring buffer. 
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index 13aabb3b6cf2..f775a1613833 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -8,7 +8,7 @@ #define IFINDEX_LO 1 -void test_xdp_with_cpumap_helpers(void) +static void test_xdp_with_cpumap_helpers(void) { struct test_xdp_with_cpumap_helpers *skel; struct bpf_prog_info info = {}; @@ -24,11 +24,11 @@ void test_xdp_with_cpumap_helpers(void) return; prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog); - err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP")) goto out_close; - err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); ASSERT_OK(err, "XDP program detach"); prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm); @@ -46,9 +46,9 @@ void test_xdp_with_cpumap_helpers(void) ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id"); /* can not attach BPF_XDP_CPUMAP program to a device */ - err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program")) - bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE); + bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); val.qsize = 192; val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog); @@ -68,7 +68,7 @@ out_close: test_xdp_with_cpumap_helpers__destroy(skel); } -void test_xdp_with_cpumap_frags_helpers(void) +static void test_xdp_with_cpumap_frags_helpers(void) { struct test_xdp_with_cpumap_frags_helpers *skel; struct bpf_prog_info info = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index 2a784ccd3136..ead40016c324 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -26,11 +26,11 @@ static void test_xdp_with_devmap_helpers(void) return; dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog); - err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap")) goto out_close; - err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); ASSERT_OK(err, "XDP program detach"); dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm); @@ -48,9 +48,9 @@ static void test_xdp_with_devmap_helpers(void) ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id"); /* can not attach BPF_XDP_DEVMAP program to a device */ - err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program")) - bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE); + bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); val.ifindex = 1; val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog); @@ -81,7 +81,7 @@ static void test_neg_xdp_devmap_helpers(void) } } -void test_xdp_with_devmap_frags_helpers(void) +static void 
test_xdp_with_devmap_frags_helpers(void) { struct test_xdp_with_devmap_frags_helpers *skel; struct bpf_prog_info info = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c index abe48e82e1dc..0d01ff6cb91a 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c @@ -14,13 +14,13 @@ void serial_test_xdp_info(void) /* Get prog_id for XDP_ATTACHED_NONE mode */ - err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0); + err = bpf_xdp_query_id(IFINDEX_LO, 0, &prog_id); if (CHECK(err, "get_xdp_none", "errno=%d\n", errno)) return; if (CHECK(prog_id, "prog_id_none", "unexpected prog_id=%u\n", prog_id)) return; - err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_SKB_MODE, &prog_id); if (CHECK(err, "get_xdp_none_skb", "errno=%d\n", errno)) return; if (CHECK(prog_id, "prog_id_none_skb", "unexpected prog_id=%u\n", @@ -37,32 +37,32 @@ void serial_test_xdp_info(void) if (CHECK(err, "get_prog_info", "errno=%d\n", errno)) goto out_close; - err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL); if (CHECK(err, "set_xdp_skb", "errno=%d\n", errno)) goto out_close; /* Get prog_id for single prog mode */ - err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0); + err = bpf_xdp_query_id(IFINDEX_LO, 0, &prog_id); if (CHECK(err, "get_xdp", "errno=%d\n", errno)) goto out; if (CHECK(prog_id != info.id, "prog_id", "prog_id not available\n")) goto out; - err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_SKB_MODE, &prog_id); if (CHECK(err, "get_xdp_skb", "errno=%d\n", errno)) goto out; if (CHECK(prog_id != info.id, "prog_id_skb", "prog_id not available\n")) goto out; - err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_DRV_MODE); + err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_DRV_MODE, &prog_id); if (CHECK(err, "get_xdp_drv", "errno=%d\n", errno)) goto out; if (CHECK(prog_id, "prog_id_drv", "unexpected prog_id=%u\n", prog_id)) goto out; out: - bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0); + bpf_xdp_detach(IFINDEX_LO, 0, NULL); out_close: bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c index b2b357f8c74c..3e9d5c5521f0 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c @@ -8,9 +8,9 @@ void serial_test_xdp_link(void) { - DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1); struct test_xdp_link *skel1 = NULL, *skel2 = NULL; __u32 id1, id2, id0 = 0, prog_fd1, prog_fd2; + LIBBPF_OPTS(bpf_xdp_attach_opts, opts); struct bpf_link_info link_info; struct bpf_prog_info prog_info; struct bpf_link *link; @@ -41,12 +41,12 @@ void serial_test_xdp_link(void) id2 = prog_info.id; /* set initial prog attachment */ - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts); + err = bpf_xdp_attach(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts); if (!ASSERT_OK(err, "fd_attach")) goto cleanup; /* validate prog ID */ - err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0); if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val")) goto cleanup; @@ -55,14 +55,14 @@ void serial_test_xdp_link(void) if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { 
bpf_link__destroy(link); /* best-effort detach prog */ - opts.old_fd = prog_fd1; - bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts); + opts.old_prog_fd = prog_fd1; + bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_REPLACE, &opts); goto cleanup; } /* detach BPF program */ - opts.old_fd = prog_fd1; - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts); + opts.old_prog_fd = prog_fd1; + err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_REPLACE, &opts); if (!ASSERT_OK(err, "prog_detach")) goto cleanup; @@ -73,23 +73,23 @@ void serial_test_xdp_link(void) skel1->links.xdp_handler = link; /* validate prog ID */ - err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0); if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val")) goto cleanup; /* BPF prog attach is not allowed to replace BPF link */ - opts.old_fd = prog_fd1; - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts); + opts.old_prog_fd = prog_fd1; + err = bpf_xdp_attach(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts); if (!ASSERT_ERR(err, "prog_attach_fail")) goto cleanup; /* Can't force-update when BPF link is active */ - err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0); + err = bpf_xdp_attach(IFINDEX_LO, prog_fd2, 0, NULL); if (!ASSERT_ERR(err, "prog_update_fail")) goto cleanup; /* Can't force-detach when BPF link is active */ - err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0); + err = bpf_xdp_detach(IFINDEX_LO, 0, NULL); if (!ASSERT_ERR(err, "prog_detach_fail")) goto cleanup; @@ -109,7 +109,7 @@ void serial_test_xdp_link(void) goto cleanup; skel2->links.xdp_handler = link; - err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0); if (!ASSERT_OK(err, "id2_check_err") || !ASSERT_EQ(id0, id2, "id2_check_val")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c index 0281095de266..92ef0aa50866 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c @@ -25,43 +25,49 @@ void test_xdp_noinline(void) __u8 flags; } real_def = {.dst = MAGIC_VAL}; __u32 ch_key = 11, real_num = 3; - __u32 duration = 0, retval, size; int err, i; __u64 bytes = 0, pkts = 0; char buf[128]; u32 *magic = (u32 *)buf; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = NUM_ITER, + ); skel = test_xdp_noinline__open_and_load(); - if (CHECK(!skel, "skel_open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; bpf_map_update_elem(bpf_map__fd(skel->maps.vip_map), &key, &value, 0); bpf_map_update_elem(bpf_map__fd(skel->maps.ch_rings), &ch_key, &real_num, 0); bpf_map_update_elem(bpf_map__fd(skel->maps.reals), &real_num, &real_def, 0); - err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v4), - NUM_ITER, &pkt_v4, sizeof(pkt_v4), - buf, &size, &retval, &duration); - CHECK(err || retval != 1 || size != 54 || - *magic != MAGIC_VAL, "ipv4", - "err %d errno %d retval %d size %d magic %x\n", - err, errno, retval, size, *magic); + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.balancer_ingress_v4), &topts); + ASSERT_OK(err, "ipv4 test_run"); + ASSERT_EQ(topts.retval, 1, "ipv4 test_run retval"); + ASSERT_EQ(topts.data_size_out, 54, "ipv4 test_run data_size_out"); + ASSERT_EQ(*magic, MAGIC_VAL, "ipv4 test_run magic"); - err = 
bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v6), - NUM_ITER, &pkt_v6, sizeof(pkt_v6), - buf, &size, &retval, &duration); - CHECK(err || retval != 1 || size != 74 || - *magic != MAGIC_VAL, "ipv6", - "err %d errno %d retval %d size %d magic %x\n", - err, errno, retval, size, *magic); + topts.data_in = &pkt_v6; + topts.data_size_in = sizeof(pkt_v6); + topts.data_out = buf; + topts.data_size_out = sizeof(buf); + + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.balancer_ingress_v6), &topts); + ASSERT_OK(err, "ipv6 test_run"); + ASSERT_EQ(topts.retval, 1, "ipv6 test_run retval"); + ASSERT_EQ(topts.data_size_out, 74, "ipv6 test_run data_size_out"); + ASSERT_EQ(*magic, MAGIC_VAL, "ipv6 test_run magic"); bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), &stats_key, stats); for (i = 0; i < nr_cpus; i++) { bytes += stats[i].bytes; pkts += stats[i].pkts; } - CHECK(bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2, - "stats", "bytes %lld pkts %lld\n", - (unsigned long long)bytes, (unsigned long long)pkts); + ASSERT_EQ(bytes, MAGIC_BYTES * NUM_ITER * 2, "stats bytes"); + ASSERT_EQ(pkts, NUM_ITER * 2, "stats pkts"); test_xdp_noinline__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c index 15a3900e4370..f543d1bd21b8 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c @@ -4,22 +4,25 @@ void test_xdp_perf(void) { const char *file = "./xdp_dummy.o"; - __u32 duration, retval, size; struct bpf_object *obj; char in[128], out[128]; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = in, + .data_size_in = sizeof(in), + .data_out = out, + .data_size_out = sizeof(out), + .repeat = 1000000, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (CHECK_FAIL(err)) return; - err = bpf_prog_test_run(prog_fd, 1000000, &in[0], 128, - out, &size, &retval, &duration); - - CHECK(err || retval != XDP_PASS || size != 128, - "xdp-perf", - "err %d errno %d retval %d size %d\n", - err, errno, retval, size); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, XDP_PASS, "test_run retval"); + ASSERT_EQ(topts.data_size_out, 128, "test_run data_size_out"); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c index d9a88dd1ea65..7efcbdbe772d 100644 --- a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c +++ b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c @@ -5,6 +5,7 @@ #include <linux/bpf.h> #include <stdbool.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -87,7 +88,7 @@ bloom_callback(struct bpf_map *map, __u32 *key, void *val, return 0; } -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int bloom_lookup(void *ctx) { struct callback_ctx data; @@ -100,7 +101,7 @@ int bloom_lookup(void *ctx) return 0; } -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int bloom_update(void *ctx) { struct callback_ctx data; @@ -113,7 +114,7 @@ int bloom_update(void *ctx) return 0; } -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int bloom_hashmap_lookup(void *ctx) { __u64 *result; diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_map.c b/tools/testing/selftests/bpf/progs/bloom_filter_map.c index 1316f3db79d9..f245fcfe0c61 100644 --- 
a/tools/testing/selftests/bpf/progs/bloom_filter_map.c +++ b/tools/testing/selftests/bpf/progs/bloom_filter_map.c @@ -3,6 +3,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -51,7 +52,7 @@ check_elem(struct bpf_map *map, __u32 *key, __u32 *val, return 0; } -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int inner_map(void *ctx) { struct bpf_map *inner_map; @@ -70,7 +71,7 @@ int inner_map(void *ctx) return 0; } -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int check_bloom(void *ctx) { struct callback_ctx data; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c index c86b93f33b32..d22741272692 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c @@ -2,6 +2,7 @@ /* Copyright (c) 2020 Facebook */ #include "bpf_iter.h" #include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; @@ -23,3 +24,56 @@ int dump_task(struct bpf_iter__task *ctx) BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid); return 0; } + +int num_expected_failure_copy_from_user_task = 0; +int num_success_copy_from_user_task = 0; + +SEC("iter.s/task") +int dump_task_sleepable(struct bpf_iter__task *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + static const char info[] = " === END ==="; + struct pt_regs *regs; + void *ptr; + uint32_t user_data = 0; + int ret; + + if (task == (void *)0) { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + + /* Read an invalid pointer and ensure we get an error */ + ptr = NULL; + ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0); + if (ret) { + ++num_expected_failure_copy_from_user_task; + } else { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + + /* Try to read the contents of the task's instruction pointer from the + * remote task's address space. 
+ */ + regs = (struct pt_regs *)bpf_task_pt_regs(task); + if (regs == (void *)0) { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + ptr = (void *)PT_REGS_IP(regs); + + ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0); + if (ret) { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + ++num_success_copy_from_user_task; + + if (ctx->meta->seq_num == 0) + BPF_SEQ_PRINTF(seq, " tgid gid data\n"); + + BPF_SEQ_PRINTF(seq, "%8d %8d %8d\n", task->tgid, task->pid, user_data); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c index 12349e4601e8..e08565282759 100644 --- a/tools/testing/selftests/bpf/progs/bpf_loop.c +++ b/tools/testing/selftests/bpf/progs/bpf_loop.c @@ -3,6 +3,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -53,7 +54,7 @@ static int nested_callback1(__u32 index, void *data) return 0; } -SEC("fentry/__x64_sys_nanosleep") +SEC("fentry/" SYS_PREFIX "sys_nanosleep") int test_prog(void *ctx) { struct callback_ctx data = {}; @@ -71,7 +72,7 @@ int test_prog(void *ctx) return 0; } -SEC("fentry/__x64_sys_nanosleep") +SEC("fentry/" SYS_PREFIX "sys_nanosleep") int prog_null_ctx(void *ctx) { if (bpf_get_current_pid_tgid() >> 32 != pid) @@ -82,7 +83,7 @@ int prog_null_ctx(void *ctx) return 0; } -SEC("fentry/__x64_sys_nanosleep") +SEC("fentry/" SYS_PREFIX "sys_nanosleep") int prog_invalid_flags(void *ctx) { struct callback_ctx data = {}; @@ -95,7 +96,7 @@ int prog_invalid_flags(void *ctx) return 0; } -SEC("fentry/__x64_sys_nanosleep") +SEC("fentry/" SYS_PREFIX "sys_nanosleep") int prog_nested_calls(void *ctx) { struct callback_ctx data = {}; diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c index 9dafdc244462..4ce76eb064c4 100644 --- a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c +++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c @@ -3,6 +3,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -14,7 +15,7 @@ static int empty_callback(__u32 index, void *data) return 0; } -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int benchmark(void *ctx) { for (int i = 0; i < 1000; i++) { diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h new file mode 100644 index 000000000000..5bb11fe595a4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __BPF_MISC_H__ +#define __BPF_MISC_H__ + +#if defined(__TARGET_ARCH_x86) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__x64_" +#elif defined(__TARGET_ARCH_s390) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__s390x_" +#elif defined(__TARGET_ARCH_arm64) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__arm64_" +#else +#define SYSCALL_WRAPPER 0 +#define SYS_PREFIX "__se_" +#endif + +#endif diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c new file mode 100644 index 000000000000..05838ed9b89c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2022 Sony Group Corporation */ +#include <vmlinux.h> + +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +int arg1 = 0; +unsigned 
long arg2 = 0; +unsigned long arg3 = 0; +unsigned long arg4_cx = 0; +unsigned long arg4 = 0; +unsigned long arg5 = 0; + +int arg1_core = 0; +unsigned long arg2_core = 0; +unsigned long arg3_core = 0; +unsigned long arg4_core_cx = 0; +unsigned long arg4_core = 0; +unsigned long arg5_core = 0; + +int option_syscall = 0; +unsigned long arg2_syscall = 0; +unsigned long arg3_syscall = 0; +unsigned long arg4_syscall = 0; +unsigned long arg5_syscall = 0; + +const volatile pid_t filter_pid = 0; + +SEC("kprobe/" SYS_PREFIX "sys_prctl") +int BPF_KPROBE(handle_sys_prctl) +{ + struct pt_regs *real_regs; + pid_t pid = bpf_get_current_pid_tgid() >> 32; + unsigned long tmp = 0; + + if (pid != filter_pid) + return 0; + + real_regs = PT_REGS_SYSCALL_REGS(ctx); + + /* test for PT_REGS_PARM */ + +#if !defined(bpf_target_arm64) && !defined(bpf_target_s390) + bpf_probe_read_kernel(&tmp, sizeof(tmp), &PT_REGS_PARM1_SYSCALL(real_regs)); +#endif + arg1 = tmp; + bpf_probe_read_kernel(&arg2, sizeof(arg2), &PT_REGS_PARM2_SYSCALL(real_regs)); + bpf_probe_read_kernel(&arg3, sizeof(arg3), &PT_REGS_PARM3_SYSCALL(real_regs)); + bpf_probe_read_kernel(&arg4_cx, sizeof(arg4_cx), &PT_REGS_PARM4(real_regs)); + bpf_probe_read_kernel(&arg4, sizeof(arg4), &PT_REGS_PARM4_SYSCALL(real_regs)); + bpf_probe_read_kernel(&arg5, sizeof(arg5), &PT_REGS_PARM5_SYSCALL(real_regs)); + + /* test for the CORE variant of PT_REGS_PARM */ + arg1_core = PT_REGS_PARM1_CORE_SYSCALL(real_regs); + arg2_core = PT_REGS_PARM2_CORE_SYSCALL(real_regs); + arg3_core = PT_REGS_PARM3_CORE_SYSCALL(real_regs); + arg4_core_cx = PT_REGS_PARM4_CORE(real_regs); + arg4_core = PT_REGS_PARM4_CORE_SYSCALL(real_regs); + arg5_core = PT_REGS_PARM5_CORE_SYSCALL(real_regs); + + return 0; +} + +SEC("kprobe/" SYS_PREFIX "sys_prctl") +int BPF_KPROBE_SYSCALL(prctl_enter, int option, unsigned long arg2, + unsigned long arg3, unsigned long arg4, unsigned long arg5) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != filter_pid) + return 0; + + option_syscall = option; + arg2_syscall = arg2; + arg3_syscall = arg3; + arg4_syscall = arg4; + arg5_syscall = arg5; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_user.c b/tools/testing/selftests/bpf/progs/btf_type_tag_user.c new file mode 100644 index 000000000000..5523f77c5a44 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf_type_tag_user.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Facebook */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +struct bpf_testmod_btf_type_tag_1 { + int a; +}; + +struct bpf_testmod_btf_type_tag_2 { + struct bpf_testmod_btf_type_tag_1 *p; +}; + +int g; + +SEC("fentry/bpf_testmod_test_btf_type_tag_user_1") +int BPF_PROG(test_user1, struct bpf_testmod_btf_type_tag_1 *arg) +{ + g = arg->a; + return 0; +} + +SEC("fentry/bpf_testmod_test_btf_type_tag_user_2") +int BPF_PROG(test_user2, struct bpf_testmod_btf_type_tag_2 *arg) +{ + g = arg->p->a; + return 0; +} + +/* int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, + * int __user *usockaddr_len); + */ +SEC("fentry/__sys_getsockname") +int BPF_PROG(test_sys_getsockname, int fd, struct sockaddr *usockaddr, + int *usockaddr_len) +{ + g = usockaddr->sa_family; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/core_kern.c b/tools/testing/selftests/bpf/progs/core_kern.c index 13499cc15c7d..2715fe27d4cf 100644 --- a/tools/testing/selftests/bpf/progs/core_kern.c +++ 
b/tools/testing/selftests/bpf/progs/core_kern.c @@ -101,4 +101,20 @@ int balancer_ingress(struct __sk_buff *ctx) return 0; } +typedef int (*func_proto_typedef___match)(long); +typedef int (*func_proto_typedef___doesnt_match)(char *); +typedef int (*func_proto_typedef_nested1)(func_proto_typedef___match); + +int proto_out[3]; + +SEC("raw_tracepoint/sys_enter") +int core_relo_proto(void *ctx) +{ + proto_out[0] = bpf_core_type_exists(func_proto_typedef___match); + proto_out[1] = bpf_core_type_exists(func_proto_typedef___doesnt_match); + proto_out[2] = bpf_core_type_exists(func_proto_typedef_nested1); + + return 0; +} + char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/core_kern_overflow.c b/tools/testing/selftests/bpf/progs/core_kern_overflow.c new file mode 100644 index 000000000000..f0d5652256ba --- /dev/null +++ b/tools/testing/selftests/bpf/progs/core_kern_overflow.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +typedef int (*func_proto_typedef)(long); +typedef int (*func_proto_typedef_nested1)(func_proto_typedef); +typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1); + +int proto_out; + +SEC("raw_tracepoint/sys_enter") +int core_relo_proto(void *ctx) +{ + proto_out = bpf_core_type_exists(func_proto_typedef_nested2); + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c index bca92c9bd29a..106dc75efcc4 100644 --- a/tools/testing/selftests/bpf/progs/fexit_sleep.c +++ b/tools/testing/selftests/bpf/progs/fexit_sleep.c @@ -3,6 +3,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_misc.h" char LICENSE[] SEC("license") = "GPL"; @@ -10,8 +11,8 @@ int pid = 0; int fentry_cnt = 0; int fexit_cnt = 0; -SEC("fentry/__x64_sys_nanosleep") -int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs) +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int nanosleep_fentry(void *ctx) { if (bpf_get_current_pid_tgid() >> 32 != pid) return 0; @@ -20,8 +21,8 @@ int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs) return 0; } -SEC("fexit/__x64_sys_nanosleep") -int BPF_PROG(nanosleep_fexit, const struct pt_regs *regs, int ret) +SEC("fexit/" SYS_PREFIX "sys_nanosleep") +int nanosleep_fexit(void *ctx) { if (bpf_get_current_pid_tgid() >> 32 != pid) return 0; diff --git a/tools/testing/selftests/bpf/progs/perfbuf_bench.c b/tools/testing/selftests/bpf/progs/perfbuf_bench.c index e5ab4836a641..45204fe0c570 100644 --- a/tools/testing/selftests/bpf/progs/perfbuf_bench.c +++ b/tools/testing/selftests/bpf/progs/perfbuf_bench.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <stdint.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -18,7 +19,7 @@ const volatile int batch_cnt = 0; long sample_val = 42; long dropped __attribute__((aligned(128))) = 0; -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int bench_perfbuf(void *ctx) { __u64 *sample; diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c index 123607d314d6..6a468496f539 100644 --- a/tools/testing/selftests/bpf/progs/ringbuf_bench.c +++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <stdint.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" 
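For reference, the SYS_PREFIX plumbing being wired in above comes from the new bpf_misc.h header added earlier in this series. As a rough illustration (the program below is made up, not part of the patch), all that happens is compile-time string-literal concatenation: on x86 the section name becomes "fentry/__x64_sys_getpgid", on s390 "fentry/__s390x_sys_getpgid", and architectures without syscall wrappers fall back to the "__se_" prefix::

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include "bpf_misc.h"

    /* Expands to SEC("fentry/__x64_sys_getpgid") on x86,
     * SEC("fentry/__s390x_sys_getpgid") on s390, etc.
     */
    SEC("fentry/" SYS_PREFIX "sys_getpgid")
    int example_prog(void *ctx)
    {
            return 0;
    }

    char _license[] SEC("license") = "GPL";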
char _license[] SEC("license") = "GPL"; @@ -30,7 +31,7 @@ static __always_inline long get_flags() return sz >= wakeup_data_size ? BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP; } -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int bench_ringbuf(void *ctx) { long *sample, flags; diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index d0298dccedcd..c8d810010a94 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -72,7 +72,8 @@ int _getsockopt(struct bpf_sockopt *ctx) * reasons. */ - if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end) + /* Check that optval contains address (__u64) */ + if (optval + sizeof(__u64) > optval_end) return 0; /* bounds check */ if (((struct tcp_zerocopy_receive *)optval)->address != 0) diff --git a/tools/testing/selftests/bpf/progs/btf_decl_tag.c b/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c index c88ccc53529a..c88ccc53529a 100644 --- a/tools/testing/selftests/bpf/progs/btf_decl_tag.c +++ b/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c index 8812a90da4eb..702578a5e496 100644 --- a/tools/testing/selftests/bpf/progs/test_probe_user.c +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -7,20 +7,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> - -#if defined(__TARGET_ARCH_x86) -#define SYSCALL_WRAPPER 1 -#define SYS_PREFIX "__x64_" -#elif defined(__TARGET_ARCH_s390) -#define SYSCALL_WRAPPER 1 -#define SYS_PREFIX "__s390x_" -#elif defined(__TARGET_ARCH_arm64) -#define SYSCALL_WRAPPER 1 -#define SYS_PREFIX "__arm64_" -#else -#define SYSCALL_WRAPPER 0 -#define SYS_PREFIX "" -#endif +#include "bpf_misc.h" static struct sockaddr_in old; diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c index eaa7d9dba0be..5bdc0d38efc0 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c @@ -3,6 +3,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -35,7 +36,7 @@ long prod_pos = 0; /* inner state */ long seq = 0; -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int test_ringbuf(void *ctx) { int cur_pid = bpf_get_current_pid_tgid() >> 32; diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c index 83b0aaa52ef7..bf5b7caefdd0 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c @@ -392,6 +392,7 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; int err, family; + __u32 val_u32; bool v4; v4 = (ctx->family == AF_INET); @@ -418,6 +419,11 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx) if (LSW(ctx->remote_port, 0) != SRC_PORT) return SK_DROP; + /* Load from remote_port field with zero padding (backward compatibility) */ + val_u32 = *(__u32 *)&ctx->remote_port; + if (val_u32 != bpf_htonl(bpf_ntohs(SRC_PORT) << 16)) + return SK_DROP; + /* Narrow loads from local_port field. Expect DST_PORT. 
*/ if (LSB(ctx->local_port, 0) != ((DST_PORT >> 0) & 0xff) || LSB(ctx->local_port, 1) != ((DST_PORT >> 8) & 0xff) || diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index 81b57b9aaaea..246f1f001813 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -12,6 +12,7 @@ enum bpf_linum_array_idx { EGRESS_LINUM_IDX, INGRESS_LINUM_IDX, + READ_SK_DST_PORT_LINUM_IDX, __NR_BPF_LINUM_ARRAY_IDX, }; @@ -250,4 +251,44 @@ int ingress_read_sock_fields(struct __sk_buff *skb) return CG_OK; } +static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk) +{ + __u32 *word = (__u32 *)&sk->dst_port; + return word[0] == bpf_htonl(0xcafe0000); +} + +static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk) +{ + __u16 *half = (__u16 *)&sk->dst_port; + return half[0] == bpf_htons(0xcafe); +} + +static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk) +{ + __u8 *byte = (__u8 *)&sk->dst_port; + return byte[0] == 0xca && byte[1] == 0xfe; +} + +SEC("cgroup_skb/egress") +int read_sk_dst_port(struct __sk_buff *skb) +{ + __u32 linum, linum_idx; + struct bpf_sock *sk; + + linum_idx = READ_SK_DST_PORT_LINUM_IDX; + + sk = skb->sk; + if (!sk) + RET_LOG(); + + if (!sk_dst_port__load_word(sk)) + RET_LOG(); + if (!sk_dst_port__load_half(sk)) + RET_LOG(); + if (!sk_dst_port__load_byte(sk)) + RET_LOG(); + + return CG_OK; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c index 62fb7cd4d87a..97ed625bb70a 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c @@ -12,7 +12,7 @@ struct { __uint(max_entries, 4); } cpu_map SEC(".maps"); -SEC("xdp_cpumap/dummy_cm") +SEC("xdp/cpumap") int xdp_dummy_cm(struct xdp_md *ctx) { return XDP_PASS; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c index 48007f17dfa8..20ec6723df18 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c @@ -24,7 +24,7 @@ int xdp_dummy_prog(struct xdp_md *ctx) return XDP_PASS; } -SEC("xdp_cpumap/dummy_cm") +SEC("xdp/cpumap") int xdp_dummy_cm(struct xdp_md *ctx) { if (ctx->ingress_ifindex == IFINDEX_LO) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c index e1caf510b7d2..cdcf7de7ec8c 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c @@ -12,7 +12,7 @@ struct { /* valid program on DEVMAP entry via SEC name; * has access to egress and ingress ifindex */ -SEC("xdp_devmap/map_prog") +SEC("xdp/devmap") int xdp_dummy_dm(struct xdp_md *ctx) { return XDP_PASS; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c index 8ae11fab8316..4139a14f9996 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c @@ -27,7 +27,7 @@ int xdp_dummy_prog(struct xdp_md *ctx) /* valid program on 
DEVMAP entry via SEC name; * has access to egress and ingress ifindex */ -SEC("xdp_devmap/map_prog") +SEC("xdp/devmap") int xdp_dummy_dm(struct xdp_md *ctx) { char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n"; diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c index 119582aa105a..6695478c2b25 100644 --- a/tools/testing/selftests/bpf/progs/trace_printk.c +++ b/tools/testing/selftests/bpf/progs/trace_printk.c @@ -4,6 +4,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -12,7 +13,7 @@ int trace_printk_ran = 0; const char fmt[] = "Testing,testing %d\n"; -SEC("fentry/__x64_sys_nanosleep") +SEC("fentry/" SYS_PREFIX "sys_nanosleep") int sys_enter(void *ctx) { trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt), diff --git a/tools/testing/selftests/bpf/progs/trace_vprintk.c b/tools/testing/selftests/bpf/progs/trace_vprintk.c index d327241ba047..969306cd4f33 100644 --- a/tools/testing/selftests/bpf/progs/trace_vprintk.c +++ b/tools/testing/selftests/bpf/progs/trace_vprintk.c @@ -4,6 +4,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -11,7 +12,7 @@ int null_data_vprintk_ret = 0; int trace_vprintk_ret = 0; int trace_vprintk_ran = 0; -SEC("fentry/__x64_sys_nanosleep") +SEC("fentry/" SYS_PREFIX "sys_nanosleep") int sys_enter(void *ctx) { static const char one[] = "1"; diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 2098f3f27f18..2ab049b54d6c 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -5,6 +5,7 @@ #include <asm/unistd.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -25,28 +26,28 @@ int BPF_PROG(bench_trigger_raw_tp, struct pt_regs *regs, long id) return 0; } -SEC("kprobe/__x64_sys_getpgid") +SEC("kprobe/" SYS_PREFIX "sys_getpgid") int bench_trigger_kprobe(void *ctx) { __sync_add_and_fetch(&hits, 1); return 0; } -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int bench_trigger_fentry(void *ctx) { __sync_add_and_fetch(&hits, 1); return 0; } -SEC("fentry.s/__x64_sys_getpgid") +SEC("fentry.s/" SYS_PREFIX "sys_getpgid") int bench_trigger_fentry_sleep(void *ctx) { __sync_add_and_fetch(&hits, 1); return 0; } -SEC("fmod_ret/__x64_sys_getpgid") +SEC("fmod_ret/" SYS_PREFIX "sys_getpgid") int bench_trigger_fmodret(void *ctx) { __sync_add_and_fetch(&hits, 1); diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c index 8395782b6e0a..97b26a30b59a 100644 --- a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c @@ -70,7 +70,7 @@ int xdp_redirect_map_all_prog(struct xdp_md *ctx) BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); } -SEC("xdp_devmap/map_prog") +SEC("xdp/devmap") int xdp_devmap_prog(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index b9f1bbbc8aba..6e6235185a86 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -61,7 +61,11 @@ static int 
bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key, }; __u8 data[64] = {}; int mfd, pfd, ret, zero = 0; - __u32 retval = 0; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = data, + .data_size_in = sizeof(data), + .repeat = 1, + ); mfd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(__u64), 1, NULL); if (mfd < 0) @@ -75,9 +79,8 @@ static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key, return -1; } - ret = bpf_prog_test_run(pfd, 1, data, sizeof(data), - NULL, NULL, &retval, NULL); - if (ret < 0 || retval != 42) { + ret = bpf_prog_test_run_opts(pfd, &topts); + if (ret < 0 || topts.retval != 42) { ret = -1; } else { assert(!bpf_map_lookup_elem(mfd, &zero, value)); diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh index 5620919fde9e..826f4423ce02 100755 --- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh +++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh @@ -23,6 +23,12 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +readonly NS1="ns1-$(mktemp -u XXXXXX)" +readonly NS2="ns2-$(mktemp -u XXXXXX)" +readonly NS3="ns3-$(mktemp -u XXXXXX)" +readonly NS4="ns4-$(mktemp -u XXXXXX)" +readonly NS5="ns5-$(mktemp -u XXXXXX)" +readonly NS6="ns6-$(mktemp -u XXXXXX)" msg="skip all tests:" if [ $UID != 0 ]; then @@ -41,23 +47,23 @@ cleanup() fi set +e - ip netns del ns1 2> /dev/null - ip netns del ns2 2> /dev/null - ip netns del ns3 2> /dev/null - ip netns del ns4 2> /dev/null - ip netns del ns5 2> /dev/null - ip netns del ns6 2> /dev/null + ip netns del ${NS1} 2> /dev/null + ip netns del ${NS2} 2> /dev/null + ip netns del ${NS3} 2> /dev/null + ip netns del ${NS4} 2> /dev/null + ip netns del ${NS5} 2> /dev/null + ip netns del ${NS6} 2> /dev/null rm -f $TMP_FILE } set -e -ip netns add ns1 -ip netns add ns2 -ip netns add ns3 -ip netns add ns4 -ip netns add ns5 -ip netns add ns6 +ip netns add ${NS1} +ip netns add ${NS2} +ip netns add ${NS3} +ip netns add ${NS4} +ip netns add ${NS5} +ip netns add ${NS6} trap cleanup 0 2 3 6 9 @@ -67,78 +73,78 @@ ip link add veth5 type veth peer name veth6 ip link add veth7 type veth peer name veth8 ip link add veth9 type veth peer name veth10 -ip link set veth1 netns ns1 -ip link set veth2 netns ns2 -ip link set veth3 netns ns2 -ip link set veth4 netns ns3 -ip link set veth5 netns ns3 -ip link set veth6 netns ns4 -ip link set veth7 netns ns4 -ip link set veth8 netns ns5 -ip link set veth9 netns ns5 -ip link set veth10 netns ns6 - -ip netns exec ns1 ip link set dev veth1 up -ip netns exec ns2 ip link set dev veth2 up -ip netns exec ns2 ip link set dev veth3 up -ip netns exec ns3 ip link set dev veth4 up -ip netns exec ns3 ip link set dev veth5 up -ip netns exec ns4 ip link set dev veth6 up -ip netns exec ns4 ip link set dev veth7 up -ip netns exec ns5 ip link set dev veth8 up -ip netns exec ns5 ip link set dev veth9 up -ip netns exec ns6 ip link set dev veth10 up -ip netns exec ns6 ip link set dev lo up +ip link set veth1 netns ${NS1} +ip link set veth2 netns ${NS2} +ip link set veth3 netns ${NS2} +ip link set veth4 netns ${NS3} +ip link set veth5 netns ${NS3} +ip link set veth6 netns ${NS4} +ip link set veth7 netns ${NS4} +ip link set veth8 netns ${NS5} +ip link set veth9 netns ${NS5} +ip link set veth10 netns ${NS6} + +ip netns exec ${NS1} ip link set dev veth1 up +ip netns exec ${NS2} ip link set dev veth2 up +ip netns exec ${NS2} ip link set dev veth3 up +ip netns exec ${NS3} ip link set dev veth4 up +ip netns exec ${NS3} ip link set 
dev veth5 up +ip netns exec ${NS4} ip link set dev veth6 up +ip netns exec ${NS4} ip link set dev veth7 up +ip netns exec ${NS5} ip link set dev veth8 up +ip netns exec ${NS5} ip link set dev veth9 up +ip netns exec ${NS6} ip link set dev veth10 up +ip netns exec ${NS6} ip link set dev lo up # All link scope addresses and routes required between veths -ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link -ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link -ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link -ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link -ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link -ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link -ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link -ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link -ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link -ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link -ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link -ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link -ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link -ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link -ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link -ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link - -ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo -ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21 - -ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2 -ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link - -ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65 -ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4 - -ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6 -ip netns exec ns4 ip -6 addr add fc42::1 dev lo -ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87 - -ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109 -ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8 - -ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo -ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo - -ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null - -ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null -ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null -ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null - -ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE & -ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330" +ip netns exec ${NS1} ip -6 addr add fb00::12/16 dev veth1 scope link +ip netns exec ${NS1} ip -6 route add fb00::21 dev veth1 scope link +ip netns exec ${NS2} ip -6 addr add fb00::21/16 dev veth2 scope link +ip netns exec ${NS2} ip -6 addr add fb00::34/16 dev veth3 scope link +ip netns exec ${NS2} ip -6 route add fb00::43 dev veth3 scope link 
+ip netns exec ${NS3} ip -6 route add fb00::65 dev veth5 scope link +ip netns exec ${NS3} ip -6 addr add fb00::43/16 dev veth4 scope link +ip netns exec ${NS3} ip -6 addr add fb00::56/16 dev veth5 scope link +ip netns exec ${NS4} ip -6 addr add fb00::65/16 dev veth6 scope link +ip netns exec ${NS4} ip -6 addr add fb00::78/16 dev veth7 scope link +ip netns exec ${NS4} ip -6 route add fb00::87 dev veth7 scope link +ip netns exec ${NS5} ip -6 addr add fb00::87/16 dev veth8 scope link +ip netns exec ${NS5} ip -6 addr add fb00::910/16 dev veth9 scope link +ip netns exec ${NS5} ip -6 route add fb00::109 dev veth9 scope link +ip netns exec ${NS5} ip -6 route add fb00::109 table 117 dev veth9 scope link +ip netns exec ${NS6} ip -6 addr add fb00::109/16 dev veth10 scope link + +ip netns exec ${NS1} ip -6 addr add fb00::1/16 dev lo +ip netns exec ${NS1} ip -6 route add fb00::6 dev veth1 via fb00::21 + +ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2 +ip netns exec ${NS2} ip -6 route add fd00::1 dev veth3 via fb00::43 scope link + +ip netns exec ${NS3} ip -6 route add fc42::1 dev veth5 via fb00::65 +ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4 + +ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6 +ip netns exec ${NS4} ip -6 addr add fc42::1 dev lo +ip netns exec ${NS4} ip -6 route add fd00::3 dev veth7 via fb00::87 + +ip netns exec ${NS5} ip -6 route add fd00::4 table 117 dev veth9 via fb00::109 +ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8 + +ip netns exec ${NS6} ip -6 addr add fb00::6/16 dev lo +ip netns exec ${NS6} ip -6 addr add fd00::4/16 dev lo + +ip netns exec ${NS1} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ${NS2} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ${NS3} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ${NS4} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ${NS5} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + +ip netns exec ${NS6} sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null +ip netns exec ${NS6} sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null +ip netns exec ${NS6} sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null + +ip netns exec ${NS6} nc -l -6 -u -d 7330 > $TMP_FILE & +ip netns exec ${NS1} bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330" sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment kill -TERM $! 
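The test_lru_map.c conversion above (and the test_verifier.c one below) replaces the deprecated bpf_prog_test_run() with the opts-based bpf_prog_test_run_opts(), moving the return value from an out-argument into the opts struct. A minimal sketch of the resulting calling pattern, with error handling trimmed and the helper name invented for illustration::

    #include <bpf/bpf.h>

    /* Run a loaded program once over data and return its retval. */
    static int run_prog_once(int prog_fd, void *data, __u32 data_sz)
    {
            LIBBPF_OPTS(bpf_test_run_opts, topts,
                    .data_in = data,
                    .data_size_in = data_sz,
                    .repeat = 1,
            );
            int err;

            err = bpf_prog_test_run_opts(prog_fd, &topts);
            if (err < 0)
                    return err;
            return topts.retval;
    }

LIBBPF_OPTS() zero-initializes the struct and records sizeof() in its embedded .sz field, which is how libbpf keeps these opts structs extensible without breaking older callers.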
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 50f7e74ca0b9..cbebfaa7c1e8 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -738,7 +738,7 @@ static void test_sockmap(unsigned int tasks, void *data) sizeof(key), sizeof(value), 6, NULL); if (fd < 0) { - if (!bpf_probe_map_type(BPF_MAP_TYPE_SOCKMAP, 0)) { + if (!libbpf_probe_bpf_map_type(BPF_MAP_TYPE_SOCKMAP, NULL)) { printf("%s SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP)\n", __func__); skips++; diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh index 6413c1472554..102e6588e2fe 100755 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh @@ -4,6 +4,7 @@ # Copyright (c) 2019 Cloudflare set -eu +readonly NS1="ns1-$(mktemp -u XXXXXX)" wait_for_ip() { @@ -28,12 +29,12 @@ get_prog_id() ns1_exec() { - ip netns exec ns1 "$@" + ip netns exec ${NS1} "$@" } setup() { - ip netns add ns1 + ip netns add ${NS1} ns1_exec ip link set lo up ns1_exec sysctl -w net.ipv4.tcp_syncookies=2 diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 29bbaa58233c..92e3465fbae8 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -456,7 +456,7 @@ static int probe_filter_length(const struct bpf_insn *fp) static bool skip_unsupported_map(enum bpf_map_type map_type) { - if (!bpf_probe_map_type(map_type, 0)) { + if (!libbpf_probe_bpf_map_type(map_type, NULL)) { printf("SKIP (unsupported map type %d)\n", map_type); skips++; return true; @@ -1021,13 +1021,18 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, { __u8 tmp[TEST_DATA_LEN << 2]; __u32 size_tmp = sizeof(tmp); - uint32_t retval; int err, saved_errno; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = data, + .data_size_in = size_data, + .data_out = tmp, + .data_size_out = size_tmp, + .repeat = 1, + ); if (unpriv) set_admin(true); - err = bpf_prog_test_run(fd_prog, 1, data, size_data, - tmp, &size_tmp, &retval, NULL); + err = bpf_prog_test_run_opts(fd_prog, &topts); saved_errno = errno; if (unpriv) @@ -1051,9 +1056,8 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, } } - if (retval != expected_val && - expected_val != POINTER_VALUE) { - printf("FAIL retval %d != %d ", retval, expected_val); + if (topts.retval != expected_val && expected_val != POINTER_VALUE) { + printf("FAIL retval %d != %d ", topts.retval, expected_val); return 1; } @@ -1176,7 +1180,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, * bpf_probe_prog_type won't give correct answer */ if (fd_prog < 0 && prog_type != BPF_PROG_TYPE_TRACING && - !bpf_probe_prog_type(prog_type, 0)) { + !libbpf_probe_bpf_prog_type(prog_type, NULL)) { printf("SKIP (unsupported program type %d)\n", prog_type); skips++; goto close_fds; diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh index d10cefd6eb09..ea69370caae3 100755 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh @@ -2,6 +2,8 @@ # Kselftest framework requirement - SKIP code is 4. 
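The probe calls switched just above (in test_maps.c and test_verifier.c) use the newer libbpf feature-probing API: libbpf_probe_bpf_map_type() returns 1 when the kernel supports the map type, 0 when it does not, and a negative error if the probe itself fails. A hedged sketch of that skip-if-unsupported pattern (the wrapper is illustrative, not from the patch)::

    #include <stdbool.h>
    #include <bpf/libbpf.h>

    /* True only when the probe positively confirms support. */
    static bool map_type_supported(enum bpf_map_type map_type)
    {
            return libbpf_probe_bpf_map_type(map_type, NULL) == 1;
    }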
readonly KSFT_SKIP=4 +readonly NS1="ns1-$(mktemp -u XXXXXX)" +readonly NS2="ns2-$(mktemp -u XXXXXX)" cleanup() { @@ -13,8 +15,8 @@ cleanup() set +e ip link del veth1 2> /dev/null - ip netns del ns1 2> /dev/null - ip netns del ns2 2> /dev/null + ip netns del ${NS1} 2> /dev/null + ip netns del ${NS2} 2> /dev/null } ip link set dev lo xdp off 2>/dev/null > /dev/null @@ -24,32 +26,32 @@ if [ $? -ne 0 ];then fi set -e -ip netns add ns1 -ip netns add ns2 +ip netns add ${NS1} +ip netns add ${NS2} trap cleanup 0 2 3 6 9 ip link add veth1 type veth peer name veth2 -ip link set veth1 netns ns1 -ip link set veth2 netns ns2 +ip link set veth1 netns ${NS1} +ip link set veth2 netns ${NS2} -ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth1 -ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth2 +ip netns exec ${NS1} ip addr add 10.1.1.11/24 dev veth1 +ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2 -ip netns exec ns1 tc qdisc add dev veth1 clsact -ip netns exec ns2 tc qdisc add dev veth2 clsact +ip netns exec ${NS1} tc qdisc add dev veth1 clsact +ip netns exec ${NS2} tc qdisc add dev veth2 clsact -ip netns exec ns1 tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t -ip netns exec ns2 tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t +ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t +ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t -ip netns exec ns1 ip link set dev veth1 xdp obj test_xdp_meta.o sec x -ip netns exec ns2 ip link set dev veth2 xdp obj test_xdp_meta.o sec x +ip netns exec ${NS1} ip link set dev veth1 xdp obj test_xdp_meta.o sec x +ip netns exec ${NS2} ip link set dev veth2 xdp obj test_xdp_meta.o sec x -ip netns exec ns1 ip link set dev veth1 up -ip netns exec ns2 ip link set dev veth2 up +ip netns exec ${NS1} ip link set dev veth1 up +ip netns exec ${NS2} ip link set dev veth2 up -ip netns exec ns1 ping -c 1 10.1.1.22 -ip netns exec ns2 ping -c 1 10.1.1.11 +ip netns exec ${NS1} ping -c 1 10.1.1.22 +ip netns exec ${NS2} ping -c 1 10.1.1.11 exit 0 diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh index 57c8db9972a6..1d79f31480ad 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh @@ -10,6 +10,8 @@ # | xdp forwarding | # ------------------ +readonly NS1="ns1-$(mktemp -u XXXXXX)" +readonly NS2="ns2-$(mktemp -u XXXXXX)" ret=0 setup() @@ -17,27 +19,27 @@ setup() local xdpmode=$1 - ip netns add ns1 - ip netns add ns2 + ip netns add ${NS1} + ip netns add ${NS2} - ip link add veth1 index 111 type veth peer name veth11 netns ns1 - ip link add veth2 index 222 type veth peer name veth22 netns ns2 + ip link add veth1 index 111 type veth peer name veth11 netns ${NS1} + ip link add veth2 index 222 type veth peer name veth22 netns ${NS2} ip link set veth1 up ip link set veth2 up - ip -n ns1 link set dev veth11 up - ip -n ns2 link set dev veth22 up + ip -n ${NS1} link set dev veth11 up + ip -n ${NS2} link set dev veth22 up - ip -n ns1 addr add 10.1.1.11/24 dev veth11 - ip -n ns2 addr add 10.1.1.22/24 dev veth22 + ip -n ${NS1} addr add 10.1.1.11/24 dev veth11 + ip -n ${NS2} addr add 10.1.1.22/24 dev veth22 } cleanup() { ip link del veth1 2> /dev/null ip link del veth2 2> /dev/null - ip netns del ns1 2> /dev/null - ip netns del ns2 2> /dev/null + ip netns del ${NS1} 2> /dev/null + ip netns del ${NS2} 2> /dev/null } test_xdp_redirect() @@ -52,13 +54,13 @@ test_xdp_redirect() 
return 0 fi - ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null - ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null + ip -n ${NS1} link set veth11 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null + ip -n ${NS2} link set veth22 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null - if ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null && - ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null; then + if ip netns exec ${NS1} ping -c 1 10.1.1.22 &> /dev/null && + ip netns exec ${NS2} ping -c 1 10.1.1.11 &> /dev/null; then echo "selftests: test_xdp_redirect $xdpmode [PASS]"; else ret=1 diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh index 05f872740999..cc57cb87e65f 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh @@ -32,6 +32,11 @@ DRV_MODE="xdpgeneric xdpdrv xdpegress" PASS=0 FAIL=0 LOG_DIR=$(mktemp -d) +declare -a NS +NS[0]="ns0-$(mktemp -u XXXXXX)" +NS[1]="ns1-$(mktemp -u XXXXXX)" +NS[2]="ns2-$(mktemp -u XXXXXX)" +NS[3]="ns3-$(mktemp -u XXXXXX)" test_pass() { @@ -47,11 +52,9 @@ test_fail() clean_up() { - for i in $(seq $NUM); do - ip link del veth$i 2> /dev/null - ip netns del ns$i 2> /dev/null + for i in $(seq 0 $NUM); do + ip netns del ${NS[$i]} 2> /dev/null done - ip netns del ns0 2> /dev/null } # Kselftest framework requirement - SKIP code is 4. @@ -79,23 +82,22 @@ setup_ns() mode="xdpdrv" fi - ip netns add ns0 + ip netns add ${NS[0]} for i in $(seq $NUM); do - ip netns add ns$i - ip -n ns$i link add veth0 index 2 type veth \ - peer name veth$i netns ns0 index $((1 + $i)) - ip -n ns0 link set veth$i up - ip -n ns$i link set veth0 up - - ip -n ns$i addr add 192.0.2.$i/24 dev veth0 - ip -n ns$i addr add 2001:db8::$i/64 dev veth0 + ip netns add ${NS[$i]} + ip -n ${NS[$i]} link add veth0 type veth peer name veth$i netns ${NS[0]} + ip -n ${NS[$i]} link set veth0 up + ip -n ${NS[0]} link set veth$i up + + ip -n ${NS[$i]} addr add 192.0.2.$i/24 dev veth0 + ip -n ${NS[$i]} addr add 2001:db8::$i/64 dev veth0 # Add a neigh entry for IPv4 ping test - ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0 - ip -n ns$i link set veth0 $mode obj \ + ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0 + ip -n ${NS[$i]} link set veth0 $mode obj \ xdp_dummy.o sec xdp &> /dev/null || \ { test_fail "Unable to load dummy xdp" && exit 1; } IFACES="$IFACES veth$i" - veth_mac[$i]=$(ip -n ns0 link show veth$i | awk '/link\/ether/ {print $2}') + veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}') done } @@ -104,10 +106,10 @@ do_egress_tests() local mode=$1 # mac test - ip netns exec ns2 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log & - ip netns exec ns3 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log & + ip netns exec ${NS[2]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log & + ip netns exec ${NS[3]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log & sleep 0.5 - ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null + ip netns exec ${NS[1]} ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null sleep 0.5 pkill tcpdump @@ -123,18 +125,18 @@ do_ping_tests() local mode=$1 # ping6 test: echo request should be redirect back to itself, 
not others - ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02 + ip netns exec ${NS[1]} ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02 - ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log & - ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log & - ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log & + ip netns exec ${NS[1]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log & + ip netns exec ${NS[2]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log & + ip netns exec ${NS[3]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log & sleep 0.5 # ARP test - ip netns exec ns1 arping -q -c 2 -I veth0 192.0.2.254 + ip netns exec ${NS[1]} arping -q -c 2 -I veth0 192.0.2.254 # IPv4 test - ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null + ip netns exec ${NS[1]} ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null # IPv6 test - ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null + ip netns exec ${NS[1]} ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null sleep 0.5 pkill tcpdump @@ -180,7 +182,7 @@ do_tests() xdpgeneric) drv_p="-S";; esac - ip netns exec ns0 ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log & + ip netns exec ${NS[0]} ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log & xdp_pid=$! sleep 1 if ! ps -p $xdp_pid > /dev/null; then @@ -197,10 +199,10 @@ do_tests() kill $xdp_pid } -trap clean_up EXIT - check_env +trap clean_up EXIT + for mode in ${DRV_MODE}; do setup_ns $mode do_tests $mode diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh index a3a1eaee26ea..392d28cc4e58 100755 --- a/tools/testing/selftests/bpf/test_xdp_veth.sh +++ b/tools/testing/selftests/bpf/test_xdp_veth.sh @@ -22,6 +22,9 @@ ksft_skip=4 TESTNAME=xdp_veth BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts) BPF_DIR=$BPF_FS/test_$TESTNAME +readonly NS1="ns1-$(mktemp -u XXXXXX)" +readonly NS2="ns2-$(mktemp -u XXXXXX)" +readonly NS3="ns3-$(mktemp -u XXXXXX)" _cleanup() { @@ -29,9 +32,9 @@ _cleanup() ip link del veth1 2> /dev/null ip link del veth2 2> /dev/null ip link del veth3 2> /dev/null - ip netns del ns1 2> /dev/null - ip netns del ns2 2> /dev/null - ip netns del ns3 2> /dev/null + ip netns del ${NS1} 2> /dev/null + ip netns del ${NS2} 2> /dev/null + ip netns del ${NS3} 2> /dev/null rm -rf $BPF_DIR 2> /dev/null } @@ -77,24 +80,24 @@ set -e trap cleanup_skip EXIT -ip netns add ns1 -ip netns add ns2 -ip netns add ns3 +ip netns add ${NS1} +ip netns add ${NS2} +ip netns add ${NS3} -ip link add veth1 index 111 type veth peer name veth11 netns ns1 -ip link add veth2 index 122 type veth peer name veth22 netns ns2 -ip link add veth3 index 133 type veth peer name veth33 netns ns3 +ip link add veth1 index 111 type veth peer name veth11 netns ${NS1} +ip link add veth2 index 122 type veth peer name veth22 netns ${NS2} +ip link add veth3 index 133 type veth peer name veth33 netns ${NS3} ip link set veth1 up ip link set veth2 up ip link set veth3 up -ip -n ns1 addr add 10.1.1.11/24 dev veth11 -ip -n ns3 addr add 10.1.1.33/24 dev veth33 +ip -n ${NS1} addr add 10.1.1.11/24 dev veth11 +ip -n ${NS3} addr add 10.1.1.33/24 dev veth33 -ip -n ns1 link set dev veth11 up -ip -n ns2 link set dev veth22 up -ip -n ns3 link set dev veth33 up +ip -n ${NS1} link set dev veth11 up +ip -n ${NS2} link set dev veth22 up +ip -n ${NS3} link set dev veth33 up mkdir $BPF_DIR bpftool prog loadall \ @@ -107,12 
+110,12 @@ ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0 ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1 ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2 -ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp -ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp -ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp +ip -n ${NS1} link set dev veth11 xdp obj xdp_dummy.o sec xdp +ip -n ${NS2} link set dev veth22 xdp obj xdp_tx.o sec xdp +ip -n ${NS3} link set dev veth33 xdp obj xdp_dummy.o sec xdp trap cleanup EXIT -ip netns exec ns1 ping -c 1 -W 1 10.1.1.33 +ip netns exec ${NS1} ping -c 1 -W 1 10.1.1.33 exit 0 diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh index 0cbc7604a2f8..810c407e0286 100755 --- a/tools/testing/selftests/bpf/test_xdp_vlan.sh +++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh @@ -4,6 +4,8 @@ # Kselftest framework requirement - SKIP code is 4. readonly KSFT_SKIP=4 +readonly NS1="ns1-$(mktemp -u XXXXXX)" +readonly NS2="ns2-$(mktemp -u XXXXXX)" # Allow wrapper scripts to name test if [ -z "$TESTNAME" ]; then @@ -49,15 +51,15 @@ cleanup() if [ -n "$INTERACTIVE" ]; then echo "Namespace setup still active explore with:" - echo " ip netns exec ns1 bash" - echo " ip netns exec ns2 bash" + echo " ip netns exec ${NS1} bash" + echo " ip netns exec ${NS2} bash" exit $status fi set +e ip link del veth1 2> /dev/null - ip netns del ns1 2> /dev/null - ip netns del ns2 2> /dev/null + ip netns del ${NS1} 2> /dev/null + ip netns del ${NS2} 2> /dev/null } # Using external program "getopt" to get --long-options @@ -126,8 +128,8 @@ fi # Interactive mode likely require us to cleanup netns if [ -n "$INTERACTIVE" ]; then ip link del veth1 2> /dev/null - ip netns del ns1 2> /dev/null - ip netns del ns2 2> /dev/null + ip netns del ${NS1} 2> /dev/null + ip netns del ${NS2} 2> /dev/null fi # Exit on failure @@ -144,8 +146,8 @@ if [ -n "$VERBOSE" ]; then fi # Create two namespaces -ip netns add ns1 -ip netns add ns2 +ip netns add ${NS1} +ip netns add ${NS2} # Run cleanup if failing or on kill trap cleanup 0 2 3 6 9 @@ -154,44 +156,44 @@ trap cleanup 0 2 3 6 9 ip link add veth1 type veth peer name veth2 # Move veth1 and veth2 into the respective namespaces -ip link set veth1 netns ns1 -ip link set veth2 netns ns2 +ip link set veth1 netns ${NS1} +ip link set veth2 netns ${NS2} # NOTICE: XDP require VLAN header inside packet payload # - Thus, disable VLAN offloading driver features # - For veth REMEMBER TX side VLAN-offload # # Disable rx-vlan-offload (mostly needed on ns1) -ip netns exec ns1 ethtool -K veth1 rxvlan off -ip netns exec ns2 ethtool -K veth2 rxvlan off +ip netns exec ${NS1} ethtool -K veth1 rxvlan off +ip netns exec ${NS2} ethtool -K veth2 rxvlan off # # Disable tx-vlan-offload (mostly needed on ns2) -ip netns exec ns2 ethtool -K veth2 txvlan off -ip netns exec ns1 ethtool -K veth1 txvlan off +ip netns exec ${NS2} ethtool -K veth2 txvlan off +ip netns exec ${NS1} ethtool -K veth1 txvlan off export IPADDR1=100.64.41.1 export IPADDR2=100.64.41.2 # In ns1/veth1 add IP-addr on plain net_device -ip netns exec ns1 ip addr add ${IPADDR1}/24 dev veth1 -ip netns exec ns1 ip link set veth1 up +ip netns exec ${NS1} ip addr add ${IPADDR1}/24 dev veth1 +ip netns exec ${NS1} ip link set veth1 up # In ns2/veth2 create VLAN device export VLAN=4011 export DEVNS2=veth2 -ip netns exec ns2 ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN -ip netns exec ns2 ip addr add ${IPADDR2}/24 
dev $DEVNS2.$VLAN -ip netns exec ns2 ip link set $DEVNS2 up -ip netns exec ns2 ip link set $DEVNS2.$VLAN up +ip netns exec ${NS2} ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN +ip netns exec ${NS2} ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN +ip netns exec ${NS2} ip link set $DEVNS2 up +ip netns exec ${NS2} ip link set $DEVNS2.$VLAN up # Bringup lo in netns (to avoids confusing people using --interactive) -ip netns exec ns1 ip link set lo up -ip netns exec ns2 ip link set lo up +ip netns exec ${NS1} ip link set lo up +ip netns exec ${NS2} ip link set lo up # At this point, the hosts cannot reach each-other, # because ns2 are using VLAN tags on the packets. -ip netns exec ns2 sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"' +ip netns exec ${NS2} sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"' # Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags @@ -202,19 +204,19 @@ export FILE=test_xdp_vlan.o # First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change" export XDP_PROG=xdp_vlan_change -ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG +ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG # In ns1: egress use TC to add back VLAN tag 4011 # (del cmd) # tc qdisc del dev $DEVNS1 clsact 2> /dev/null # -ip netns exec ns1 tc qdisc add dev $DEVNS1 clsact -ip netns exec ns1 tc filter add dev $DEVNS1 egress \ +ip netns exec ${NS1} tc qdisc add dev $DEVNS1 clsact +ip netns exec ${NS1} tc filter add dev $DEVNS1 egress \ prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push # Now the namespaces can reach each-other, test with ping: -ip netns exec ns2 ping -i 0.2 -W 2 -c 2 $IPADDR1 -ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2 +ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1 +ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2 # Second test: Replace xdp prog, that fully remove vlan header # @@ -223,9 +225,9 @@ ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2 # ETH_P_8021Q indication, and this cause overwriting of our changes. 
# export XDP_PROG=xdp_vlan_remove_outer2 -ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE off -ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG +ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE off +ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG # Now the namespaces should still be able reach each-other, test with ping: -ip netns exec ns2 ping -i 0.2 -W 2 -c 2 $IPADDR1 -ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2 +ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1 +ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2 diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 7b7f918eda77..ca6abae9b09c 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -138,6 +138,29 @@ void read_trace_pipe(void) } } +ssize_t get_uprobe_offset(const void *addr) +{ + size_t start, end, base; + char buf[256]; + bool found = false; + FILE *f; + + f = fopen("/proc/self/maps", "r"); + if (!f) + return -errno; + + while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) { + if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) { + found = true; + break; + } + } + + fclose(f); + + if (!found) + return -ESRCH; + #if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2 #define OP_RT_RA_MASK 0xffff0000UL @@ -145,10 +168,6 @@ void read_trace_pipe(void) #define ADDIS_R2_R12 0x3c4c0000UL #define ADDI_R2_R2 0x38420000UL -ssize_t get_uprobe_offset(const void *addr, ssize_t base) -{ - u32 *insn = (u32 *)(uintptr_t)addr; - /* * A PPC64 ABIv2 function may have a local and a global entry * point. We need to use the local entry point when patching @@ -165,43 +184,16 @@ ssize_t get_uprobe_offset(const void *addr, ssize_t base) * lis r2,XXXX * addi r2,r2,XXXX */ - if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) || - ((*insn & OP_RT_RA_MASK) == LIS_R2)) && - ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2)) - return (ssize_t)(insn + 2) - base; - else - return (uintptr_t)addr - base; -} - -#else + { + const u32 *insn = (const u32 *)(uintptr_t)addr; -ssize_t get_uprobe_offset(const void *addr, ssize_t base) -{ - return (uintptr_t)addr - base; -} - -#endif - -ssize_t get_base_addr(void) -{ - size_t start, offset; - char buf[256]; - FILE *f; - - f = fopen("/proc/self/maps", "r"); - if (!f) - return -errno; - - while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n", - &start, buf, &offset) == 3) { - if (strcmp(buf, "r-xp") == 0) { - fclose(f); - return start - offset; - } + if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) || + ((*insn & OP_RT_RA_MASK) == LIS_R2)) && + ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2)) + return (uintptr_t)(insn + 2) - start + base; } - - fclose(f); - return -EINVAL; +#endif + return (uintptr_t)addr - start + base; } ssize_t get_rel_offset(uintptr_t addr) diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index d907b445524d..238a9c98cde2 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -18,8 +18,7 @@ int kallsyms_find(const char *sym, unsigned long long *addr); void read_trace_pipe(void); -ssize_t get_uprobe_offset(const void *addr, ssize_t base); -ssize_t get_base_addr(void); +ssize_t get_uprobe_offset(const void *addr); ssize_t get_rel_offset(uintptr_t addr); #endif diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index 
ce13ece08d51..8c224eac93df 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -121,7 +121,25 @@ .result = ACCEPT, }, { - "sk_fullsock(skb->sk): sk->dst_port [narrow load]", + "sk_fullsock(skb->sk): sk->dst_port [word load] (backward compatibility)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [half load]", .insns = { BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), @@ -139,7 +157,64 @@ .result = ACCEPT, }, { - "sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]", + "sk_fullsock(skb->sk): sk->dst_port [half load] (invalid)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid sock access", +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [byte load]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port)), + BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [byte load] (invalid)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid sock access", +}, +{ + "sk_fullsock(skb->sk): past sk->dst_port [half load] (invalid)", .insns = { BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), @@ -149,7 +224,7 @@ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, dst_port)), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, diff --git 
a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c index 51c8224b4ccc..aaedbf4955c3 100644 --- a/tools/testing/selftests/bpf/xdp_redirect_multi.c +++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c @@ -32,12 +32,12 @@ static void int_exit(int sig) int i; for (i = 0; ifaces[i] > 0; i++) { - if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); + if (bpf_xdp_query_id(ifaces[i], xdp_flags, &prog_id)) { + printf("bpf_xdp_query_id failed\n"); exit(1); } if (prog_id) - bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags); + bpf_xdp_detach(ifaces[i], xdp_flags, NULL); } exit(0); @@ -210,7 +210,7 @@ int main(int argc, char **argv) } /* bind prog_fd to each interface */ - ret = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags); + ret = bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL); if (ret) { printf("Set xdp fd failed on %d\n", ifindex); goto err_out; diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c index baa870a759a2..c567856fd1bc 100644 --- a/tools/testing/selftests/bpf/xdping.c +++ b/tools/testing/selftests/bpf/xdping.c @@ -29,7 +29,7 @@ static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static void cleanup(int sig) { - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + bpf_xdp_detach(ifindex, xdp_flags, NULL); if (sig) exit(1); } @@ -203,7 +203,7 @@ int main(int argc, char **argv) printf("XDP setup disrupts network connectivity, hit Ctrl+C to quit\n"); - if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { + if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { fprintf(stderr, "Link set xdp fd failed for %s\n", ifname); goto done; } diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index ffa5502ad95e..5f8296d29e77 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -266,22 +266,24 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size } static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, - struct ifobject *ifobject, u32 qid) + struct ifobject *ifobject, bool shared) { - struct xsk_socket_config cfg; + struct xsk_socket_config cfg = {}; struct xsk_ring_cons *rxr; struct xsk_ring_prod *txr; xsk->umem = umem; cfg.rx_size = xsk->rxqsize; cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; - cfg.libbpf_flags = 0; + cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; cfg.xdp_flags = ifobject->xdp_flags; cfg.bind_flags = ifobject->bind_flags; + if (shared) + cfg.bind_flags |= XDP_SHARED_UMEM; txr = ifobject->tx_on ? &xsk->tx : NULL; rxr = ifobject->rx_on ? &xsk->rx : NULL; - return xsk_socket__create(&xsk->xsk, ifobject->ifname, qid, umem->umem, rxr, txr, &cfg); + return xsk_socket__create(&xsk->xsk, ifobject->ifname, 0, umem->umem, rxr, txr, &cfg); } static struct option long_options[] = { @@ -387,7 +389,6 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, for (i = 0; i < MAX_INTERFACES; i++) { struct ifobject *ifobj = i ? 
ifobj_rx : ifobj_tx; - ifobj->umem = &ifobj->umem_arr[0]; ifobj->xsk = &ifobj->xsk_arr[0]; ifobj->use_poll = false; ifobj->pacing_on = true; @@ -401,11 +402,12 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, ifobj->tx_on = false; } + memset(ifobj->umem, 0, sizeof(*ifobj->umem)); + ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS; + ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; + for (j = 0; j < MAX_SOCKETS; j++) { - memset(&ifobj->umem_arr[j], 0, sizeof(ifobj->umem_arr[j])); memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); - ifobj->umem_arr[j].num_frames = DEFAULT_UMEM_BUFFERS; - ifobj->umem_arr[j].frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; } } @@ -950,7 +952,10 @@ static void tx_stats_validate(struct ifobject *ifobject) static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) { + u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; + int ret, ifindex; + void *bufs; u32 i; ifobject->ns_fd = switch_namespace(ifobject->nsname); @@ -958,23 +963,20 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (ifobject->umem->unaligned_mode) mmap_flags |= MAP_HUGETLB; - for (i = 0; i < test->nb_sockets; i++) { - u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; - u32 ctr = 0; - void *bufs; - int ret; + bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); + if (bufs == MAP_FAILED) + exit_with_error(errno); - bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); - if (bufs == MAP_FAILED) - exit_with_error(errno); + ret = xsk_configure_umem(ifobject->umem, bufs, umem_sz); + if (ret) + exit_with_error(-ret); - ret = xsk_configure_umem(&ifobject->umem_arr[i], bufs, umem_sz); - if (ret) - exit_with_error(-ret); + for (i = 0; i < test->nb_sockets; i++) { + u32 ctr = 0; while (ctr++ < SOCK_RECONF_CTR) { - ret = xsk_configure_socket(&ifobject->xsk_arr[i], &ifobject->umem_arr[i], - ifobject, i); + ret = xsk_configure_socket(&ifobject->xsk_arr[i], ifobject->umem, + ifobject, !!i); if (!ret) break; @@ -985,8 +987,22 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) } } - ifobject->umem = &ifobject->umem_arr[0]; ifobject->xsk = &ifobject->xsk_arr[0]; + + if (!ifobject->rx_on) + return; + + ifindex = if_nametoindex(ifobject->ifname); + if (!ifindex) + exit_with_error(errno); + + ret = xsk_setup_xdp_prog(ifindex, &ifobject->xsk_map_fd); + if (ret) + exit_with_error(-ret); + + ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd); + if (ret) + exit_with_error(-ret); } static void testapp_cleanup_xsk_res(struct ifobject *ifobj) @@ -1142,14 +1158,16 @@ static void testapp_bidi(struct test_spec *test) static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx) { + int ret; + xsk_socket__delete(ifobj_tx->xsk->xsk); - xsk_umem__delete(ifobj_tx->umem->umem); xsk_socket__delete(ifobj_rx->xsk->xsk); - xsk_umem__delete(ifobj_rx->umem->umem); - ifobj_tx->umem = &ifobj_tx->umem_arr[1]; ifobj_tx->xsk = &ifobj_tx->xsk_arr[1]; - ifobj_rx->umem = &ifobj_rx->umem_arr[1]; ifobj_rx->xsk = &ifobj_rx->xsk_arr[1]; + + ret = xsk_socket__update_xskmap(ifobj_rx->xsk->xsk, ifobj_rx->xsk_map_fd); + if (ret) + exit_with_error(-ret); } static void testapp_bpf_res(struct test_spec *test) @@ -1408,13 +1426,13 @@ static struct ifobject *ifobject_create(void) if 
(!ifobj->xsk_arr) goto out_xsk_arr; - ifobj->umem_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->umem_arr)); - if (!ifobj->umem_arr) - goto out_umem_arr; + ifobj->umem = calloc(1, sizeof(*ifobj->umem)); + if (!ifobj->umem) + goto out_umem; return ifobj; -out_umem_arr: +out_umem: free(ifobj->xsk_arr); out_xsk_arr: free(ifobj); @@ -1423,7 +1441,7 @@ out_xsk_arr: static void ifobject_delete(struct ifobject *ifobj) { - free(ifobj->umem_arr); + free(ifobj->umem); free(ifobj->xsk_arr); free(ifobj); } diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 2f705f44b748..62a3e6388632 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -125,10 +125,10 @@ struct ifobject { struct xsk_socket_info *xsk; struct xsk_socket_info *xsk_arr; struct xsk_umem_info *umem; - struct xsk_umem_info *umem_arr; thread_func_t func_ptr; struct pkt_stream *pkt_stream; int ns_fd; + int xsk_map_fd; u32 dst_ip; u32 src_ip; u32 xdp_flags; diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 3b0489910422..4f70baad867d 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -114,10 +114,25 @@ fib_rule6_test_match_n_redirect() log_test $? 0 "rule6 del by pref: $description" } +fib_rule6_test_reject() +{ + local match="$1" + local rc + + $IP -6 rule add $match table $RTABLE 2>/dev/null + rc=$? + log_test $rc 2 "rule6 check: $match" + + if [ $rc -eq 0 ]; then + $IP -6 rule del $match table $RTABLE + fi +} + fib_rule6_test() { local getmatch local match + local cnt # setup the fib rule redirect route $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink @@ -128,8 +143,21 @@ fib_rule6_test() match="from $SRC_IP6 iif $DEV" fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table" + # Reject dsfield (tos) options which have ECN bits set + for cnt in $(seq 1 3); do + match="dsfield $cnt" + fib_rule6_test_reject "$match" + done + + # Don't take ECN bits into account when matching on dsfield match="tos 0x10" - fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table" + for cnt in "0x10" "0x11" "0x12" "0x13"; do + # Using option 'tos' instead of 'dsfield' as old iproute2 + # versions don't support 'dsfield' in ip rule show. + getmatch="tos $cnt" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getmatch redirect to table" + done match="fwmark 0x64" getmatch="mark 0x64" @@ -187,10 +215,25 @@ fib_rule4_test_match_n_redirect() log_test $? 0 "rule4 del by pref: $description" } +fib_rule4_test_reject() +{ + local match="$1" + local rc + + $IP rule add $match table $RTABLE 2>/dev/null + rc=$? 
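 # A note on the expected status: iproute2 exits with 2 when the kernel
 # refuses the rule, which is why log_test checks for 2 below. Sketch of
 # the behaviour being asserted (illustrative table number):
 #   ip rule add dsfield 0x01 table 100   # ECN ECT(1) bit set -> rejected
 #   ip rule add dsfield 0x10 table 100   # DSCP only, ECN bits clear -> accepted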
+ log_test $rc 2 "rule4 check: $match" + + if [ $rc -eq 0 ]; then + $IP rule del $match table $RTABLE + fi +} + fib_rule4_test() { local getmatch local match + local cnt # setup the fib rule redirect route $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink @@ -206,8 +249,21 @@ fib_rule4_test() fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table" ip netns exec testns sysctl -qw net.ipv4.ip_forward=0 + # Reject dsfield (tos) options which have ECN bits set + for cnt in $(seq 1 3); do + match="dsfield $cnt" + fib_rule4_test_reject "$match" + done + + # Don't take ECN bits into account when matching on dsfield match="tos 0x10" - fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table" + for cnt in "0x10" "0x11" "0x12" "0x13"; do + # Using option 'tos' instead of 'dsfield' as old iproute2 + # versions don't support 'dsfield' in ip rule show. + getmatch="tos $cnt" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getmatch redirect to table" + done match="fwmark 0x64" getmatch="mark 0x64" diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 996af1ae3d3d..bb73235976b3 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1447,6 +1447,81 @@ ipv4_local_rt_cache() log_test $? 0 "Cached route removed from VRF port device" } +ipv4_rt_dsfield() +{ + echo + echo "IPv4 route with dsfield tests" + + run_cmd "$IP route flush 172.16.102.0/24" + + # New routes should reject dsfield options that interfere with ECN + run_cmd "$IP route add 172.16.102.0/24 dsfield 0x01 via 172.16.101.2" + log_test $? 2 "Reject route with dsfield 0x01" + + run_cmd "$IP route add 172.16.102.0/24 dsfield 0x02 via 172.16.101.2" + log_test $? 2 "Reject route with dsfield 0x02" + + run_cmd "$IP route add 172.16.102.0/24 dsfield 0x03 via 172.16.101.2" + log_test $? 2 "Reject route with dsfield 0x03" + + # A generic route that doesn't take DSCP into account + run_cmd "$IP route add 172.16.102.0/24 via 172.16.101.2" + + # A more specific route for DSCP 0x10 + run_cmd "$IP route add 172.16.102.0/24 dsfield 0x10 via 172.16.103.2" + + # DSCP 0x10 should match the specific route, no matter the ECN bits + $IP route get fibmatch 172.16.102.1 dsfield 0x10 | \ + grep -q "via 172.16.103.2" + log_test $? 0 "IPv4 route with DSCP and ECN:Not-ECT" + + $IP route get fibmatch 172.16.102.1 dsfield 0x11 | \ + grep -q "via 172.16.103.2" + log_test $? 0 "IPv4 route with DSCP and ECN:ECT(1)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x12 | \ + grep -q "via 172.16.103.2" + log_test $? 0 "IPv4 route with DSCP and ECN:ECT(0)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x13 | \ + grep -q "via 172.16.103.2" + log_test $? 0 "IPv4 route with DSCP and ECN:CE" + + # Unknown DSCP should match the generic route, no matter the ECN bits + $IP route get fibmatch 172.16.102.1 dsfield 0x14 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with unknown DSCP and ECN:Not-ECT" + + $IP route get fibmatch 172.16.102.1 dsfield 0x15 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(1)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x16 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(0)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x17 | \ + grep -q "via 172.16.101.2" + log_test $? 
0 "IPv4 route with unknown DSCP and ECN:CE" + + # Null DSCP should match the generic route, no matter the ECN bits + $IP route get fibmatch 172.16.102.1 dsfield 0x00 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with no DSCP and ECN:Not-ECT" + + $IP route get fibmatch 172.16.102.1 dsfield 0x01 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(1)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x02 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(0)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x03 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with no DSCP and ECN:CE" +} + ipv4_route_test() { route_setup @@ -1454,6 +1529,7 @@ ipv4_route_test() ipv4_rt_add ipv4_rt_replace ipv4_local_rt_cache + ipv4_rt_dsfield route_cleanup } diff --git a/tools/testing/selftests/net/forwarding/pedit_ip.sh b/tools/testing/selftests/net/forwarding/pedit_ip.sh new file mode 100755 index 000000000000..d14efb2d23b2 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/pedit_ip.sh @@ -0,0 +1,201 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on +# egress of $swp2, the traffic is acted upon by a pedit action. An ingress +# filter installed on $h2 verifies that the packet looks like expected. +# +# +----------------------+ +----------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +----|-----------------+ +----------------|-----+ +# | | +# +----|----------------------------------------------------------------|-----+ +# | SW | | | +# | +-|----------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | +--------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + test_ip4_src + test_ip4_dst + test_ip6_src + test_ip6_dst +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add name br1 up type bridge vlan_filtering 1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h2mac=$(mac_get $h2) + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:1::2 +} + +do_test_pedit_ip() +{ + local pedit_locus=$1; shift + local pedit_action=$1; shift + local match_prot=$1; shift + local match_flower=$1; shift + local mz_flags=$1; shift + + tc filter add $pedit_locus handle 101 pref 1 \ + 
flower action pedit ex munge $pedit_action + tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \ + flower skip_hw $match_flower action pass + + RET=0 + + $MZ $mz_flags $h1 -c 10 -d 20msec -p 100 -a own -b $h2mac -q -t ip + + local pkts + pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \ + tc_rule_handle_stats_get "dev $h2 ingress" 101) + check_err $? "Expected to get 10 packets, but got $pkts." + + pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101) + ((pkts >= 10)) + check_err $? "Expected to get 10 packets on pedit rule, but got $pkts." + + log_test "$pedit_locus pedit $pedit_action" + + tc filter del dev $h2 ingress pref 1 + tc filter del $pedit_locus pref 1 +} + +do_test_pedit_ip6() +{ + local locus=$1; shift + local pedit_addr=$1; shift + local flower_addr=$1; shift + + do_test_pedit_ip "$locus" "$pedit_addr set 2001:db8:2::1" ipv6 \ + "$flower_addr 2001:db8:2::1" \ + "-6 -A 2001:db8:1::1 -B 2001:db8:1::2" +} + +do_test_pedit_ip4() +{ + local locus=$1; shift + local pedit_addr=$1; shift + local flower_addr=$1; shift + + do_test_pedit_ip "$locus" "$pedit_addr set 198.51.100.1" ip \ + "$flower_addr 198.51.100.1" \ + "-A 192.0.2.1 -B 192.0.2.2" +} + +test_ip4_src() +{ + do_test_pedit_ip4 "dev $swp1 ingress" "ip src" src_ip + do_test_pedit_ip4 "dev $swp2 egress" "ip src" src_ip +} + +test_ip4_dst() +{ + do_test_pedit_ip4 "dev $swp1 ingress" "ip dst" dst_ip + do_test_pedit_ip4 "dev $swp2 egress" "ip dst" dst_ip +} + +test_ip6_src() +{ + do_test_pedit_ip6 "dev $swp1 ingress" "ip6 src" src_ip + do_test_pedit_ip6 "dev $swp2 egress" "ip6 src" src_ip +} + +test_ip6_dst() +{ + do_test_pedit_ip6 "dev $swp1 ingress" "ip6 dst" dst_ip + do_test_pedit_ip6 "dev $swp2 egress" "ip6 dst" dst_ip +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index bd106c7ec232..4a565fb84137 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -15,6 +15,7 @@ timeout_test=$((timeout_poll * 2 + 1)) mptcp_connect="" capture=0 checksum=0 +ip_mptcp=0 do_all_tests=1 TEST_COUNT=0 @@ -239,6 +240,16 @@ is_v6() [ -z "${1##*:*}" ] } +is_addr() +{ + [ -z "${1##*[.:]*}" ] +} + +is_number() +{ + [[ $1 == ?(-)+([0-9]) ]] +} + # $1: ns, $2: port wait_local_port_listen() { @@ -278,6 +289,109 @@ wait_rm_addr() done } +pm_nl_set_limits() +{ + local ns=$1 + local addrs=$2 + local subflows=$3 + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp limits set add_addr_accepted $addrs subflows $subflows + else + ip netns exec $ns ./pm_nl_ctl limits $addrs $subflows + fi +} + +pm_nl_add_endpoint() +{ + local ns=$1 + local addr=$2 + local flags + local port + local dev + local id + local nr=2 + + for p in $@ + do + if [ $p = "flags" ]; then + eval _flags=\$"$nr" + [ ! -z $_flags ]; flags="flags $_flags" + fi + if [ $p = "dev" ]; then + eval _dev=\$"$nr" + [ ! -z $_dev ]; dev="dev $_dev" + fi + if [ $p = "id" ]; then + eval _id=\$"$nr" + [ ! -z $_id ]; id="id $_id" + fi + if [ $p = "port" ]; then + eval _port=\$"$nr" + [ ! 
-z $_port ]; port="port $_port" + fi + + let nr+=1 + done + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp endpoint add $addr ${_flags//","/" "} $dev $id $port + else + ip netns exec $ns ./pm_nl_ctl add $addr $flags $dev $id $port + fi +} + +pm_nl_del_endpoint() +{ + local ns=$1 + local id=$2 + local addr=$3 + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp endpoint delete id $id $addr + else + ip netns exec $ns ./pm_nl_ctl del $id $addr + fi +} + +pm_nl_flush_endpoint() +{ + local ns=$1 + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp endpoint flush + else + ip netns exec $ns ./pm_nl_ctl flush + fi +} + +pm_nl_show_endpoints() +{ + local ns=$1 + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp endpoint show + else + ip netns exec $ns ./pm_nl_ctl dump + fi +} + +pm_nl_change_endpoint() +{ + local ns=$1 + local flags=$2 + local id=$3 + local addr=$4 + local port="" + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp endpoint change id $id ${flags//","/" "} + else + if [ $5 -ne 0 ]; then port="port $5"; fi + ip netns exec $ns ./pm_nl_ctl set $addr flags $flags $port + fi +} + do_transfer() { listener_ns="$1" @@ -378,31 +492,36 @@ do_transfer() else addr="10.0.$counter.1" fi - ip netns exec $ns1 ./pm_nl_ctl add $addr flags signal + pm_nl_add_endpoint $ns1 $addr flags signal let counter+=1 let add_nr_ns1-=1 done elif [ $addr_nr_ns1 -lt 0 ]; then let rm_nr_ns1=-addr_nr_ns1 if [ $rm_nr_ns1 -lt 8 ]; then - counter=1 - pos=1 - dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`) - if [ ${#dump[@]} -gt 0 ]; then - while [ $counter -le $rm_nr_ns1 ] - do - id=${dump[$pos]} - rm_addr=$(rm_addr_count ${connector_ns}) - ip netns exec ${listener_ns} ./pm_nl_ctl del $id - wait_rm_addr ${connector_ns} ${rm_addr} - let counter+=1 - let pos+=5 + counter=0 + pm_nl_show_endpoints ${listener_ns} | while read line; do + local arr=($line) + local nr=0 + + for i in ${arr[@]}; do + if [ $i = "id" ]; then + if [ $counter -eq $rm_nr_ns1 ]; then + break + fi + id=${arr[$nr+1]} + rm_addr=$(rm_addr_count ${connector_ns}) + pm_nl_del_endpoint ${listener_ns} $id + wait_rm_addr ${connector_ns} ${rm_addr} + let counter+=1 + fi + let nr+=1 done - fi + done elif [ $rm_nr_ns1 -eq 8 ]; then - ip netns exec ${listener_ns} ./pm_nl_ctl flush + pm_nl_flush_endpoint ${listener_ns} elif [ $rm_nr_ns1 -eq 9 ]; then - ip netns exec ${listener_ns} ./pm_nl_ctl del 0 ${connect_addr} + pm_nl_del_endpoint ${listener_ns} 0 ${connect_addr} fi fi @@ -426,30 +545,36 @@ do_transfer() else addr="10.0.$counter.2" fi - ip netns exec $ns2 ./pm_nl_ctl add $addr flags $flags + pm_nl_add_endpoint $ns2 $addr flags $flags let counter+=1 let add_nr_ns2-=1 done elif [ $addr_nr_ns2 -lt 0 ]; then let rm_nr_ns2=-addr_nr_ns2 if [ $rm_nr_ns2 -lt 8 ]; then - counter=1 - pos=1 - dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`) - if [ ${#dump[@]} -gt 0 ]; then - while [ $counter -le $rm_nr_ns2 ] - do - # rm_addr are serialized, allow the previous one to complete - id=${dump[$pos]} - rm_addr=$(rm_addr_count ${listener_ns}) - ip netns exec ${connector_ns} ./pm_nl_ctl del $id - wait_rm_addr ${listener_ns} ${rm_addr} - let counter+=1 - let pos+=5 + counter=0 + pm_nl_show_endpoints ${connector_ns} | while read line; do + local arr=($line) + local nr=0 + + for i in ${arr[@]}; do + if [ $i = "id" ]; then + if [ $counter -eq $rm_nr_ns2 ]; then + break + fi + # rm_addr are serialized, allow the previous one to + # complete + id=${arr[$nr+1]} + rm_addr=$(rm_addr_count ${listener_ns}) + pm_nl_del_endpoint ${connector_ns} $id + wait_rm_addr ${listener_ns} 
${rm_addr} + let counter+=1 + fi + let nr+=1 done - fi + done elif [ $rm_nr_ns2 -eq 8 ]; then - ip netns exec ${connector_ns} ./pm_nl_ctl flush + pm_nl_flush_endpoint ${connector_ns} elif [ $rm_nr_ns2 -eq 9 ]; then local addr if is_v6 "${connect_addr}"; then @@ -457,18 +582,34 @@ do_transfer() else addr="10.0.1.2" fi - ip netns exec ${connector_ns} ./pm_nl_ctl del 0 $addr + pm_nl_del_endpoint ${connector_ns} 0 $addr fi fi if [ ! -z $sflags ]; then sleep 1 for netns in "$ns1" "$ns2"; do - dump=(`ip netns exec $netns ./pm_nl_ctl dump`) - if [ ${#dump[@]} -gt 0 ]; then - addr=${dump[${#dump[@]} - 1]} - ip netns exec $netns ./pm_nl_ctl set $addr flags $sflags - fi + pm_nl_show_endpoints $netns | while read line; do + local arr=($line) + local addr + local port=0 + local id + + for i in ${arr[@]}; do + if is_addr $i; then + addr=$i + elif is_number $i; then + # The minimum expected port number is 10000 + if [ $i -gt 10000 ]; then + port=$i + # The maximum id number is 255 + elif [ $i -lt 255 ]; then + id=$i + fi + fi + done + pm_nl_change_endpoint $netns $sflags $id $addr $port + done done fi @@ -977,51 +1118,51 @@ subflows_tests() # subflow limited by client reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 0 - ip netns exec $ns2 ./pm_nl_ctl limits 0 0 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 0 + pm_nl_set_limits $ns2 0 0 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow, limited by client" 0 0 0 # subflow limited by server reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 0 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 0 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow, limited by server" 1 1 0 # subflow reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow" 1 1 1 # multiple subflows reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows" 2 2 2 # multiple subflows limited by server reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows, limited by server" 2 2 1 # single subflow, dev reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow dev ns2eth3 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow dev ns2eth3 run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow, dev" 1 1 1 } @@ -1031,28 
+1172,28 @@ subflows_error_tests() # If a single subflow is configured, and matches the MPC src # address, no additional subflow should be created reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr "no MPC reuse with single endpoint" 0 0 0 # multiple subflows, with subflow creation error reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr "multi subflows, with failing subflow" 1 1 1 # multiple subflows, with subflow timeout on MPJ reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j DROP run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr "multi subflows, with subflow timeout" 1 1 1 @@ -1061,9 +1202,9 @@ subflows_error_tests() # closed subflow (due to reset) is not reused if additional # subflows are added later reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow & @@ -1073,7 +1214,7 @@ subflows_error_tests() # mpj subflow will be in TW after the reset wait_for_tw $ns2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow wait # additional subflow could be created only if the PM select @@ -1085,16 +1226,16 @@ signal_address_tests() { # add_address, unused reset - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "unused signal address" 0 0 0 chk_add_nr 1 1 # accept and use add_addr reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address" 1 1 1 chk_add_nr 1 1 @@ -1104,59 +1245,59 @@ signal_address_tests() # belong to different subnets or one of the listed local address could be # used for 'add_addr' subflow reset - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + 
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and signal" 2 2 2 chk_add_nr 1 1 # accept and use add_addr with additional subflows reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows and signal" 3 3 3 chk_add_nr 1 1 # signal addresses reset - ip netns exec $ns1 ./pm_nl_ctl limits 3 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_set_limits $ns2 3 3 run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal addresses" 3 3 3 chk_add_nr 3 3 # signal invalid addresses reset - ip netns exec $ns1 ./pm_nl_ctl limits 3 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_set_limits $ns2 3 3 run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal invalid addresses" 1 1 1 chk_add_nr 3 3 # signal addresses race test reset - ip netns exec $ns1 ./pm_nl_ctl limits 4 4 - ip netns exec $ns2 ./pm_nl_ctl limits 4 4 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags signal - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags signal - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal + pm_nl_set_limits $ns1 4 4 + pm_nl_set_limits $ns2 4 4 + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_add_endpoint $ns2 10.0.1.2 flags signal + pm_nl_add_endpoint $ns2 10.0.2.2 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags signal + pm_nl_add_endpoint $ns2 10.0.4.2 flags signal run_tests $ns1 $ns2 10.0.1.1 # the server will not signal the address terminating @@ -1176,11 +1317,11 @@ link_failure_tests() # active backup and link switch-over. # Let's set some arbitrary (low) virtual link limits. 
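 # Sketch of what the new wrappers below expand to under -i (ip_mptcp=1),
 # assuming an MPTCP-aware iproute2; the legacy path keeps calling ./pm_nl_ctl:
 #   ip -n "$ns1" mptcp limits set add_addr_accepted 0 subflows 3
 #   ip -n "$ns1" mptcp endpoint add 10.0.2.1 dev ns1eth2 signal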
init_shapers - ip netns exec $ns1 ./pm_nl_ctl limits 0 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow run_tests $ns1 $ns2 10.0.1.1 1 chk_join_nr "multiple flows, signal, link failure" 3 3 3 chk_add_nr 1 1 @@ -1190,11 +1331,11 @@ link_failure_tests() # for bidirectional transfer reset init_shapers - ip netns exec $ns1 ./pm_nl_ctl limits 0 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow run_tests $ns1 $ns2 10.0.1.1 2 chk_join_nr "multi flows, signal, bidi, link fail" 3 3 3 chk_add_nr 1 1 @@ -1204,11 +1345,11 @@ link_failure_tests() # will never be used reset init_shapers - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 2 + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 2 export FAILING_LINKS="1" - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup run_tests $ns1 $ns2 10.0.1.1 1 chk_join_nr "backup subflow unused, link failure" 2 2 2 chk_add_nr 1 1 @@ -1218,10 +1359,10 @@ link_failure_tests() # the traffic reset init_shapers - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup export FAILING_LINKS="1 2" run_tests $ns1 $ns2 10.0.1.1 1 chk_join_nr "backup flow used, multi links fail" 2 2 2 @@ -1233,10 +1374,10 @@ link_failure_tests() # for bidirectional transfer reset init_shapers - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup run_tests $ns1 $ns2 10.0.1.1 2 chk_join_nr "backup flow used, bidi, link failure" 2 2 2 chk_add_nr 1 1 @@ -1248,38 +1389,38 @@ add_addr_timeout_tests() { # add_addr timeout reset_with_add_addr_timeout - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits 
$ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1 chk_add_nr 4 0 # add_addr timeout IPv6 reset_with_add_addr_timeout 6 - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1 chk_add_nr 4 0 # signal addresses timeout reset_with_add_addr_timeout - ip netns exec $ns1 ./pm_nl_ctl limits 2 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + pm_nl_set_limits $ns1 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_set_limits $ns2 2 2 run_tests $ns1 $ns2 10.0.1.1 0 0 0 least chk_join_nr "signal addresses, ADD_ADDR timeout" 2 2 2 chk_add_nr 8 0 # signal invalid addresses timeout reset_with_add_addr_timeout - ip netns exec $ns1 ./pm_nl_ctl limits 2 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + pm_nl_set_limits $ns1 2 2 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_set_limits $ns2 2 2 run_tests $ns1 $ns2 10.0.1.1 0 0 0 least chk_join_nr "invalid address, ADD_ADDR timeout" 1 1 1 chk_add_nr 8 0 @@ -1289,28 +1430,28 @@ remove_tests() { # single subflow, remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow chk_join_nr "remove single subflow" 1 1 1 chk_rm_nr 1 1 # multiple subflows, remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 0 -2 slow chk_join_nr "remove multiple subflows" 2 2 2 chk_rm_nr 2 2 # single address, remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow chk_join_nr "remove single address" 1 1 1 chk_add_nr 1 1 @@ -1318,10 +1459,10 @@ remove_tests() # subflow and signal, remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow chk_join_nr "remove subflow and signal" 2 2 2 chk_add_nr 1 1 @@ -1329,11 +1470,11 @@ remove_tests() # subflows and signal, 
remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 slow chk_join_nr "remove subflows and signal" 3 3 3 chk_add_nr 1 1 @@ -1341,11 +1482,11 @@ remove_tests() # addresses remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 3 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_set_limits $ns2 3 3 run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow chk_join_nr "remove addresses" 3 3 3 chk_add_nr 3 3 @@ -1353,11 +1494,11 @@ remove_tests() # invalid addresses remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 3 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_set_limits $ns2 3 3 run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow chk_join_nr "remove invalid addresses" 1 1 1 chk_add_nr 3 3 @@ -1365,11 +1506,11 @@ remove_tests() # subflows and signal, flush reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow chk_join_nr "flush subflows and signal" 3 3 3 chk_add_nr 1 1 @@ -1377,22 +1518,22 @@ remove_tests() # subflows flush reset - ip netns exec $ns1 ./pm_nl_ctl limits 3 3 - ip netns exec $ns2 ./pm_nl_ctl limits 3 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow id 150 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + pm_nl_set_limits $ns1 3 3 + pm_nl_set_limits $ns2 3 3 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow chk_join_nr "flush subflows" 3 3 3 chk_rm_nr 3 3 # addresses flush reset - ip netns exec $ns1 ./pm_nl_ctl limits 3 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 + pm_nl_add_endpoint $ns1 
10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_set_limits $ns2 3 3 run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow chk_join_nr "flush addresses" 3 3 3 chk_add_nr 3 3 @@ -1400,11 +1541,11 @@ remove_tests() # invalid addresses flush reset - ip netns exec $ns1 ./pm_nl_ctl limits 3 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_set_limits $ns2 3 3 run_tests $ns1 $ns2 10.0.1.1 0 -8 0 slow chk_join_nr "flush invalid addresses" 1 1 1 chk_add_nr 3 3 @@ -1412,18 +1553,18 @@ remove_tests() # remove id 0 subflow reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 0 -9 slow chk_join_nr "remove id 0 subflow" 1 1 1 chk_rm_nr 1 1 # remove id 0 address reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 10.0.1.1 0 -9 0 slow chk_join_nr "remove id 0 address" 1 1 1 chk_add_nr 1 1 @@ -1434,37 +1575,37 @@ add_tests() { # add single subflow reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow chk_join_nr "add single subflow" 1 1 1 # add signal address reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow chk_join_nr "add signal address" 1 1 1 chk_add_nr 1 1 # add multiple subflows reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow chk_join_nr "add multiple subflows" 2 2 2 # add multiple subflows IPv6 reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 run_tests $ns1 $ns2 dead:beef:1::1 0 0 2 slow chk_join_nr "add multiple subflows IPv6" 2 2 2 # add multiple addresses IPv6 reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 run_tests $ns1 $ns2 dead:beef:1::1 0 2 0 slow chk_join_nr "add multiple addresses IPv6" 2 2 2 chk_add_nr 2 2 @@ -1474,33 +1615,33 @@ ipv6_tests() { # subflow IPv6 reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow chk_join_nr "single subflow IPv6" 1 1 1 # add_address, unused IPv6 reset - ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal + pm_nl_add_endpoint $ns1 
dead:beef:2::1 flags signal run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow chk_join_nr "unused signal address IPv6" 0 0 0 chk_add_nr 1 1 # signal address IPv6 reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow chk_join_nr "single address IPv6" 1 1 1 chk_add_nr 1 1 # single address IPv6, remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow chk_join_nr "remove single address IPv6" 1 1 1 chk_add_nr 1 1 @@ -1508,10 +1649,10 @@ ipv6_tests() # subflow and signal IPv6, remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow chk_join_nr "remove subflow and signal IPv6" 2 2 2 chk_add_nr 1 1 @@ -1522,76 +1663,76 @@ v4mapped_tests() { # subflow IPv4-mapped to IPv4-mapped reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow run_tests $ns1 $ns2 "::ffff:10.0.1.1" chk_join_nr "single subflow IPv4-mapped" 1 1 1 # signal address IPv4-mapped with IPv4-mapped sk reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal run_tests $ns1 $ns2 "::ffff:10.0.1.1" chk_join_nr "signal address IPv4-mapped" 1 1 1 chk_add_nr 1 1 # subflow v4-map-v6 reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 "::ffff:10.0.1.1" chk_join_nr "single subflow v4-map-v6" 1 1 1 # signal address v4-map-v6 reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 "::ffff:10.0.1.1" chk_join_nr "signal address v4-map-v6" 1 1 1 chk_add_nr 1 1 # subflow v6-map-v4 reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow v6-map-v4" 1 1 1 # signal address v6-map-v4 reset - ip 
netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address v6-map-v4" 1 1 1 chk_add_nr 1 1 # no subflow IPv6 to v4 address reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "no JOIN with diff families v4-v6" 0 0 0 # no subflow IPv6 to v4 address even if v6 has a valid v4 at the end reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 dead:beef:2::10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "no JOIN with diff families v4-v6-2" 0 0 0 # no subflow IPv4 to v6 address, no need to slow down too then reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr "no JOIN with diff families v6-v4" 0 0 0 } @@ -1600,50 +1741,60 @@ backup_tests() { # single subflow, backup reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,backup + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup chk_join_nr "single subflow, backup" 1 1 1 chk_prio_nr 0 1 # single address, backup reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup chk_join_nr "single address, backup" 1 1 1 chk_add_nr 1 1 chk_prio_nr 1 0 + + # single address with port, backup + reset + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns2 1 1 + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + chk_join_nr "single address with port, backup" 1 1 1 + chk_add_nr 1 1 + chk_prio_nr 1 0 } add_addr_ports_tests() { # signal address with port reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address with port" 1 1 1 chk_add_nr 1 1 1 # subflow and signal with port reset - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint 
$ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and signal with port" 2 2 2 chk_add_nr 1 1 1 # single address with port, remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow chk_join_nr "remove single address with port" 1 1 1 chk_add_nr 1 1 1 @@ -1651,10 +1802,10 @@ add_addr_ports_tests() # subflow and signal with port, remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 - ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow chk_join_nr "remove subflow and signal with port" 2 2 2 chk_add_nr 1 1 1 @@ -1662,11 +1813,11 @@ add_addr_ports_tests() # subflows and signal with port, flush reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 -8 -2 slow chk_join_nr "flush subflows and signal with port" 3 3 3 chk_add_nr 1 1 @@ -1674,20 +1825,20 @@ add_addr_ports_tests() # multiple addresses with port reset - ip netns exec $ns1 ./pm_nl_ctl limits 2 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal port 10100 - ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + pm_nl_set_limits $ns1 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10100 + pm_nl_set_limits $ns2 2 2 run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple addresses with port" 2 2 2 chk_add_nr 2 2 2 # multiple addresses with ports reset - ip netns exec $ns1 ./pm_nl_ctl limits 2 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal port 10101 - ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + pm_nl_set_limits $ns1 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10101 + pm_nl_set_limits $ns2 2 2 run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple addresses with ports" 2 2 2 chk_add_nr 2 2 2 @@ -1697,56 +1848,56 @@ syncookies_tests() { # single subflow, syncookies reset_with_cookies - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow with syn cookies" 1 1 1 # multiple subflows with syn cookies reset_with_cookies - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl 
add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows with syn cookies" 2 2 2 # multiple subflows limited by server reset_with_cookies - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflows limited by server w cookies" 2 1 1 # test signal address with cookies reset_with_cookies - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address with syn cookies" 1 1 1 chk_add_nr 1 1 # test cookie with subflow and signal reset_with_cookies - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and signal w cookies" 2 2 2 chk_add_nr 1 1 # accept and use add_addr with additional subflows reset_with_cookies - ip netns exec $ns1 ./pm_nl_ctl limits 0 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflows and signal w. 
cookies" 3 3 3 chk_add_nr 1 1 @@ -1756,29 +1907,29 @@ checksum_tests() { # checksum test 0 0 reset_with_checksum 0 0 - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 run_tests $ns1 $ns2 10.0.1.1 chk_csum_nr "checksum test 0 0" # checksum test 1 1 reset_with_checksum 1 1 - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 run_tests $ns1 $ns2 10.0.1.1 chk_csum_nr "checksum test 1 1" # checksum test 0 1 reset_with_checksum 0 1 - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 run_tests $ns1 $ns2 10.0.1.1 chk_csum_nr "checksum test 0 1" # checksum test 1 0 reset_with_checksum 1 0 - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 run_tests $ns1 $ns2 10.0.1.1 chk_csum_nr "checksum test 1 0" } @@ -1787,26 +1938,26 @@ deny_join_id0_tests() { # subflow allow join id0 ns1 reset_with_allow_join_id0 1 0 - ip netns exec $ns1 ./pm_nl_ctl limits 1 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow allow join id0 ns1" 1 1 1 # subflow allow join id0 ns2 reset_with_allow_join_id0 0 1 - ip netns exec $ns1 ./pm_nl_ctl limits 1 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow allow join id0 ns2" 0 0 0 # signal address allow join id0 ns1 # ADD_ADDRs are not affected by allow_join_id0 value. reset_with_allow_join_id0 1 0 - ip netns exec $ns1 ./pm_nl_ctl limits 1 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address allow join id0 ns1" 1 1 1 chk_add_nr 1 1 @@ -1814,28 +1965,28 @@ deny_join_id0_tests() # signal address allow join id0 ns2 # ADD_ADDRs are not affected by allow_join_id0 value. 
reset_with_allow_join_id0 0 1 - ip netns exec $ns1 ./pm_nl_ctl limits 1 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address allow join id0 ns2" 1 1 1 chk_add_nr 1 1 # subflow and address allow join id0 ns1 reset_with_allow_join_id0 1 0 - ip netns exec $ns1 ./pm_nl_ctl limits 2 2 - ip netns exec $ns2 ./pm_nl_ctl limits 2 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 2 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and address allow join id0 1" 2 2 2 # subflow and address allow join id0 ns2 reset_with_allow_join_id0 0 1 - ip netns exec $ns1 ./pm_nl_ctl limits 2 2 - ip netns exec $ns2 ./pm_nl_ctl limits 2 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 2 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and address allow join id0 2" 1 1 1 } @@ -1846,10 +1997,10 @@ fullmesh_tests() # 2 fullmesh addrs in ns2, added before the connection, # 1 non-fullmesh addr in ns1, added during the connection. reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 4 - ip netns exec $ns2 ./pm_nl_ctl limits 1 4 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,fullmesh - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,fullmesh + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 1 4 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,fullmesh + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,fullmesh run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow chk_join_nr "fullmesh test 2x1" 4 4 4 chk_add_nr 1 1 @@ -1858,9 +2009,9 @@ fullmesh_tests() # 1 non-fullmesh addr in ns1, added before the connection, # 1 fullmesh addr in ns2, added during the connection. reset - ip netns exec $ns1 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_set_limits $ns1 1 3 + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow chk_join_nr "fullmesh test 1x1" 3 3 3 chk_add_nr 1 1 @@ -1869,9 +2020,9 @@ fullmesh_tests() # 1 non-fullmesh addr in ns1, added before the connection, # 2 fullmesh addrs in ns2, added during the connection. reset - ip netns exec $ns1 ./pm_nl_ctl limits 2 5 - ip netns exec $ns2 ./pm_nl_ctl limits 1 5 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_set_limits $ns1 2 5 + pm_nl_set_limits $ns2 1 5 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow chk_join_nr "fullmesh test 1x2" 5 5 5 chk_add_nr 1 1 @@ -1881,36 +2032,36 @@ fullmesh_tests() # 2 fullmesh addrs in ns2, added during the connection, # limit max_subflows to 4. 
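 # The flag-flipping cases further down go through pm_nl_change_endpoint; a
 # sketch of the two back ends it selects between (endpoint id 1 assumed):
 #   ip -n "$ns" mptcp endpoint change id 1 fullmesh
 #   ip netns exec "$ns" ./pm_nl_ctl set 10.0.2.1 flags fullmesh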
@@ -1881,36 +2032,36 @@ fullmesh_tests()
 	# 2 fullmesh addrs in ns2, added during the connection,
 	# limit max_subflows to 4.
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 2 4
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 4
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	pm_nl_set_limits $ns1 2 4
+	pm_nl_set_limits $ns2 1 4
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
 	chk_join_nr "fullmesh test 1x2, limited" 4 4 4
 	chk_add_nr 1 1
 
 	# set fullmesh flag
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+	pm_nl_set_limits $ns1 4 4
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
+	pm_nl_set_limits $ns2 4 4
 	run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow fullmesh
 	chk_join_nr "set fullmesh flag test" 2 2 2
 	chk_rm_nr 0 1
 
 	# set nofullmesh flag
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow,fullmesh
-	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+	pm_nl_set_limits $ns1 4 4
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh
+	pm_nl_set_limits $ns2 4 4
 	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow nofullmesh
 	chk_join_nr "set nofullmesh flag test" 2 2 2
 	chk_rm_nr 0 1
 
 	# set backup,fullmesh flags
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+	pm_nl_set_limits $ns1 4 4
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
+	pm_nl_set_limits $ns2 4 4
 	run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow backup,fullmesh
 	chk_join_nr "set backup,fullmesh flags test" 2 2 2
 	chk_prio_nr 0 1
@@ -1918,9 +2069,9 @@ fullmesh_tests()
 
 	# set nobackup,nofullmesh flags
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,backup,fullmesh
+	pm_nl_set_limits $ns1 4 4
+	pm_nl_set_limits $ns2 4 4
+	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh
 	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup,nofullmesh
 	chk_join_nr "set nobackup,nofullmesh flags test" 2 2 2
 	chk_prio_nr 0 1
@@ -1966,6 +2117,7 @@ usage()
 	echo " -m fullmesh_tests"
 	echo " -c capture pcap files"
 	echo " -C enable data checksum"
+	echo " -i use ip mptcp"
 	echo " -h help"
 }
 
@@ -1987,9 +2139,12 @@ for arg in "$@"; do
 	if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"C"[0-9a-zA-Z]*$ ]]; then
 		checksum=1
 	fi
+	if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"i"[0-9a-zA-Z]*$ ]]; then
+		ip_mptcp=1
+	fi
 
-	# exception for the capture/checksum options, the rest means: a part of the tests
-	if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ]; then
+	# exception for the capture/checksum/ip_mptcp options, the rest means: a part of the tests
+	if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ] && [ "${arg}" != "-i" ]; then
 		do_all_tests=0
 	fi
 done
@@ -1999,7 +2154,7 @@ if [ $do_all_tests -eq 1 ]; then
 	exit $ret
 fi
 
-while getopts 'fesltra64bpkdmchCS' opt; do
+while getopts 'fesltra64bpkdmchCSi' opt; do
 	case $opt in
 		f)
 			subflows_tests
@@ -2050,6 +2205,8 @@ while getopts 'fesltra64bpkdmchCS' opt; do
 			;;
 		C)
 			;;
+		i)
+			;;
 		h | *)
 			usage
 			;;
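The `-i` option follows the same two-stage parsing as `-c` and `-C`: the regex pre-scan sets `ip_mptcp=1` even when the letter is bundled with test selectors, while the empty `i)` case only keeps getopts from falling through to `usage`. Some invocations this supports (paths assume the script's own directory):

	./mptcp_join.sh -i       # all tests, driven through "ip mptcp"
	./mptcp_join.sh -i -C    # all tests, ip mptcp plus data checksums
	./mptcp_join.sh -im      # fullmesh_tests only, still via ip mptcp

Note that only a bare `-c`, `-C` or `-i` argument preserves the run-everything default; bundling the modifier with a test selector, as in `-im`, clears `do_all_tests` and runs just the selected group.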
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index cbacf9f6538b..89839d1ff9d8 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -164,4 +164,22 @@ id 253 flags 10.0.0.5
 id 254 flags 10.0.0.2
 id 255 flags 10.0.0.3" "wrap-around ids"
 
+ip netns exec $ns1 ./pm_nl_ctl flush
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags subflow
+ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags backup
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,backup 10.0.1.1" "set flags (backup)"
+ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags nobackup
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow 10.0.1.1" " (nobackup)"
+ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,fullmesh 10.0.1.1" " (fullmesh)"
+ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow 10.0.1.1" " (nofullmesh)"
+ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)"
+
 exit $ret
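The new checks above exercise both lookup forms that set_flags() accepts after the pm_nl_ctl.c change below: matching the endpoint by address, and matching it by id. Pulled out of the test flow, the two spellings are:

	ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags backup   # look up by address
	ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh     # look up by id

The `no`-prefixed spellings (`nobackup`, `nofullmesh`) clear the corresponding flag again, which is what the paired dump checks verify.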
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 152b84e44d69..22a5ec1e128e 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -28,7 +28,7 @@ static void syntax(char *argv[])
 	fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
 	fprintf(stderr, "\tdel <id> [<ip>]\n");
 	fprintf(stderr, "\tget <id>\n");
-	fprintf(stderr, "\tset <ip> [flags backup|nobackup|fullmesh|nofullmesh]\n");
+	fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>]\n");
 	fprintf(stderr, "\tflush\n");
 	fprintf(stderr, "\tdump\n");
 	fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
@@ -657,8 +657,10 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
 	u_int32_t flags = 0;
 	u_int16_t family;
 	int nest_start;
+	int use_id = 0;
+	u_int8_t id;
 	int off = 0;
-	int arg;
+	int arg = 2;
 
 	memset(data, 0, sizeof(data));
 	nh = (void *)data;
@@ -674,29 +676,45 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
 	nest->rta_len = RTA_LENGTH(0);
 	off += NLMSG_ALIGN(nest->rta_len);
 
-	/* addr data */
-	rta = (void *)(data + off);
-	if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) {
-		family = AF_INET;
-		rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
-		rta->rta_len = RTA_LENGTH(4);
-	} else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) {
-		family = AF_INET6;
-		rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
-		rta->rta_len = RTA_LENGTH(16);
+	if (!strcmp(argv[arg], "id")) {
+		if (++arg >= argc)
+			error(1, 0, " missing id value");
+
+		use_id = 1;
+		id = atoi(argv[arg]);
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+		rta->rta_len = RTA_LENGTH(1);
+		memcpy(RTA_DATA(rta), &id, 1);
+		off += NLMSG_ALIGN(rta->rta_len);
 	} else {
-		error(1, errno, "can't parse ip %s", argv[2]);
+		/* addr data */
+		rta = (void *)(data + off);
+		if (inet_pton(AF_INET, argv[arg], RTA_DATA(rta))) {
+			family = AF_INET;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+			rta->rta_len = RTA_LENGTH(4);
+		} else if (inet_pton(AF_INET6, argv[arg], RTA_DATA(rta))) {
+			family = AF_INET6;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+			rta->rta_len = RTA_LENGTH(16);
+		} else {
+			error(1, errno, "can't parse ip %s", argv[arg]);
+		}
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/* family */
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &family, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
 	}
-	off += NLMSG_ALIGN(rta->rta_len);
 
-	/* family */
-	rta = (void *)(data + off);
-	rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
-	rta->rta_len = RTA_LENGTH(2);
-	memcpy(RTA_DATA(rta), &family, 2);
-	off += NLMSG_ALIGN(rta->rta_len);
+	if (++arg >= argc)
+		error(1, 0, " missing flags keyword");
 
-	for (arg = 3; arg < argc; arg++) {
+	for (; arg < argc; arg++) {
 		if (!strcmp(argv[arg], "flags")) {
 			char *tok, *str;
 
@@ -721,6 +739,21 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
 			rta->rta_len = RTA_LENGTH(4);
 			memcpy(RTA_DATA(rta), &flags, 4);
 			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "port")) {
+			u_int16_t port;
+
+			if (use_id)
+				error(1, 0, " port can't be used with id");
+
+			if (++arg >= argc)
+				error(1, 0, " missing port value");
+
+			port = atoi(argv[arg]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+			rta->rta_len = RTA_LENGTH(2);
+			memcpy(RTA_DATA(rta), &port, 2);
+			off += NLMSG_ALIGN(rta->rta_len);
 		} else {
 			error(1, 0, "unknown keyword %s", argv[arg]);
 		}
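Taken together, the parser now accepts three shapes of the `set` command; illustrative invocations (the address, id and port values are arbitrary):

	./pm_nl_ctl set 10.0.2.1 flags backup port 10100   # by address, with optional port
	./pm_nl_ctl set id 1 flags nobackup                # by id
	./pm_nl_ctl set id 1 flags backup port 10100       # rejected: "port can't be used with id"

When matching by id, the endpoint is identified by a one-byte MPTCP_PM_ADDR_ATTR_ID attribute and the family attribute is skipped entirely; the `use_id` check then vetoes the port keyword, presumably because MPTCP_PM_ADDR_ATTR_PORT is only meaningful together with an address.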