365 files changed, 8716 insertions, 3973 deletions
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst index ab08852e53ae..7940da9bc6c1 100644 --- a/Documentation/bpf/btf.rst +++ b/Documentation/bpf/btf.rst @@ -503,6 +503,19 @@ valid index (starting from 0) pointing to a member or an argument. * ``info.vlen``: 0 * ``type``: the type with ``btf_type_tag`` attribute +Currently, ``BTF_KIND_TYPE_TAG`` is only emitted for pointer types. +It has the following btf type chain: +:: + + ptr -> [type_tag]* + -> [const | volatile | restrict | typedef]* + -> base_type + +Basically, a pointer type points to zero or more +type_tag, then zero or more const/volatile/restrict/typedef +and finally the base type. The base type is one of +int, ptr, array, struct, union, enum, func_proto and float types. + 3. BTF Kernel API ================= diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index 3704836fe6df..5300837ac2c9 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -22,7 +22,13 @@ necessary across calls. Instruction encoding ==================== -eBPF uses 64-bit instructions with the following encoding: +eBPF has two instruction encodings: + + * the basic instruction encoding, which uses 64 bits to encode an instruction + * the wide instruction encoding, which appends a second 64-bit immediate value + (imm64) after the basic instruction for a total of 128 bits. + +The basic instruction encoding looks as follows: ============= ======= =============== ==================== ============ 32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB) @@ -82,9 +88,9 @@ BPF_ALU uses 32-bit wide operands while BPF_ALU64 uses 64-bit wide operands for otherwise identical operations. The code field encodes the operation as below: - ======== ===== ========================== + ======== ===== ================================================= code value description - ======== ===== ========================== + ======== ===== ================================================= BPF_ADD 0x00 dst += src BPF_SUB 0x10 dst -= src BPF_MUL 0x20 dst \*= src @@ -98,8 +104,8 @@ The code field encodes the operation as below: BPF_XOR 0xa0 dst ^= src BPF_MOV 0xb0 dst = src BPF_ARSH 0xc0 sign extending shift right - BPF_END 0xd0 endianness conversion - ======== ===== ========================== + BPF_END 0xd0 byte swap operations (see separate section below) + ======== ===== ================================================= BPF_ADD | BPF_X | BPF_ALU means:: @@ -118,6 +124,42 @@ BPF_XOR | BPF_K | BPF_ALU64 means:: src_reg = src_reg ^ imm32 +Byte swap instructions +---------------------- + +The byte swap instructions use an instruction class of ``BPF_ALU`` and a 4-bit +code field of ``BPF_END``. + +The byte swap instructions operate on the destination register +only and do not use a separate source register or immediate value. + +The 1-bit source operand field in the opcode is used to select what byte +order the operation converts from or to: + + ========= ===== ================================================= + source value description + ========= ===== ================================================= + BPF_TO_LE 0x00 convert between host byte order and little endian + BPF_TO_BE 0x08 convert between host byte order and big endian + ========= ===== ================================================= + +The imm field encodes the width of the swap operations. The following widths +are supported: 16, 32 and 64.
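As a minimal sketch, assuming the uapi constants from ``linux/bpf.h``, a 16-bit little-endian swap could be hand-assembled like so; the register choice is arbitrary, and the examples that follow give the corresponding semantics:

.. code-block:: C

	#include <linux/bpf.h>

	/* BPF_ALU | BPF_TO_LE | BPF_END with imm = 16,
	 * i.e. dst_reg = htole16(dst_reg)
	 */
	struct bpf_insn le16_swap = {
		.code    = BPF_ALU | BPF_END | BPF_TO_LE,
		.dst_reg = BPF_REG_2,	/* arbitrary destination register */
		.imm     = 16,		/* swap width in bits */
	};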
+ +Examples: + +``BPF_ALU | BPF_TO_LE | BPF_END`` with imm = 16 means:: + + dst_reg = htole16(dst_reg) + +``BPF_ALU | BPF_TO_BE | BPF_END`` with imm = 64 means:: + + dst_reg = htobe64(dst_reg) + +``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and +``BPF_TO_BE`` respectively. + + Jump instructions ----------------- @@ -176,63 +218,96 @@ The mode modifier is one of: ============= ===== ==================================== mode modifier value description ============= ===== ==================================== - BPF_IMM 0x00 used for 64-bit mov - BPF_ABS 0x20 legacy BPF packet access - BPF_IND 0x40 legacy BPF packet access - BPF_MEM 0x60 all normal load and store operations + BPF_IMM 0x00 64-bit immediate instructions + BPF_ABS 0x20 legacy BPF packet access (absolute) + BPF_IND 0x40 legacy BPF packet access (indirect) + BPF_MEM 0x60 regular load and store operations BPF_ATOMIC 0xc0 atomic operations ============= ===== ==================================== -BPF_MEM | <size> | BPF_STX means:: + +Regular load and store operations +--------------------------------- + +The ``BPF_MEM`` mode modifier is used to encode regular load and store +instructions that transfer data between a register and memory. + +``BPF_MEM | <size> | BPF_STX`` means:: *(size *) (dst_reg + off) = src_reg -BPF_MEM | <size> | BPF_ST means:: +``BPF_MEM | <size> | BPF_ST`` means:: *(size *) (dst_reg + off) = imm32 -BPF_MEM | <size> | BPF_LDX means:: +``BPF_MEM | <size> | BPF_LDX`` means:: dst_reg = *(size *) (src_reg + off) -Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. +Where size is one of: ``BPF_B``, ``BPF_H``, ``BPF_W``, or ``BPF_DW``. Atomic operations ----------------- -eBPF includes atomic operations, which use the immediate field for extra -encoding:: +Atomic operations are operations that operate on memory and cannot be +interrupted or corrupted by other accesses to the same memory region, +whether by other eBPF programs or by means outside of this specification. + +All atomic operations supported by eBPF are encoded as store operations +that use the ``BPF_ATOMIC`` mode modifier as follows: + + * ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations + * ``BPF_ATOMIC | BPF_DW | BPF_STX`` for 64-bit operations + * 8-bit and 16-bit wide atomic operations are not supported. - .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg - .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg +The imm field is used to encode the actual atomic operation. +Simple atomic operations use a subset of the values defined to encode +arithmetic operations in the imm field to encode the atomic operation: -The basic atomic operations supported are:: + ======== ===== =========== + imm value description + ======== ===== =========== + BPF_ADD 0x00 atomic add + BPF_OR 0x40 atomic or + BPF_AND 0x50 atomic and + BPF_XOR 0xa0 atomic xor + ======== ===== =========== - BPF_ADD - BPF_AND - BPF_OR - BPF_XOR -Each having equivalent semantics with the ``BPF_ADD`` example, that is: the -memory location addresed by ``dst_reg + off`` is atomically modified, with -``src_reg`` as the other operand. If the ``BPF_FETCH`` flag is set in the -immediate, then these operations also overwrite ``src_reg`` with the -value that was in memory before it was modified.
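As a minimal sketch, assuming the uapi constants from ``linux/bpf.h``, a 32-bit atomic fetch-add under this encoding could be hand-assembled like so (register choices are arbitrary):

.. code-block:: C

	#include <linux/bpf.h>

	/* BPF_ATOMIC | BPF_W | BPF_STX with imm = BPF_ADD | BPF_FETCH:
	 * atomically add src_reg to *(u32 *)(dst_reg + off) and write the
	 * old memory value back into src_reg
	 */
	struct bpf_insn fetch_add = {
		.code    = BPF_STX | BPF_ATOMIC | BPF_W,
		.dst_reg = BPF_REG_1,	/* holds the memory address */
		.src_reg = BPF_REG_2,	/* addend; receives the old value */
		.off     = 0,
		.imm     = BPF_ADD | BPF_FETCH,
	};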
+``BPF_ATOMIC | BPF_W | BPF_STX`` with imm = BPF_ADD means:: -The more special operations are:: + *(u32 *)(dst_reg + off16) += src_reg - BPF_XCHG +``BPF_ATOMIC | BPF_DW | BPF_STX`` with imm = BPF_ADD means:: -This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg + -off``. :: + *(u64 *)(dst_reg + off16) += src_reg - BPF_CMPXCHG +``BPF_XADD`` is a deprecated name for ``BPF_ATOMIC | BPF_ADD``. -This atomically compares the value addressed by ``dst_reg + off`` with -``R0``. If they match it is replaced with ``src_reg``. In either case, the -value that was there before is zero-extended and loaded back to ``R0``. +In addition to the simple atomic operations, there is also a modifier and +two complex atomic operations: -Note that 1 and 2 byte atomic operations are not supported. + =========== ================ =========================== + imm value description + =========== ================ =========================== + BPF_FETCH 0x01 modifier: return old value + BPF_XCHG 0xe0 | BPF_FETCH atomic exchange + BPF_CMPXCHG 0xf0 | BPF_FETCH atomic compare and exchange + =========== ================ =========================== + +The ``BPF_FETCH`` modifier is optional for simple atomic operations, and +always set for the complex atomic operations. If the ``BPF_FETCH`` flag +is set, then the operation also overwrites ``src_reg`` with the value that +was in memory before it was modified. + +The ``BPF_XCHG`` operation atomically exchanges ``src_reg`` with the value +addressed by ``dst_reg + off``. + +The ``BPF_CMPXCHG`` operation atomically compares the value addressed by +``dst_reg + off`` with ``R0``. If they match, the value addressed by +``dst_reg + off`` is replaced with ``src_reg``. In either case, the +value that was at ``dst_reg + off`` before the operation is zero-extended +and loaded back to ``R0``. Clang can generate atomic instructions by default when ``-mcpu=v3`` is enabled. If a lower version for ``-mcpu`` is set, the only atomic instruction @@ -240,40 +315,52 @@ Clang can generate is ``BPF_ADD`` *without* ``BPF_FETCH``. If you need to enable the atomics features, while keeping a lower ``-mcpu`` version, you can use ``-Xclang -target-feature -Xclang +alu32``. -You may encounter ``BPF_XADD`` - this is a legacy name for ``BPF_ATOMIC``, -referring to the exclusive-add operation encoded when the immediate field is -zero. +64-bit immediate instructions +----------------------------- -16-byte instructions -------------------- +Instructions with the ``BPF_IMM`` mode modifier use the wide instruction +encoding for an extra imm64 value. -eBPF has one 16-byte instruction: ``BPF_LD | BPF_DW | BPF_IMM`` which consists -of two consecutive ``struct bpf_insn`` 8-byte blocks and interpreted as single -instruction that loads 64-bit immediate value into a dst_reg. +There is currently only one such instruction. -Packet access instructions -------------------------- +``BPF_LD | BPF_DW | BPF_IMM`` means:: -eBPF has two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and -(BPF_IND | <size> | BPF_LD) which are used to access packet data. + dst_reg = imm64 -They had to be carried over from classic BPF to have strong performance of -socket filters running in eBPF interpreter. These instructions can only -be used when interpreter context is a pointer to ``struct sk_buff`` and -have seven implicit operands. Register R6 is an implicit input that must -contain pointer to sk_buff. Register R0 is an implicit output which contains -the data fetched from the packet.
Registers R1-R5 are scratch registers -and must not be used to store the data across BPF_ABS | BPF_LD or -BPF_IND | BPF_LD instructions. -These instructions have implicit program exit condition as well. When -eBPF program is trying to access the data beyond the packet boundary, -the interpreter will abort the execution of the program. JIT compilers -therefore must preserve this property. src_reg and imm32 fields are -explicit inputs to these instructions. +Legacy BPF Packet access instructions +------------------------------------- -For example, BPF_IND | BPF_W | BPF_LD means:: +eBPF has special instructions for access to packet data that have been +carried over from classic BPF to retain the performance of legacy socket +filters running in the eBPF interpreter. - R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32)) +The instructions come in two forms: ``BPF_ABS | <size> | BPF_LD`` and +``BPF_IND | <size> | BPF_LD``. -and R1 - R5 are clobbered. +These instructions are used to access packet data and can only be used when +the program context is a pointer to a networking packet. ``BPF_ABS`` +accesses packet data at an absolute offset specified by the immediate data +and ``BPF_IND`` accesses packet data at an offset that includes the value of +a register in addition to the immediate data. + +These instructions have seven implicit operands: + + * Register R6 is an implicit input that must contain a pointer to a + struct sk_buff. + * Register R0 is an implicit output which contains the data fetched from + the packet. + * Registers R1-R5 are scratch registers that are clobbered by + ``BPF_ABS | BPF_LD`` or ``BPF_IND | BPF_LD`` instructions. + +These instructions have an implicit program exit condition as well. When an +eBPF program tries to access data beyond the packet boundary, the +program execution will be aborted. + +``BPF_ABS | BPF_W | BPF_LD`` means:: + + R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + imm32)) + +``BPF_IND | BPF_W | BPF_LD`` means:: + + R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32)) diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml index bda821065a2b..ee2ccacc39ff 100644 --- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml +++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml @@ -45,8 +45,10 @@ properties: - items: - enum: + - renesas,r9a07g043-gbeth # RZ/G2UL - renesas,r9a07g044-gbeth # RZ/G2{L,LC} - - const: renesas,rzg2l-gbeth # RZ/G2L + - renesas,r9a07g054-gbeth # RZ/V2L + - const: renesas,rzg2l-gbeth # RZ/{G2L,G2UL,V2L} family reg: true diff --git a/Documentation/networking/mctp.rst b/Documentation/networking/mctp.rst index 46f74bffce0f..c628cb5406d2 100644 --- a/Documentation/networking/mctp.rst +++ b/Documentation/networking/mctp.rst @@ -212,6 +212,54 @@ remote address is already known, or the message does not require a reply. Like the send calls, sockets will only receive responses to requests they have sent (TO=1) and may only respond (TO=0) to requests they have received. +``ioctl(SIOCMCTPALLOCTAG)`` and ``ioctl(SIOCMCTPDROPTAG)`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +These ioctls give applications more control over MCTP message tags, by allocating +(and dropping) tag values explicitly, rather than the kernel automatically +allocating a per-message tag at ``sendmsg()`` time.
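A minimal usage sketch, assuming the ``struct mctp_ioc_tag_ctl`` layout and ioctl request values described below (from ``linux/mctp.h``); the ``alloc_reply_tag()`` helper is hypothetical:

.. code-block:: C

	#include <sys/ioctl.h>
	#include <linux/mctp.h>

	/* allocate a preallocated tag for 'peer' on socket 'sd'; the returned
	 * tag (including MCTP_TAG_OWNER | MCTP_TAG_PREALLOC) can be used as-is
	 * in the smctp_tag field of later sendmsg() calls
	 */
	static int alloc_reply_tag(int sd, mctp_eid_t peer, __u8 *tag)
	{
		struct mctp_ioc_tag_ctl ctl = { .peer_addr = peer };

		if (ioctl(sd, SIOCMCTPALLOCTAG, &ctl) < 0)
			return -1;
		*tag = ctl.tag;
		return 0;
	}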
+ +In general, you will only need to use these ioctls if your MCTP protocol does +not fit the usual request/response model: for example, if you need to persist +tags across multiple requests, or if a request may generate more than one response. +In these cases, the ioctls allow you to decouple the tag allocation (and +release) from individual message send and receive operations. + +Both ioctls are passed a pointer to a ``struct mctp_ioc_tag_ctl``: + +.. code-block:: C + + struct mctp_ioc_tag_ctl { + mctp_eid_t peer_addr; + __u8 tag; + __u16 flags; + }; + +``SIOCMCTPALLOCTAG`` allocates a tag for a specific peer, which an application +can use in future ``sendmsg()`` calls. The application populates the +``peer_addr`` member with the remote EID. Other fields must be zero. + +On return, the ``tag`` member will be populated with the allocated tag value. +The allocated tag will have the following tag bits set: + + - ``MCTP_TAG_OWNER``: it only makes sense to allocate tags if you're the tag + owner + + - ``MCTP_TAG_PREALLOC``: to indicate to ``sendmsg()`` that this is a + preallocated tag. + + - ... and the actual tag value, within the least-significant three bits + (``MCTP_TAG_MASK``). Note that zero is a valid tag value. + +The tag value should be used as-is for the ``smctp_tag`` member of ``struct +sockaddr_mctp``. + +``SIOCMCTPDROPTAG`` releases a tag that has been previously allocated by a +``SIOCMCTPALLOCTAG`` ioctl. The ``peer_addr`` must be the same as used for the +allocation, and the ``tag`` value must match exactly the tag returned from the +allocation (including the ``MCTP_TAG_OWNER`` and ``MCTP_TAG_PREALLOC`` bits). +The ``flags`` field must be zero. Kernel internals ================ diff --git a/MAINTAINERS b/MAINTAINERS index c7169da15ab4..e70f5b26bc50 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3523,6 +3523,8 @@ F: net/sched/act_bpf.c F: net/sched/cls_bpf.c F: samples/bpf/ F: scripts/bpf_doc.py +F: scripts/pahole-flags.sh +F: scripts/pahole-version.sh F: tools/bpf/ F: tools/lib/bpf/ F: tools/testing/selftests/bpf/ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index e96d4d87291f..74f9a9b6a053 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1143,6 +1143,11 @@ out: return prog; } +bool bpf_jit_supports_kfunc_call(void) +{ + return true; +} + u64 bpf_jit_alloc_exec_limit(void) { return VMALLOC_END - VMALLOC_START; diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 56dd1f4e3e44..a4f4d347e6bd 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -264,7 +264,7 @@ skip_codegen_passes: fp->jited = 1; fp->jited_len = proglen + FUNCTION_DESCR_SIZE; - bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); + bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + bpf_hdr->size); if (!fp->is_func || extra_pass) { bpf_jit_binary_lock_ro(bpf_hdr); bpf_prog_fill_jited_linfo(fp, addrs); diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index b1e38784eb23..fa0759bfe498 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1599,7 +1599,7 @@ skip_init_ctx: if (bpf_jit_enable > 1) bpf_jit_dump(prog->len, image_size, pass, ctx.image); - bpf_flush_icache(header, (u8 *)header + (header->pages * PAGE_SIZE)); + bpf_flush_icache(header, (u8 *)header + header->size); if (!prog->is_func || extra_pass) { bpf_jit_binary_lock_ro(header); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 
9f5bd41bf660..995f2dc28631 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -158,6 +158,7 @@ config X86 select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE + select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if X86_64 diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index b7421780e4e9..4cc18ba1b75e 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -44,6 +44,7 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len); extern void *text_poke(void *addr, const void *opcode, size_t len); extern void text_poke_sync(void); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); +extern void *text_poke_copy(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5007c3ffe96f..018b61febf0e 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1102,6 +1102,40 @@ void *text_poke_kgdb(void *addr, const void *opcode, size_t len) return __text_poke(addr, opcode, len); } +/** + * text_poke_copy - Copy instructions into (an unused part of) RX memory + * @addr: address to modify + * @opcode: source of the copy + * @len: length to copy, could be more than 2x PAGE_SIZE + * + * Not safe against concurrent execution; useful for JITs to dump + * new code blocks into unused regions of RX memory. Can be used in + * conjunction with synchronize_rcu_tasks() to wait for existing + * execution to quiesce after having made sure no existing function + * pointers are live. 
+ */ +void *text_poke_copy(void *addr, const void *opcode, size_t len) +{ + unsigned long start = (unsigned long)addr; + size_t patched = 0; + + if (WARN_ON_ONCE(core_kernel_text(start))) + return NULL; + + mutex_lock(&text_mutex); + while (patched < len) { + unsigned long ptr = start + patched; + size_t s; + + s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched); + + __text_poke((void *)ptr, opcode + patched, s); + patched += s; + } + mutex_unlock(&text_mutex); + return addr; +} + static void do_sync_core(void *info) { sync_core(); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 2b1e266ff95c..c7db0fe4de2f 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -330,8 +330,7 @@ static int emit_jump(u8 **pprog, void *func, void *ip) } static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, - void *old_addr, void *new_addr, - const bool text_live) + void *old_addr, void *new_addr) { const u8 *nop_insn = x86_nops[5]; u8 old_insn[X86_PATCH_SIZE]; @@ -365,10 +364,7 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, goto out; ret = 1; if (memcmp(ip, new_insn, X86_PATCH_SIZE)) { - if (text_live) - text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL); - else - memcpy(ip, new_insn, X86_PATCH_SIZE); + text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL); ret = 0; } out: @@ -384,7 +380,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, /* BPF poking in modules is not supported */ return -EINVAL; - return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true); + return __bpf_arch_text_poke(ip, t, old_addr, new_addr); } #define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8) @@ -558,24 +554,15 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog) mutex_lock(&array->aux->poke_mutex); target = array->ptrs[poke->tail_call.key]; if (target) { - /* Plain memcpy is used when image is not live yet - * and still not locked as read-only. Once poke - * location is active (poke->tailcall_target_stable), - * any parallel bpf_arch_text_poke() might occur - * still on the read-write image until we finally - * locked it as read-only. Both modifications on - * the given image are under text_mutex to avoid - * interference. 
- */ ret = __bpf_arch_text_poke(poke->tailcall_target, BPF_MOD_JUMP, NULL, (u8 *)target->bpf_func + - poke->adj_off, false); + poke->adj_off); BUG_ON(ret < 0); ret = __bpf_arch_text_poke(poke->tailcall_bypass, BPF_MOD_JUMP, (u8 *)poke->tailcall_target + - X86_PATCH_SIZE, NULL, false); + X86_PATCH_SIZE, NULL); BUG_ON(ret < 0); } WRITE_ONCE(poke->tailcall_target_stable, true); @@ -787,7 +774,6 @@ static int emit_atomic(u8 **pprog, u8 atomic_op, /* emit opcode */ switch (atomic_op) { case BPF_ADD: - case BPF_SUB: case BPF_AND: case BPF_OR: case BPF_XOR: @@ -867,7 +853,7 @@ static void emit_nops(u8 **pprog, int len) #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp))) -static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, +static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image, int oldproglen, struct jit_context *ctx, bool jmp_padding) { bool tail_call_reachable = bpf_prog->aux->tail_call_reachable; @@ -894,8 +880,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, push_callee_regs(&prog, callee_regs_used); ilen = prog - temp; - if (image) - memcpy(image + proglen, temp, ilen); + if (rw_image) + memcpy(rw_image + proglen, temp, ilen); proglen += ilen; addrs[0] = proglen; prog = temp; @@ -1324,6 +1310,9 @@ st: if (is_imm8(insn->off)) pr_err("extable->insn doesn't fit into 32-bit\n"); return -EFAULT; } + /* switch ex to rw buffer for writes */ + ex = (void *)rw_image + ((void *)ex - (void *)image); + ex->insn = delta; ex->data = EX_TYPE_BPF; @@ -1706,7 +1695,7 @@ emit_jmp: pr_err("bpf_jit: fatal error\n"); return -EFAULT; } - memcpy(image + proglen, temp, ilen); + memcpy(rw_image + proglen, temp, ilen); } proglen += ilen; addrs[i] = proglen; @@ -2247,6 +2236,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs) } struct x64_jit_data { + struct bpf_binary_header *rw_header; struct bpf_binary_header *header; int *addrs; u8 *image; @@ -2259,6 +2249,7 @@ struct x64_jit_data { struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { + struct bpf_binary_header *rw_header = NULL; struct bpf_binary_header *header = NULL; struct bpf_prog *tmp, *orig_prog = prog; struct x64_jit_data *jit_data; @@ -2267,6 +2258,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bool tmp_blinded = false; bool extra_pass = false; bool padding = false; + u8 *rw_image = NULL; u8 *image = NULL; int *addrs; int pass; @@ -2302,6 +2294,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) oldproglen = jit_data->proglen; image = jit_data->image; header = jit_data->header; + rw_header = jit_data->rw_header; + rw_image = (void *)rw_header + ((void *)image - (void *)header); extra_pass = true; padding = true; goto skip_init_addrs; @@ -2332,12 +2326,12 @@ skip_init_addrs: for (pass = 0; pass < MAX_PASSES || image; pass++) { if (!padding && pass >= PADDING_PASSES) padding = true; - proglen = do_jit(prog, addrs, image, oldproglen, &ctx, padding); + proglen = do_jit(prog, addrs, image, rw_image, oldproglen, &ctx, padding); if (proglen <= 0) { out_image: image = NULL; if (header) - bpf_jit_binary_free(header); + bpf_jit_binary_pack_free(header, rw_header); prog = orig_prog; goto out_addrs; } @@ -2361,8 +2355,9 @@ out_image: sizeof(struct exception_table_entry); /* allocate module memory for x86 insns and extable */ - header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size, - &image, align, jit_fill_hole); + header = bpf_jit_binary_pack_alloc(roundup(proglen, align) + extable_size, + &image, align, 
&rw_header, &rw_image, + jit_fill_hole); if (!header) { prog = orig_prog; goto out_addrs; @@ -2378,14 +2373,26 @@ out_image: if (image) { if (!prog->is_func || extra_pass) { + /* + * bpf_jit_binary_pack_finalize fails in two scenarios: + * 1) header is not pointing to proper module memory; + * 2) the arch doesn't support bpf_arch_text_copy(). + * + * Both cases are serious bugs and justify WARN_ON. + */ + if (WARN_ON(bpf_jit_binary_pack_finalize(prog, header, rw_header))) { + prog = orig_prog; + goto out_addrs; + } + bpf_tail_call_direct_fixup(prog); - bpf_jit_binary_lock_ro(header); } else { jit_data->addrs = addrs; jit_data->ctx = ctx; jit_data->proglen = proglen; jit_data->image = image; jit_data->header = header; + jit_data->rw_header = rw_header; } prog->bpf_func = (void *)image; prog->jited = 1; @@ -2413,3 +2420,10 @@ bool bpf_jit_supports_kfunc_call(void) { return true; } + +void *bpf_arch_text_copy(void *dst, void *src, size_t len) +{ + if (text_poke_copy(dst, src, len) == NULL) + return ERR_PTR(-EINVAL); + return dst; +} diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index c98a4b0a8453..303c8d32d451 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -19,6 +19,7 @@ #include <linux/in.h> #include <net/arp.h> #include <net/ipv6.h> +#include <net/ndisc.h> #include <asm/byteorder.h> #include <net/bonding.h> #include <net/bond_alb.h> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 238b56d77c36..617c2bf8c5a7 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -6048,27 +6048,38 @@ static int __net_init bond_net_init(struct net *net) return 0; } -static void __net_exit bond_net_exit(struct net *net) +static void __net_exit bond_net_exit_batch(struct list_head *net_list) { - struct bond_net *bn = net_generic(net, bond_net_id); - struct bonding *bond, *tmp_bond; + struct bond_net *bn; + struct net *net; LIST_HEAD(list); - bond_destroy_sysfs(bn); + list_for_each_entry(net, net_list, exit_list) { + bn = net_generic(net, bond_net_id); + bond_destroy_sysfs(bn); + } /* Kill off any bonds created after unregistering bond rtnl ops */ rtnl_lock(); - list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list) - unregister_netdevice_queue(bond->dev, &list); + list_for_each_entry(net, net_list, exit_list) { + struct bonding *bond, *tmp_bond; + + bn = net_generic(net, bond_net_id); + list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list) + unregister_netdevice_queue(bond->dev, &list); + } unregister_netdevice_many(&list); rtnl_unlock(); - bond_destroy_proc_dir(bn); + list_for_each_entry(net, net_list, exit_list) { + bn = net_generic(net, bond_net_id); + bond_destroy_proc_dir(bn); + } } static struct pernet_operations bond_net_ops = { .init = bond_net_init, - .exit = bond_net_exit, + .exit_batch = bond_net_exit_batch, .id = &bond_net_id, .size = sizeof(struct bond_net), }; diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index 46b150e6289e..cfe37be42be4 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -307,7 +307,6 @@ void __net_init bond_create_proc_dir(struct bond_net *bn) } /* Destroy the bonding directory under /proc/net, if empty. - * Caller must hold rtnl_lock. 
*/ void __net_exit bond_destroy_proc_dir(struct bond_net *bn) { diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 7670796c2aa1..c54649c4c3a0 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -585,7 +585,7 @@ static void mv88e6185_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, { u8 cmode = chip->ports[port].cmode; - if (cmode <= ARRAY_SIZE(mv88e6185_phy_interface_modes) && + if (cmode < ARRAY_SIZE(mv88e6185_phy_interface_modes) && mv88e6185_phy_interface_modes[cmode]) __set_bit(mv88e6185_phy_interface_modes[cmode], config->supported_interfaces); @@ -2602,7 +2602,7 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, err = mv88e6xxx_port_set_map_da(chip, port, true); if (err) - return err; + goto unlock; err = mv88e6xxx_port_commit_pvid(chip, port); if (err) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 83066af3757f..bb2fbb6826ac 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1018,7 +1018,7 @@ qca8k_phy_eth_command(struct qca8k_priv *priv, bool read, int phy, clear_skb = qca8k_alloc_mdio_header(MDIO_WRITE, QCA8K_MDIO_MASTER_CTRL, &clear_val, QCA8K_ETHERNET_PHY_PRIORITY, sizeof(clear_val)); - if (!write_skb) { + if (!clear_skb) { ret = -ENOMEM; goto err_clear_skb; } diff --git a/drivers/net/dsa/realtek/Kconfig b/drivers/net/dsa/realtek/Kconfig index 5242698143d9..b7427a8292b2 100644 --- a/drivers/net/dsa/realtek/Kconfig +++ b/drivers/net/dsa/realtek/Kconfig @@ -12,7 +12,6 @@ menuconfig NET_DSA_REALTEK config NET_DSA_REALTEK_MDIO tristate "Realtek MDIO connected switch driver" depends on NET_DSA_REALTEK - default y help Select to enable support for registering switches configured through MDIO. @@ -20,14 +19,12 @@ config NET_DSA_REALTEK_MDIO config NET_DSA_REALTEK_SMI tristate "Realtek SMI connected switch driver" depends on NET_DSA_REALTEK - default y help Select to enable support for registering switches connected through SMI. config NET_DSA_REALTEK_RTL8365MB tristate "Realtek RTL8365MB switch subdriver" - default y depends on NET_DSA_REALTEK depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO select NET_DSA_TAG_RTL8_4 @@ -36,7 +33,6 @@ config NET_DSA_REALTEK_RTL8365MB config NET_DSA_REALTEK_RTL8366RB tristate "Realtek RTL8366RB switch subdriver" - default y depends on NET_DSA_REALTEK depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO select NET_DSA_TAG_RTL4_A diff --git a/drivers/net/dsa/realtek/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c index e1c5a67a21c4..2ed592147c20 100644 --- a/drivers/net/dsa/realtek/rtl8365mb.c +++ b/drivers/net/dsa/realtek/rtl8365mb.c @@ -952,39 +952,17 @@ static bool rtl8365mb_phy_mode_supported(struct dsa_switch *ds, int port, return false; } -static void rtl8365mb_phylink_validate(struct dsa_switch *ds, int port, - unsigned long *supported, - struct phylink_link_state *state) +static void rtl8365mb_phylink_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) { - struct realtek_priv *priv = ds->priv; - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0 }; - - /* include/linux/phylink.h says: - * When @state->interface is %PHY_INTERFACE_MODE_NA, phylink - * expects the MAC driver to return all supported link modes. 
- */ - if (state->interface != PHY_INTERFACE_MODE_NA && - !rtl8365mb_phy_mode_supported(ds, port, state->interface)) { - dev_err(priv->dev, "phy mode %s is unsupported on port %d\n", - phy_modes(state->interface), port); - linkmode_zero(supported); - return; - } - - phylink_set_port_modes(mask); - - phylink_set(mask, Autoneg); - phylink_set(mask, Pause); - phylink_set(mask, Asym_Pause); - - phylink_set(mask, 10baseT_Half); - phylink_set(mask, 10baseT_Full); - phylink_set(mask, 100baseT_Half); - phylink_set(mask, 100baseT_Full); - phylink_set(mask, 1000baseT_Full); - - linkmode_and(supported, supported, mask); - linkmode_and(state->advertising, state->advertising, mask); + if (dsa_is_user_port(ds, port)) + __set_bit(PHY_INTERFACE_MODE_INTERNAL, + config->supported_interfaces); + else if (dsa_is_cpu_port(ds, port)) + phy_interface_set_rgmii(config->supported_interfaces); + + config->mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE | + MAC_10 | MAC_100 | MAC_1000FD; } static void rtl8365mb_phylink_mac_config(struct dsa_switch *ds, int port, @@ -2020,7 +1998,7 @@ static const struct dsa_switch_ops rtl8365mb_switch_ops_smi = { .get_tag_protocol = rtl8365mb_get_tag_protocol, .setup = rtl8365mb_setup, .teardown = rtl8365mb_teardown, - .phylink_validate = rtl8365mb_phylink_validate, + .phylink_get_caps = rtl8365mb_phylink_get_caps, .phylink_mac_config = rtl8365mb_phylink_mac_config, .phylink_mac_link_down = rtl8365mb_phylink_mac_link_down, .phylink_mac_link_up = rtl8365mb_phylink_mac_link_up, @@ -2038,7 +2016,7 @@ static const struct dsa_switch_ops rtl8365mb_switch_ops_mdio = { .get_tag_protocol = rtl8365mb_get_tag_protocol, .setup = rtl8365mb_setup, .teardown = rtl8365mb_teardown, - .phylink_validate = rtl8365mb_phylink_validate, + .phylink_get_caps = rtl8365mb_phylink_get_caps, .phylink_mac_config = rtl8365mb_phylink_mac_config, .phylink_mac_link_down = rtl8365mb_phylink_mac_link_down, .phylink_mac_link_up = rtl8365mb_phylink_mac_link_up, diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index 8aec5d9fbfef..ad57209007e1 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -138,11 +138,6 @@ MODULE_PARM_DESC(use_mmio, "Use MMIO (1) or PIO(0) to access the NIC. " module_param(rx_copybreak, int, 0); module_param(use_mmio, int, 0); -#if defined(NETIF_F_TSO) && MAX_SKB_FRAGS > 32 -#warning Typhoon only supports 32 entries in its SG list for TSO, disabling TSO -#undef NETIF_F_TSO -#endif - #if TXLO_ENTRIES <= (2 * MAX_SKB_FRAGS) #error TX ring too small! 
#endif @@ -2261,9 +2256,28 @@ out: return mode; } +#if MAX_SKB_FRAGS > 32 + +#include <net/vxlan.h> + +static netdev_features_t typhoon_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + if (skb_shinfo(skb)->nr_frags > 32 && skb_is_gso(skb)) + features &= ~NETIF_F_GSO_MASK; + + features = vlan_features_check(skb, features); + return vxlan_features_check(skb, features); +} +#endif + static const struct net_device_ops typhoon_netdev_ops = { .ndo_open = typhoon_open, .ndo_stop = typhoon_close, +#if MAX_SKB_FRAGS > 32 + .ndo_features_check = typhoon_features_check, +#endif .ndo_start_xmit = typhoon_start_tx, .ndo_set_rx_mode = typhoon_set_rx_mode, .ndo_tx_timeout = typhoon_tx_timeout, diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 537e6a85e18d..fbf4588994ac 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -2413,11 +2413,13 @@ static void et131x_tx_dma_memory_free(struct et131x_adapter *adapter) kfree(tx_ring->tcb_ring); } +#define MAX_TX_DESC_PER_PKT 24 + /* nic_send_packet - NIC specific send handler for version B silicon. */ static int nic_send_packet(struct et131x_adapter *adapter, struct tcb *tcb) { u32 i; - struct tx_desc desc[24]; + struct tx_desc desc[MAX_TX_DESC_PER_PKT]; u32 frag = 0; u32 thiscopy, remainder; struct sk_buff *skb = tcb->skb; @@ -2432,9 +2434,6 @@ static int nic_send_packet(struct et131x_adapter *adapter, struct tcb *tcb) * more than 5 fragments. */ - /* nr_frags should be no more than 18. */ - BUILD_BUG_ON(MAX_SKB_FRAGS + 1 > 23); - memset(desc, 0, sizeof(struct tx_desc) * (nr_frags + 1)); for (i = 0; i < nr_frags; i++) { @@ -3762,6 +3761,13 @@ static netdev_tx_t et131x_tx(struct sk_buff *skb, struct net_device *netdev) struct et131x_adapter *adapter = netdev_priv(netdev); struct tx_ring *tx_ring = &adapter->tx_ring; + /* This driver does not support TSO, it is very unlikely + * this condition is true. + */ + if (unlikely(skb_shinfo(skb)->nr_frags > MAX_TX_DESC_PER_PKT - 2)) { + if (skb_linearize(skb)) + goto drop_err; + } /* stop the queue if it's getting full */ if (tx_ring->used >= NUM_TCB - 1 && !netif_queue_stopped(netdev)) netif_stop_queue(netdev); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index a19dd6797070..447a75ea0cc1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1271,7 +1271,7 @@ struct bnx2x_fw_stats_data { struct per_port_stats port; struct per_pf_stats pf; struct fcoe_statistics_params fcoe; - struct per_queue_stats queue_stats[1]; + struct per_queue_stats queue_stats[]; }; /* Public slow path states */ diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index ba28aa444e5a..8e07192e409f 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1539,7 +1539,7 @@ static int liquidio_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) * compute the delta in terms of coprocessor clocks. 
*/ delta = (u64)ppb << 32; - do_div(delta, oct->coproc_clock_rate); + div64_u64(delta, oct->coproc_clock_rate); spin_lock_irqsave(&lio->ptp_lock, flags); comp = lio_pci_readq(oct, CN6XXX_MIO_PTP_CLOCK_COMP); @@ -1672,7 +1672,7 @@ static void liquidio_ptp_init(struct octeon_device *oct) u64 clock_comp, cfg; clock_comp = (u64)NSEC_PER_SEC << 32; - do_div(clock_comp, oct->coproc_clock_rate); + div64_u64(clock_comp, oct->coproc_clock_rate); lio_pci_writeq(oct, clock_comp, CN6XXX_MIO_PTP_CLOCK_COMP); /* Enable */ diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index c710dc17be90..8dd7bf9014ec 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -340,7 +340,7 @@ enum wake_event_bits { struct netdev_desc { __le32 next_desc; __le32 status; - struct desc_frag { __le32 addr, length; } frag[1]; + struct desc_frag { __le32 addr, length; } frag; }; /* Bits in netdev_desc.status */ @@ -980,8 +980,8 @@ static void tx_timeout(struct net_device *dev, unsigned int txqueue) le32_to_cpu(np->tx_ring[i].next_desc), le32_to_cpu(np->tx_ring[i].status), (le32_to_cpu(np->tx_ring[i].status) >> 2) & 0xff, - le32_to_cpu(np->tx_ring[i].frag[0].addr), - le32_to_cpu(np->tx_ring[i].frag[0].length)); + le32_to_cpu(np->tx_ring[i].frag.addr), + le32_to_cpu(np->tx_ring[i].frag.length)); } printk(KERN_DEBUG "TxListPtr=%08x netif_queue_stopped=%d\n", ioread32(np->base + TxListPtr), @@ -1027,7 +1027,7 @@ static void init_ring(struct net_device *dev) np->rx_ring[i].next_desc = cpu_to_le32(np->rx_ring_dma + ((i+1)%RX_RING_SIZE)*sizeof(*np->rx_ring)); np->rx_ring[i].status = 0; - np->rx_ring[i].frag[0].length = 0; + np->rx_ring[i].frag.length = 0; np->rx_skbuff[i] = NULL; } @@ -1039,16 +1039,16 @@ static void init_ring(struct net_device *dev) if (skb == NULL) break; skb_reserve(skb, 2); /* 16 byte align the IP header. */ - np->rx_ring[i].frag[0].addr = cpu_to_le32( + np->rx_ring[i].frag.addr = cpu_to_le32( dma_map_single(&np->pci_dev->dev, skb->data, np->rx_buf_sz, DMA_FROM_DEVICE)); if (dma_mapping_error(&np->pci_dev->dev, - np->rx_ring[i].frag[0].addr)) { + np->rx_ring[i].frag.addr)) { dev_kfree_skb(skb); np->rx_skbuff[i] = NULL; break; } - np->rx_ring[i].frag[0].length = cpu_to_le32(np->rx_buf_sz | LastFrag); + np->rx_ring[i].frag.length = cpu_to_le32(np->rx_buf_sz | LastFrag); } np->dirty_rx = (unsigned int)(i - RX_RING_SIZE); @@ -1097,12 +1097,12 @@ start_tx (struct sk_buff *skb, struct net_device *dev) txdesc->next_desc = 0; txdesc->status = cpu_to_le32 ((entry << 2) | DisableAlign); - txdesc->frag[0].addr = cpu_to_le32(dma_map_single(&np->pci_dev->dev, + txdesc->frag.addr = cpu_to_le32(dma_map_single(&np->pci_dev->dev, skb->data, skb->len, DMA_TO_DEVICE)); if (dma_mapping_error(&np->pci_dev->dev, - txdesc->frag[0].addr)) + txdesc->frag.addr)) goto drop_frame; - txdesc->frag[0].length = cpu_to_le32 (skb->len | LastFrag); + txdesc->frag.length = cpu_to_le32 (skb->len | LastFrag); /* Increment cur_tx before tasklet_schedule() */ np->cur_tx++; @@ -1151,7 +1151,7 @@ reset_tx (struct net_device *dev) skb = np->tx_skbuff[i]; if (skb) { dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(np->tx_ring[i].frag[0].addr), + le32_to_cpu(np->tx_ring[i].frag.addr), skb->len, DMA_TO_DEVICE); dev_kfree_skb_any(skb); np->tx_skbuff[i] = NULL; @@ -1271,12 +1271,12 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) skb = np->tx_skbuff[entry]; /* Free the original skb. 
*/ dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(np->tx_ring[entry].frag[0].addr), + le32_to_cpu(np->tx_ring[entry].frag.addr), skb->len, DMA_TO_DEVICE); dev_consume_skb_irq(np->tx_skbuff[entry]); np->tx_skbuff[entry] = NULL; - np->tx_ring[entry].frag[0].addr = 0; - np->tx_ring[entry].frag[0].length = 0; + np->tx_ring[entry].frag.addr = 0; + np->tx_ring[entry].frag.length = 0; } spin_unlock(&np->lock); } else { @@ -1290,12 +1290,12 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) skb = np->tx_skbuff[entry]; /* Free the original skb. */ dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(np->tx_ring[entry].frag[0].addr), + le32_to_cpu(np->tx_ring[entry].frag.addr), skb->len, DMA_TO_DEVICE); dev_consume_skb_irq(np->tx_skbuff[entry]); np->tx_skbuff[entry] = NULL; - np->tx_ring[entry].frag[0].addr = 0; - np->tx_ring[entry].frag[0].length = 0; + np->tx_ring[entry].frag.addr = 0; + np->tx_ring[entry].frag.length = 0; } spin_unlock(&np->lock); } @@ -1372,16 +1372,16 @@ static void rx_poll(struct tasklet_struct *t) (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) { skb_reserve(skb, 2); /* 16 byte align the IP header */ dma_sync_single_for_cpu(&np->pci_dev->dev, - le32_to_cpu(desc->frag[0].addr), + le32_to_cpu(desc->frag.addr), np->rx_buf_sz, DMA_FROM_DEVICE); skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len); dma_sync_single_for_device(&np->pci_dev->dev, - le32_to_cpu(desc->frag[0].addr), + le32_to_cpu(desc->frag.addr), np->rx_buf_sz, DMA_FROM_DEVICE); skb_put(skb, pkt_len); } else { dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(desc->frag[0].addr), + le32_to_cpu(desc->frag.addr), np->rx_buf_sz, DMA_FROM_DEVICE); skb_put(skb = np->rx_skbuff[entry], pkt_len); np->rx_skbuff[entry] = NULL; @@ -1427,18 +1427,18 @@ static void refill_rx (struct net_device *dev) if (skb == NULL) break; /* Better luck next round. */ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ - np->rx_ring[entry].frag[0].addr = cpu_to_le32( + np->rx_ring[entry].frag.addr = cpu_to_le32( dma_map_single(&np->pci_dev->dev, skb->data, np->rx_buf_sz, DMA_FROM_DEVICE)); if (dma_mapping_error(&np->pci_dev->dev, - np->rx_ring[entry].frag[0].addr)) { + np->rx_ring[entry].frag.addr)) { dev_kfree_skb_irq(skb); np->rx_skbuff[entry] = NULL; break; } } /* Perhaps we need not reset this field. */ - np->rx_ring[entry].frag[0].length = + np->rx_ring[entry].frag.length = cpu_to_le32(np->rx_buf_sz | LastFrag); np->rx_ring[entry].status = 0; cnt++; @@ -1870,14 +1870,14 @@ static int netdev_close(struct net_device *dev) (int)(np->tx_ring_dma)); for (i = 0; i < TX_RING_SIZE; i++) printk(KERN_DEBUG " #%d desc. %4.4x %8.8x %8.8x.\n", - i, np->tx_ring[i].status, np->tx_ring[i].frag[0].addr, - np->tx_ring[i].frag[0].length); + i, np->tx_ring[i].status, np->tx_ring[i].frag.addr, + np->tx_ring[i].frag.length); printk(KERN_DEBUG " Rx ring %8.8x:\n", (int)(np->rx_ring_dma)); for (i = 0; i < /*RX_RING_SIZE*/4 ; i++) { printk(KERN_DEBUG " #%d desc. 
%4.4x %4.4x %8.8x\n", - i, np->rx_ring[i].status, np->rx_ring[i].frag[0].addr, - np->rx_ring[i].frag[0].length); + i, np->rx_ring[i].status, np->rx_ring[i].frag.addr, + np->rx_ring[i].frag.length); } } #endif /* __i386__ debugging only */ @@ -1892,19 +1892,19 @@ static int netdev_close(struct net_device *dev) skb = np->rx_skbuff[i]; if (skb) { dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(np->rx_ring[i].frag[0].addr), + le32_to_cpu(np->rx_ring[i].frag.addr), np->rx_buf_sz, DMA_FROM_DEVICE); dev_kfree_skb(skb); np->rx_skbuff[i] = NULL; } - np->rx_ring[i].frag[0].addr = cpu_to_le32(0xBADF00D0); /* poison */ + np->rx_ring[i].frag.addr = cpu_to_le32(0xBADF00D0); /* poison */ } for (i = 0; i < TX_RING_SIZE; i++) { np->tx_ring[i].next_desc = 0; skb = np->tx_skbuff[i]; if (skb) { dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(np->tx_ring[i].frag[0].addr), + le32_to_cpu(np->tx_ring[i].frag.addr), skb->len, DMA_TO_DEVICE); dev_kfree_skb(skb); np->tx_skbuff[i] = NULL; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index e985ae008a97..88534aa29af2 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -18,6 +18,7 @@ #include <linux/ptp_classify.h> #include <net/pkt_cls.h> #include <net/sock.h> +#include <net/tso.h> #include "dpaa2-eth.h" @@ -760,6 +761,39 @@ static void dpaa2_eth_enable_tx_tstamp(struct dpaa2_eth_priv *priv, } } +static void *dpaa2_eth_sgt_get(struct dpaa2_eth_priv *priv) +{ + struct dpaa2_eth_sgt_cache *sgt_cache; + void *sgt_buf = NULL; + int sgt_buf_size; + + sgt_cache = this_cpu_ptr(priv->sgt_cache); + sgt_buf_size = priv->tx_data_offset + + DPAA2_ETH_SG_ENTRIES_MAX * sizeof(struct dpaa2_sg_entry); + + if (sgt_cache->count == 0) + sgt_buf = napi_alloc_frag_align(sgt_buf_size, DPAA2_ETH_TX_BUF_ALIGN); + else + sgt_buf = sgt_cache->buf[--sgt_cache->count]; + if (!sgt_buf) + return NULL; + + memset(sgt_buf, 0, sgt_buf_size); + + return sgt_buf; +} + +static void dpaa2_eth_sgt_recycle(struct dpaa2_eth_priv *priv, void *sgt_buf) +{ + struct dpaa2_eth_sgt_cache *sgt_cache; + + sgt_cache = this_cpu_ptr(priv->sgt_cache); + if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE) + skb_free_frag(sgt_buf); + else + sgt_cache->buf[sgt_cache->count++] = sgt_buf; +} + /* Create a frame descriptor based on a fragmented skb */ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, struct sk_buff *skb, @@ -805,12 +839,11 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, /* Prepare the HW SGT structure */ sgt_buf_size = priv->tx_data_offset + sizeof(struct dpaa2_sg_entry) * num_dma_bufs; - sgt_buf = napi_alloc_frag_align(sgt_buf_size, DPAA2_ETH_TX_BUF_ALIGN); + sgt_buf = dpaa2_eth_sgt_get(priv); if (unlikely(!sgt_buf)) { err = -ENOMEM; goto sgt_buf_alloc_failed; } - memset(sgt_buf, 0, sgt_buf_size); sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); @@ -846,6 +879,7 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, err = -ENOMEM; goto dma_map_single_failed; } + memset(fd, 0, sizeof(struct dpaa2_fd)); dpaa2_fd_set_offset(fd, priv->tx_data_offset); dpaa2_fd_set_format(fd, dpaa2_fd_sg); dpaa2_fd_set_addr(fd, addr); @@ -855,7 +889,7 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, return 0; dma_map_single_failed: - skb_free_frag(sgt_buf); + dpaa2_eth_sgt_recycle(priv, sgt_buf); sgt_buf_alloc_failed: dma_unmap_sg(dev, scl, num_sg, DMA_BIDIRECTIONAL); dma_map_sg_failed: @@ -875,7 +909,6 @@ static int 
dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, void **swa_addr) { struct device *dev = priv->net_dev->dev.parent; - struct dpaa2_eth_sgt_cache *sgt_cache; struct dpaa2_sg_entry *sgt; struct dpaa2_eth_swa *swa; dma_addr_t addr, sgt_addr; @@ -884,18 +917,10 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, int err; /* Prepare the HW SGT structure */ - sgt_cache = this_cpu_ptr(priv->sgt_cache); sgt_buf_size = priv->tx_data_offset + sizeof(struct dpaa2_sg_entry); - - if (sgt_cache->count == 0) - sgt_buf = kzalloc(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN, - GFP_ATOMIC); - else - sgt_buf = sgt_cache->buf[--sgt_cache->count]; + sgt_buf = dpaa2_eth_sgt_get(priv); if (unlikely(!sgt_buf)) return -ENOMEM; - - sgt_buf = PTR_ALIGN(sgt_buf, DPAA2_ETH_TX_BUF_ALIGN); sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); addr = dma_map_single(dev, skb->data, skb->len, DMA_BIDIRECTIONAL); @@ -923,6 +948,7 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, goto sgt_map_failed; } + memset(fd, 0, sizeof(struct dpaa2_fd)); dpaa2_fd_set_offset(fd, priv->tx_data_offset); dpaa2_fd_set_format(fd, dpaa2_fd_sg); dpaa2_fd_set_addr(fd, sgt_addr); @@ -934,10 +960,7 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, sgt_map_failed: dma_unmap_single(dev, addr, skb->len, DMA_BIDIRECTIONAL); data_map_failed: - if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE) - kfree(sgt_buf); - else - sgt_cache->buf[sgt_cache->count++] = sgt_buf; + dpaa2_eth_sgt_recycle(priv, sgt_buf); return err; } @@ -978,6 +1001,7 @@ static int dpaa2_eth_build_single_fd(struct dpaa2_eth_priv *priv, if (unlikely(dma_mapping_error(dev, addr))) return -ENOMEM; + memset(fd, 0, sizeof(struct dpaa2_fd)); dpaa2_fd_set_addr(fd, addr); dpaa2_fd_set_offset(fd, (u16)(skb->data - buffer_start)); dpaa2_fd_set_len(fd, skb->len); @@ -1005,9 +1029,9 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv, struct dpaa2_eth_swa *swa; u8 fd_format = dpaa2_fd_get_format(fd); u32 fd_len = dpaa2_fd_get_len(fd); - - struct dpaa2_eth_sgt_cache *sgt_cache; struct dpaa2_sg_entry *sgt; + int should_free_skb = 1; + int i; fd_addr = dpaa2_fd_get_addr(fd); buffer_start = dpaa2_iova_to_virt(priv->iommu_domain, fd_addr); @@ -1039,6 +1063,28 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv, /* Unmap the SGT buffer */ dma_unmap_single(dev, fd_addr, swa->sg.sgt_size, DMA_BIDIRECTIONAL); + } else if (swa->type == DPAA2_ETH_SWA_SW_TSO) { + skb = swa->tso.skb; + + sgt = (struct dpaa2_sg_entry *)(buffer_start + + priv->tx_data_offset); + + /* Unmap and free the header */ + dma_unmap_single(dev, dpaa2_sg_get_addr(sgt), TSO_HEADER_SIZE, + DMA_TO_DEVICE); + kfree(dpaa2_iova_to_virt(priv->iommu_domain, dpaa2_sg_get_addr(sgt))); + + /* Unmap the other SG entries for the data */ + for (i = 1; i < swa->tso.num_sg; i++) + dma_unmap_single(dev, dpaa2_sg_get_addr(&sgt[i]), + dpaa2_sg_get_len(&sgt[i]), DMA_TO_DEVICE); + + /* Unmap the SGT buffer */ + dma_unmap_single(dev, fd_addr, swa->sg.sgt_size, + DMA_BIDIRECTIONAL); + + if (!swa->tso.is_last_fd) + should_free_skb = 0; } else { skb = swa->single.skb; @@ -1067,55 +1113,195 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv, } /* Get the timestamp value */ - if (skb->cb[0] == TX_TSTAMP) { - struct skb_shared_hwtstamps shhwtstamps; - __le64 *ts = dpaa2_get_ts(buffer_start, true); - u64 ns; - - memset(&shhwtstamps, 0, sizeof(shhwtstamps)); - - ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts); - shhwtstamps.hwtstamp = 
ns_to_ktime(ns); - skb_tstamp_tx(skb, &shhwtstamps); - } else if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) { - mutex_unlock(&priv->onestep_tstamp_lock); + if (swa->type != DPAA2_ETH_SWA_SW_TSO) { + if (skb->cb[0] == TX_TSTAMP) { + struct skb_shared_hwtstamps shhwtstamps; + __le64 *ts = dpaa2_get_ts(buffer_start, true); + u64 ns; + + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + + ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts); + shhwtstamps.hwtstamp = ns_to_ktime(ns); + skb_tstamp_tx(skb, &shhwtstamps); + } else if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) { + mutex_unlock(&priv->onestep_tstamp_lock); + } } /* Free SGT buffer allocated on tx */ - if (fd_format != dpaa2_fd_single) { - sgt_cache = this_cpu_ptr(priv->sgt_cache); - if (swa->type == DPAA2_ETH_SWA_SG) { - skb_free_frag(buffer_start); - } else { - if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE) - kfree(buffer_start); - else - sgt_cache->buf[sgt_cache->count++] = buffer_start; + if (fd_format != dpaa2_fd_single) + dpaa2_eth_sgt_recycle(priv, buffer_start); + + /* Move on with skb release. If we are just confirming multiple FDs + * from the same TSO skb then only the last one will need to free the + * skb. + */ + if (should_free_skb) + napi_consume_skb(skb, in_napi); +} + +static int dpaa2_eth_build_gso_fd(struct dpaa2_eth_priv *priv, + struct sk_buff *skb, struct dpaa2_fd *fd, + int *num_fds, u32 *total_fds_len) +{ + struct device *dev = priv->net_dev->dev.parent; + int hdr_len, total_len, data_left, fd_len; + int num_sge, err, i, sgt_buf_size; + struct dpaa2_fd *fd_start = fd; + struct dpaa2_sg_entry *sgt; + struct dpaa2_eth_swa *swa; + dma_addr_t sgt_addr, addr; + dma_addr_t tso_hdr_dma; + unsigned int index = 0; + struct tso_t tso; + char *tso_hdr; + void *sgt_buf; + + /* Initialize the TSO handler, and prepare the first payload */ + hdr_len = tso_start(skb, &tso); + *total_fds_len = 0; + + total_len = skb->len - hdr_len; + while (total_len > 0) { + /* Prepare the HW SGT structure for this frame */ + sgt_buf = dpaa2_eth_sgt_get(priv); + if (unlikely(!sgt_buf)) { + netdev_err(priv->net_dev, "dpaa2_eth_sgt_get() failed\n"); + err = -ENOMEM; + goto err_sgt_get; } + sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); + + /* Determine the data length of this frame */ + data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len); + total_len -= data_left; + fd_len = data_left + hdr_len; + + /* Prepare packet headers: MAC + IP + TCP */ + tso_hdr = kmalloc(TSO_HEADER_SIZE, GFP_ATOMIC); + if (!tso_hdr) { + err = -ENOMEM; + goto err_alloc_tso_hdr; + } + + tso_build_hdr(skb, tso_hdr, &tso, data_left, total_len == 0); + tso_hdr_dma = dma_map_single(dev, tso_hdr, TSO_HEADER_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(dev, tso_hdr_dma)) { + netdev_err(priv->net_dev, "dma_map_single(tso_hdr) failed\n"); + err = -ENOMEM; + goto err_map_tso_hdr; + } + + /* Setup the SG entry for the header */ + dpaa2_sg_set_addr(sgt, tso_hdr_dma); + dpaa2_sg_set_len(sgt, hdr_len); + dpaa2_sg_set_final(sgt, data_left > 0 ? false : true); + + /* Compose the SG entries for each fragment of data */ + num_sge = 1; + while (data_left > 0) { + int size; + + /* Move to the next SG entry */ + sgt++; + size = min_t(int, tso.size, data_left); + + addr = dma_map_single(dev, tso.data, size, DMA_TO_DEVICE); + if (dma_mapping_error(dev, addr)) { + netdev_err(priv->net_dev, "dma_map_single(tso.data) failed\n"); + err = -ENOMEM; + goto err_map_data; + } + dpaa2_sg_set_addr(sgt, addr); + dpaa2_sg_set_len(sgt, size); + dpaa2_sg_set_final(sgt, size == data_left ? 
true : false); + + num_sge++; + + /* Build the data for the __next__ fragment */ + data_left -= size; + tso_build_data(skb, &tso, size); + } + + /* Store the skb backpointer in the SGT buffer */ + sgt_buf_size = priv->tx_data_offset + num_sge * sizeof(struct dpaa2_sg_entry); + swa = (struct dpaa2_eth_swa *)sgt_buf; + swa->type = DPAA2_ETH_SWA_SW_TSO; + swa->tso.skb = skb; + swa->tso.num_sg = num_sge; + swa->tso.sgt_size = sgt_buf_size; + swa->tso.is_last_fd = total_len == 0 ? 1 : 0; + + /* Separately map the SGT buffer */ + sgt_addr = dma_map_single(dev, sgt_buf, sgt_buf_size, DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(dev, sgt_addr))) { + netdev_err(priv->net_dev, "dma_map_single(sgt_buf) failed\n"); + err = -ENOMEM; + goto err_map_sgt; + } + + /* Setup the frame descriptor */ + memset(fd, 0, sizeof(struct dpaa2_fd)); + dpaa2_fd_set_offset(fd, priv->tx_data_offset); + dpaa2_fd_set_format(fd, dpaa2_fd_sg); + dpaa2_fd_set_addr(fd, sgt_addr); + dpaa2_fd_set_len(fd, fd_len); + dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA); + + *total_fds_len += fd_len; + /* Advance to the next frame descriptor */ + fd++; + index++; } - /* Move on with skb release */ - napi_consume_skb(skb, in_napi); + *num_fds = index; + + return 0; + +err_map_sgt: +err_map_data: + /* Unmap all the data S/G entries for the current FD */ + sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); + for (i = 1; i < num_sge; i++) + dma_unmap_single(dev, dpaa2_sg_get_addr(&sgt[i]), + dpaa2_sg_get_len(&sgt[i]), DMA_TO_DEVICE); + + /* Unmap the header entry */ + dma_unmap_single(dev, tso_hdr_dma, TSO_HEADER_SIZE, DMA_TO_DEVICE); +err_map_tso_hdr: + kfree(tso_hdr); +err_alloc_tso_hdr: + dpaa2_eth_sgt_recycle(priv, sgt_buf); +err_sgt_get: + /* Free all the other FDs that were already fully created */ + for (i = 0; i < index; i++) + dpaa2_eth_free_tx_fd(priv, NULL, &fd_start[i], false); + + return err; } static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) { struct dpaa2_eth_priv *priv = netdev_priv(net_dev); - struct dpaa2_fd fd; - struct rtnl_link_stats64 *percpu_stats; + int total_enqueued = 0, retries = 0, enqueued; struct dpaa2_eth_drv_stats *percpu_extras; + struct rtnl_link_stats64 *percpu_stats; + unsigned int needed_headroom; + int num_fds = 1, max_retries; struct dpaa2_eth_fq *fq; struct netdev_queue *nq; + struct dpaa2_fd *fd; u16 queue_mapping; - unsigned int needed_headroom; - u32 fd_len; + void *swa = NULL; u8 prio = 0; int err, i; - void *swa; + u32 fd_len; percpu_stats = this_cpu_ptr(priv->percpu_stats); percpu_extras = this_cpu_ptr(priv->percpu_extras); + fd = (this_cpu_ptr(priv->fd))->array; needed_headroom = dpaa2_eth_needed_headroom(skb); @@ -1130,20 +1316,28 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, } /* Setup the FD fields */ - memset(&fd, 0, sizeof(fd)); - if (skb_is_nonlinear(skb)) { - err = dpaa2_eth_build_sg_fd(priv, skb, &fd, &swa); + if (skb_is_gso(skb)) { + err = dpaa2_eth_build_gso_fd(priv, skb, fd, &num_fds, &fd_len); + percpu_extras->tx_sg_frames += num_fds; + percpu_extras->tx_sg_bytes += fd_len; + percpu_extras->tx_tso_frames += num_fds; + percpu_extras->tx_tso_bytes += fd_len; + } else if (skb_is_nonlinear(skb)) { + err = dpaa2_eth_build_sg_fd(priv, skb, fd, &swa); percpu_extras->tx_sg_frames++; percpu_extras->tx_sg_bytes += skb->len; + fd_len = dpaa2_fd_get_len(fd); } else if (skb_headroom(skb) < needed_headroom) { - err = dpaa2_eth_build_sg_fd_single_buf(priv, skb, &fd, &swa); + err = dpaa2_eth_build_sg_fd_single_buf(priv, skb, fd, &swa); 
percpu_extras->tx_sg_frames++; percpu_extras->tx_sg_bytes += skb->len; percpu_extras->tx_converted_sg_frames++; percpu_extras->tx_converted_sg_bytes += skb->len; + fd_len = dpaa2_fd_get_len(fd); } else { - err = dpaa2_eth_build_single_fd(priv, skb, &fd, &swa); + err = dpaa2_eth_build_single_fd(priv, skb, fd, &swa); + fd_len = dpaa2_fd_get_len(fd); } if (unlikely(err)) { @@ -1151,11 +1345,12 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, goto err_build_fd; } - if (skb->cb[0]) - dpaa2_eth_enable_tx_tstamp(priv, &fd, swa, skb); + if (swa && skb->cb[0]) + dpaa2_eth_enable_tx_tstamp(priv, fd, swa, skb); /* Tracing point */ - trace_dpaa2_tx_fd(net_dev, &fd); + for (i = 0; i < num_fds; i++) + trace_dpaa2_tx_fd(net_dev, &fd[i]); /* TxConf FQ selection relies on queue id from the stack. * In case of a forwarded frame from another DPNI interface, we choose @@ -1175,27 +1370,32 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, queue_mapping %= dpaa2_eth_queue_count(priv); } fq = &priv->fq[queue_mapping]; - - fd_len = dpaa2_fd_get_len(&fd); nq = netdev_get_tx_queue(net_dev, queue_mapping); netdev_tx_sent_queue(nq, fd_len); /* Everything that happens after this enqueues might race with * the Tx confirmation callback for this frame */ - for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) { - err = priv->enqueue(priv, fq, &fd, prio, 1, NULL); - if (err != -EBUSY) - break; + max_retries = num_fds * DPAA2_ETH_ENQUEUE_RETRIES; + while (total_enqueued < num_fds && retries < max_retries) { + err = priv->enqueue(priv, fq, &fd[total_enqueued], + prio, num_fds - total_enqueued, &enqueued); + if (err == -EBUSY) { + retries++; + continue; + } + + total_enqueued += enqueued; } - percpu_extras->tx_portal_busy += i; + percpu_extras->tx_portal_busy += retries; + if (unlikely(err < 0)) { percpu_stats->tx_errors++; /* Clean up everything, including freeing the skb */ - dpaa2_eth_free_tx_fd(priv, fq, &fd, false); + dpaa2_eth_free_tx_fd(priv, fq, fd, false); netdev_tx_completed_queue(nq, 1, fd_len); } else { - percpu_stats->tx_packets++; + percpu_stats->tx_packets += total_enqueued; percpu_stats->tx_bytes += fd_len; } @@ -1523,7 +1723,7 @@ static void dpaa2_eth_sgt_cache_drain(struct dpaa2_eth_priv *priv) count = sgt_cache->count; for (i = 0; i < count; i++) - kfree(sgt_cache->buf[i]); + skb_free_frag(sgt_cache->buf[i]); sgt_cache->count = 0; } } @@ -4115,7 +4315,8 @@ static int dpaa2_eth_netdev_init(struct net_device *net_dev) net_dev->features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | - NETIF_F_LLTX | NETIF_F_HW_TC; + NETIF_F_LLTX | NETIF_F_HW_TC | NETIF_F_TSO; + net_dev->gso_max_segs = DPAA2_ETH_ENQUEUE_MAX_FDS; net_dev->hw_features = net_dev->features; if (priv->dpni_attrs.vlan_filter_entries) @@ -4397,6 +4598,13 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) goto err_alloc_sgt_cache; } + priv->fd = alloc_percpu(*priv->fd); + if (!priv->fd) { + dev_err(dev, "alloc_percpu(fds) failed\n"); + err = -ENOMEM; + goto err_alloc_fds; + } + err = dpaa2_eth_netdev_init(net_dev); if (err) goto err_netdev_init; @@ -4484,6 +4692,8 @@ err_poll_thread: err_alloc_rings: err_csum: err_netdev_init: + free_percpu(priv->fd); +err_alloc_fds: free_percpu(priv->sgt_cache); err_alloc_sgt_cache: free_percpu(priv->percpu_extras); @@ -4539,6 +4749,7 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) fsl_mc_free_irqs(ls_dev); dpaa2_eth_free_rings(priv); + free_percpu(priv->fd); free_percpu(priv->sgt_cache); free_percpu(priv->percpu_stats); 
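
Worth calling out in the probe/remove hunks: one TSO skb can now expand into up to DPAA2_ETH_ENQUEUE_MAX_FDS frame descriptors, too large for an on-stack array, so the driver parks them in a per-CPU scratch area. A minimal sketch of that pattern (names and the stand-in struct are illustrative, not the driver's)::

    #include <linux/percpu.h>

    #define MAX_FDS_PER_SKB 200        /* mirrors DPAA2_ETH_ENQUEUE_MAX_FDS */

    struct fd_scratch {                /* stand-in for dpaa2_eth_fds */
        u64 fd[MAX_FDS_PER_SKB][4];    /* a dpaa2_fd is 32 bytes */
    };

    static struct fd_scratch __percpu *fd_scratch;

    static int scratch_init(void)
    {
        fd_scratch = alloc_percpu(struct fd_scratch);
        return fd_scratch ? 0 : -ENOMEM;
    }

    /* On the xmit path: ndo_start_xmit() runs with bottom halves disabled,
     * so this_cpu_ptr() is stable here and no extra locking is needed.
     */
    static void scratch_use(void)
    {
        struct fd_scratch *s = this_cpu_ptr(fd_scratch);

        /* build up to MAX_FDS_PER_SKB descriptors in s->fd[] ... */
        (void)s;
    }

    static void scratch_exit(void)
    {
        free_percpu(fd_scratch);
    }
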
free_percpu(priv->percpu_extras); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index e54e70ebdd05..b79831cd1a94 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -122,6 +122,7 @@ enum dpaa2_eth_swa_type { DPAA2_ETH_SWA_SINGLE, DPAA2_ETH_SWA_SG, DPAA2_ETH_SWA_XDP, + DPAA2_ETH_SWA_SW_TSO, }; /* Must keep this struct smaller than DPAA2_ETH_SWA_SIZE */ @@ -142,6 +143,12 @@ struct dpaa2_eth_swa { int dma_size; struct xdp_frame *xdpf; } xdp; + struct { + struct sk_buff *skb; + int num_sg; + int sgt_size; + int is_last_fd; + } tso; }; }; @@ -354,6 +361,8 @@ struct dpaa2_eth_drv_stats { __u64 tx_conf_bytes; __u64 tx_sg_frames; __u64 tx_sg_bytes; + __u64 tx_tso_frames; + __u64 tx_tso_bytes; __u64 rx_sg_frames; __u64 rx_sg_bytes; /* Linear skbs sent as a S/G FD due to insufficient headroom */ @@ -493,8 +502,15 @@ struct dpaa2_eth_trap_data { struct dpaa2_eth_priv *priv; }; +#define DPAA2_ETH_SG_ENTRIES_MAX (PAGE_SIZE / sizeof(struct scatterlist)) + #define DPAA2_ETH_DEFAULT_COPYBREAK 512 +#define DPAA2_ETH_ENQUEUE_MAX_FDS 200 +struct dpaa2_eth_fds { + struct dpaa2_fd array[DPAA2_ETH_ENQUEUE_MAX_FDS]; +}; + /* Driver private data */ struct dpaa2_eth_priv { struct net_device *net_dev; @@ -577,6 +593,8 @@ struct dpaa2_eth_priv { struct devlink_port devlink_port; u32 rx_copybreak; + + struct dpaa2_eth_fds __percpu *fd; }; struct dpaa2_eth_devlink_priv { diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index 3fdbf87dccb1..eea7d7a07c00 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -44,6 +44,8 @@ static char dpaa2_ethtool_extras[][ETH_GSTRING_LEN] = { "[drv] tx conf bytes", "[drv] tx sg frames", "[drv] tx sg bytes", + "[drv] tx tso frames", + "[drv] tx tso bytes", "[drv] rx sg frames", "[drv] rx sg bytes", "[drv] tx converted sg frames", diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index fb39e406b7fc..68d806dc3701 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -18,6 +18,8 @@ #define ENETC_MAX_MTU (ENETC_MAC_MAXFRM_SIZE - \ (ETH_FCS_LEN + ETH_HLEN + VLAN_HLEN)) +#define ENETC_CBD_DATA_MEM_ALIGN 64 + struct enetc_tx_swbd { union { struct sk_buff *skb; @@ -415,6 +417,42 @@ int enetc_get_rss_table(struct enetc_si *si, u32 *table, int count); int enetc_set_rss_table(struct enetc_si *si, const u32 *table, int count); int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd); +static inline void *enetc_cbd_alloc_data_mem(struct enetc_si *si, + struct enetc_cbd *cbd, + int size, dma_addr_t *dma, + void **data_align) +{ + struct enetc_cbdr *ring = &si->cbd_ring; + dma_addr_t dma_align; + void *data; + + data = dma_alloc_coherent(ring->dma_dev, + size + ENETC_CBD_DATA_MEM_ALIGN, + dma, GFP_KERNEL); + if (!data) { + dev_err(ring->dma_dev, "CBD alloc data memory failed!\n"); + return NULL; + } + + dma_align = ALIGN(*dma, ENETC_CBD_DATA_MEM_ALIGN); + *data_align = PTR_ALIGN(data, ENETC_CBD_DATA_MEM_ALIGN); + + cbd->addr[0] = cpu_to_le32(lower_32_bits(dma_align)); + cbd->addr[1] = cpu_to_le32(upper_32_bits(dma_align)); + cbd->length = cpu_to_le16(size); + + return data; +} + +static inline void enetc_cbd_free_data_mem(struct enetc_si *si, int size, + void *data, dma_addr_t *dma) +{ + struct enetc_cbdr 
*ring = &si->cbd_ring; + + dma_free_coherent(ring->dma_dev, size + ENETC_CBD_DATA_MEM_ALIGN, + data, *dma); +} + #ifdef CONFIG_FSL_ENETC_QOS int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data); void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c index 073e56dcca4e..af68dc46a795 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c @@ -166,70 +166,55 @@ int enetc_set_mac_flt_entry(struct enetc_si *si, int index, return enetc_send_cmd(si, &cbd); } -#define RFSE_ALIGN 64 /* Set entry in RFS table */ int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse, int index) { struct enetc_cbdr *ring = &si->cbd_ring; struct enetc_cbd cbd = {.cmd = 0}; - dma_addr_t dma, dma_align; void *tmp, *tmp_align; + dma_addr_t dma; int err; /* fill up the "set" descriptor */ cbd.cmd = 0; cbd.cls = 4; cbd.index = cpu_to_le16(index); - cbd.length = cpu_to_le16(sizeof(*rfse)); cbd.opt[3] = cpu_to_le32(0); /* SI */ - tmp = dma_alloc_coherent(ring->dma_dev, sizeof(*rfse) + RFSE_ALIGN, - &dma, GFP_KERNEL); - if (!tmp) { - dev_err(ring->dma_dev, "DMA mapping of RFS entry failed!\n"); + tmp = enetc_cbd_alloc_data_mem(si, &cbd, sizeof(*rfse), + &dma, &tmp_align); + if (!tmp) return -ENOMEM; - } - dma_align = ALIGN(dma, RFSE_ALIGN); - tmp_align = PTR_ALIGN(tmp, RFSE_ALIGN); memcpy(tmp_align, rfse, sizeof(*rfse)); - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); - err = enetc_send_cmd(si, &cbd); if (err) dev_err(ring->dma_dev, "FS entry add failed (%d)!", err); - dma_free_coherent(ring->dma_dev, sizeof(*rfse) + RFSE_ALIGN, - tmp, dma); + enetc_cbd_free_data_mem(si, sizeof(*rfse), tmp, &dma); return err; } -#define RSSE_ALIGN 64 static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count, bool read) { struct enetc_cbdr *ring = &si->cbd_ring; struct enetc_cbd cbd = {.cmd = 0}; - dma_addr_t dma, dma_align; u8 *tmp, *tmp_align; + dma_addr_t dma; int err, i; - if (count < RSSE_ALIGN) + if (count < ENETC_CBD_DATA_MEM_ALIGN) /* HW only takes in a full 64 entry table */ return -EINVAL; - tmp = dma_alloc_coherent(ring->dma_dev, count + RSSE_ALIGN, - &dma, GFP_KERNEL); - if (!tmp) { - dev_err(ring->dma_dev, "DMA mapping of RSS table failed!\n"); + tmp = enetc_cbd_alloc_data_mem(si, &cbd, count, + &dma, (void *)&tmp_align); + if (!tmp) return -ENOMEM; - } - dma_align = ALIGN(dma, RSSE_ALIGN); - tmp_align = PTR_ALIGN(tmp, RSSE_ALIGN); if (!read) for (i = 0; i < count; i++) @@ -238,10 +223,6 @@ static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count, /* fill up the descriptor */ cbd.cmd = read ? 
2 : 1; cbd.cls = 3; - cbd.length = cpu_to_le16(count); - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); err = enetc_send_cmd(si, &cbd); if (err) @@ -251,7 +232,7 @@ static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count, for (i = 0; i < count; i++) table[i] = tmp_align[i]; - dma_free_coherent(ring->dma_dev, count + RSSE_ALIGN, tmp, dma); + enetc_cbd_free_data_mem(si, count, tmp, &dma); return err; } diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 3555c12edb45..5a3eea1a718b 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -52,10 +52,11 @@ static int enetc_setup_taprio(struct net_device *ndev, struct enetc_cbd cbd = {.cmd = 0}; struct tgs_gcl_conf *gcl_config; struct tgs_gcl_data *gcl_data; - struct gce *gce; dma_addr_t dma; + struct gce *gce; u16 data_size; u16 gcl_len; + void *tmp; u32 tge; int err; int i; @@ -82,8 +83,9 @@ static int enetc_setup_taprio(struct net_device *ndev, gcl_config = &cbd.gcl_conf; data_size = struct_size(gcl_data, entry, gcl_len); - gcl_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!gcl_data) + tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size, + &dma, (void *)&gcl_data); + if (!tmp) return -ENOMEM; gce = (struct gce *)(gcl_data + 1); @@ -107,19 +109,8 @@ static int enetc_setup_taprio(struct net_device *ndev, temp_gce->period = cpu_to_le32(temp_entry->interval); } - cbd.length = cpu_to_le16(data_size); cbd.status_flags = 0; - dma = dma_map_single(&priv->si->pdev->dev, gcl_data, - data_size, DMA_TO_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - kfree(gcl_data); - return -ENOMEM; - } - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); cbd.cls = BDCR_CMD_PORT_GCL; cbd.status_flags = 0; @@ -132,8 +123,7 @@ static int enetc_setup_taprio(struct net_device *ndev, ENETC_QBV_PTGCR_OFFSET, tge & (~ENETC_QBV_TGE)); - dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_TO_DEVICE); - kfree(gcl_data); + enetc_cbd_free_data_mem(priv->si, data_size, tmp, &dma); return err; } @@ -450,6 +440,7 @@ static struct actions_fwd enetc_act_fwd[] = { }; static struct enetc_psfp epsfp = { + .dev_bitmap = 0, .psfp_sfi_bitmap = NULL, }; @@ -463,8 +454,9 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, struct enetc_cbd cbd = {.cmd = 0}; struct streamid_data *si_data; struct streamid_conf *si_conf; - u16 data_size; dma_addr_t dma; + u16 data_size; + void *tmp; int port; int err; @@ -485,21 +477,11 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, cbd.status_flags = 0; data_size = sizeof(struct streamid_data); - si_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!si_data) + tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size, + &dma, (void *)&si_data); + if (!tmp) return -ENOMEM; - cbd.length = cpu_to_le16(data_size); - dma = dma_map_single(&priv->si->pdev->dev, si_data, - data_size, DMA_FROM_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - err = -ENOMEM; - goto out; - } - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); eth_broadcast_addr(si_data->dmac); si_data->vid_vidm_tg = (ENETC_CBDR_SID_VID_MASK + ((0x3 << 14) | ENETC_CBDR_SID_VIDM)); @@ -520,11 +502,6 @@ static 
int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, goto out; /* Enable the entry overwrite again in case space was flushed by hardware */ - memset(&cbd, 0, sizeof(cbd)); - - cbd.index = cpu_to_le16((u16)sid->index); - cbd.cmd = 0; - cbd.cls = BDCR_CMD_STREAM_IDENTIFY; cbd.status_flags = 0; si_conf->en = 0x80; @@ -537,11 +514,6 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, memset(si_data, 0, data_size); - cbd.length = cpu_to_le16(data_size); - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); - /* VIDM default to be 1. * VID Match. If set (b1) then the VID must match, otherwise * any VID is considered a match. VIDM setting is only used @@ -561,10 +533,7 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, err = enetc_send_cmd(priv->si, &cbd); out: - if (!dma_mapping_error(&priv->si->pdev->dev, dma)) - dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_FROM_DEVICE); - - kfree(si_data); + enetc_cbd_free_data_mem(priv->si, data_size, tmp, &dma); return err; } @@ -635,6 +604,7 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, struct sfi_counter_data *data_buf; dma_addr_t dma; u16 data_size; + void *tmp; int err; cbd.index = cpu_to_le16((u16)index); @@ -643,21 +613,11 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, cbd.status_flags = 0; data_size = sizeof(struct sfi_counter_data); - data_buf = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!data_buf) - return -ENOMEM; - - dma = dma_map_single(&priv->si->pdev->dev, data_buf, - data_size, DMA_FROM_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - err = -ENOMEM; - goto exit; - } - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); - cbd.length = cpu_to_le16(data_size); + tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size, + &dma, (void *)&data_buf); + if (!tmp) + return -ENOMEM; err = enetc_send_cmd(priv->si, &cbd); if (err) @@ -684,7 +644,8 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, data_buf->flow_meter_dropl; exit: - kfree(data_buf); + enetc_cbd_free_data_mem(priv->si, data_size, tmp, &dma); + return err; } @@ -726,6 +687,7 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, dma_addr_t dma; u16 data_size; int err, i; + void *tmp; u64 now; cbd.index = cpu_to_le16(sgi->index); @@ -772,24 +734,10 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, sgcl_config->acl_len = (sgi->num_entries - 1) & 0x3; data_size = struct_size(sgcl_data, sgcl, sgi->num_entries); - - sgcl_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!sgcl_data) - return -ENOMEM; - - cbd.length = cpu_to_le16(data_size); - - dma = dma_map_single(&priv->si->pdev->dev, - sgcl_data, data_size, - DMA_FROM_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - kfree(sgcl_data); + tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size, + &dma, (void *)&sgcl_data); + if (!tmp) return -ENOMEM; - } - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); sgce = &sgcl_data->sgcl[0]; @@ -844,8 +792,7 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, err = enetc_send_cmd(priv->si, &cbd); exit: - kfree(sgcl_data); - + enetc_cbd_free_data_mem(priv->si, data_size, tmp, &dma); return err; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 9298fbecb31a..6f18c9a03231 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -167,6 +167,7 @@ struct hnae3_handle; struct hnae3_queue { void __iomem *io_base; + void __iomem *mem_base; struct hnae3_ae_algo *ae_algo; struct hnae3_handle *handle; int tqp_index; /* index in a handle */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index babc5d7a3b52..0b8a73c40b12 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2028,9 +2028,73 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring, return bd_num; } +static void hns3_tx_push_bd(struct hns3_enet_ring *ring, int num) +{ +#define HNS3_BYTES_PER_64BIT 8 + + struct hns3_desc desc[HNS3_MAX_PUSH_BD_NUM] = {}; + int offset = 0; + + /* make sure everything is visible to device before + * executing tx push or updating doorbell + */ + dma_wmb(); + + do { + int idx = (ring->next_to_use - num + ring->desc_num) % + ring->desc_num; + + u64_stats_update_begin(&ring->syncp); + ring->stats.tx_push++; + u64_stats_update_end(&ring->syncp); + memcpy(&desc[offset], &ring->desc[idx], + sizeof(struct hns3_desc)); + offset++; + } while (--num); + + __iowrite64_copy(ring->tqp->mem_base, desc, + (sizeof(struct hns3_desc) * HNS3_MAX_PUSH_BD_NUM) / + HNS3_BYTES_PER_64BIT); + + io_stop_wc(); +} + +static void hns3_tx_mem_doorbell(struct hns3_enet_ring *ring) +{ +#define HNS3_MEM_DOORBELL_OFFSET 64 + + __le64 bd_num = cpu_to_le64((u64)ring->pending_buf); + + /* make sure everything is visible to device before + * executing tx push or updating doorbell + */ + dma_wmb(); + + __iowrite64_copy(ring->tqp->mem_base + HNS3_MEM_DOORBELL_OFFSET, + &bd_num, 1); + u64_stats_update_begin(&ring->syncp); + ring->stats.tx_mem_doorbell += ring->pending_buf; + u64_stats_update_end(&ring->syncp); + + io_stop_wc(); +} + static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num, bool doorbell) { + struct net_device *netdev = ring_to_netdev(ring); + struct hns3_nic_priv *priv = netdev_priv(netdev); + + /* when tx push is enabled, a packet whose BD count is below + * HNS3_MAX_PUSH_BD_NUM can be pushed directly.
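
The ordering calls around the copy are the subtle part of the push path. A condensed sketch of the write-combining push sequence, assuming mem_base is a write-combining device-memory mapping::

    #include <linux/io.h>
    #include <asm/barrier.h>

    /* Sketch: descriptor push over a write-combining BAR mapping.
     * dma_wmb() orders the descriptor/ring writes in host memory before
     * the MMIO copy; __iowrite64_copy() moves the descriptors in 64-bit
     * chunks (its count is in 8-byte words, not bytes); io_stop_wc()
     * keeps the CPU from merging these WC writes with later MMIO.
     */
    static void push_descs(void __iomem *mem_base, const void *descs,
                           size_t bytes)
    {
        dma_wmb();
        __iowrite64_copy(mem_base, descs, bytes / 8);
        io_stop_wc();
    }
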
+ */ + if (test_bit(HNS3_NIC_STATE_TX_PUSH_ENABLE, &priv->state) && num && + !ring->pending_buf && num <= HNS3_MAX_PUSH_BD_NUM && doorbell) { + hns3_tx_push_bd(ring, num); + WRITE_ONCE(ring->last_to_use, ring->next_to_use); + return; + } + ring->pending_buf += num; if (!doorbell) { @@ -2038,11 +2102,12 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num, return; } - if (!ring->pending_buf) - return; + if (ring->tqp->mem_base) + hns3_tx_mem_doorbell(ring); + else + writel(ring->pending_buf, + ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG); - writel(ring->pending_buf, - ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG); ring->pending_buf = 0; WRITE_ONCE(ring->last_to_use, ring->next_to_use); } @@ -2732,6 +2797,9 @@ static void hns3_dump_queue_stats(struct net_device *ndev, "seg_pkt_cnt: %llu, tx_more: %llu, restart_queue: %llu, tx_busy: %llu\n", tx_ring->stats.seg_pkt_cnt, tx_ring->stats.tx_more, tx_ring->stats.restart_queue, tx_ring->stats.tx_busy); + + netdev_info(ndev, "tx_push: %llu, tx_mem_doorbell: %llu\n", + tx_ring->stats.tx_push, tx_ring->stats.tx_mem_doorbell); } static void hns3_dump_queue_reg(struct net_device *ndev, @@ -5094,6 +5162,9 @@ static void hns3_state_init(struct hnae3_handle *handle) set_bit(HNS3_NIC_STATE_INITED, &priv->state); + if (test_bit(HNAE3_DEV_SUPPORT_TX_PUSH_B, ae_dev->caps)) + set_bit(HNS3_NIC_STATE_TX_PUSH_ENABLE, &priv->state); + if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) set_bit(HNAE3_PFLAG_LIMIT_PROMISC, &handle->supported_pflags); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index a05a0c7423ce..4a3253692dcc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -7,6 +7,7 @@ #include <linux/dim.h> #include <linux/if_vlan.h> #include <net/page_pool.h> +#include <asm/barrier.h> #include "hnae3.h" @@ -25,9 +26,12 @@ enum hns3_nic_state { HNS3_NIC_STATE2_RESET_REQUESTED, HNS3_NIC_STATE_HW_TX_CSUM_ENABLE, HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, + HNS3_NIC_STATE_TX_PUSH_ENABLE, HNS3_NIC_STATE_MAX }; +#define HNS3_MAX_PUSH_BD_NUM 2 + #define HNS3_RING_RX_RING_BASEADDR_L_REG 0x00000 #define HNS3_RING_RX_RING_BASEADDR_H_REG 0x00004 #define HNS3_RING_RX_RING_BD_NUM_REG 0x00008 @@ -410,6 +414,8 @@ struct ring_stats { u64 tx_pkts; u64 tx_bytes; u64 tx_more; + u64 tx_push; + u64 tx_mem_doorbell; u64 restart_queue; u64 tx_busy; u64 tx_copy; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index c06c39ece80d..6469238ae090 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -23,6 +23,8 @@ static const struct hns3_stats hns3_txq_stats[] = { HNS3_TQP_STAT("packets", tx_pkts), HNS3_TQP_STAT("bytes", tx_bytes), HNS3_TQP_STAT("more", tx_more), + HNS3_TQP_STAT("push", tx_push), + HNS3_TQP_STAT("mem_doorbell", tx_mem_doorbell), HNS3_TQP_STAT("wake", restart_queue), HNS3_TQP_STAT("busy", tx_busy), HNS3_TQP_STAT("copy", tx_copy), diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 24f7afacae02..78d0498bdabc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1643,6 +1643,7 @@ static int hclge_config_gro(struct hclge_dev *hdev) static int hclge_alloc_tqps(struct hclge_dev *hdev) { + struct hnae3_ae_dev *ae_dev = 
pci_get_drvdata(hdev->pdev); struct hclge_comm_tqp *tqp; int i; @@ -1676,6 +1677,14 @@ static int hclge_alloc_tqps(struct hclge_dev *hdev) (i - HCLGE_TQP_MAX_SIZE_DEV_V2) * HCLGE_TQP_REG_SIZE; + /* when device supports tx push and has device memory, + * the queue can execute push mode or doorbell mode on + * device memory. + */ + if (test_bit(HNAE3_DEV_SUPPORT_TX_PUSH_B, ae_dev->caps)) + tqp->q.mem_base = hdev->hw.hw.mem_base + + HCLGE_TQP_MEM_OFFSET(hdev, i); + tqp++; } @@ -11008,8 +11017,6 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, static int hclge_dev_mem_map(struct hclge_dev *hdev) { -#define HCLGE_MEM_BAR 4 - struct pci_dev *pdev = hdev->pdev; struct hclge_hw *hw = &hdev->hw; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index adfb26e79262..f7f5a4b09068 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -169,6 +169,14 @@ enum HLCGE_PORT_TYPE { #define HCLGE_VECTOR0_ALL_MSIX_ERR_B 6U #define HCLGE_TRIGGER_IMP_RESET_B 7U +#define HCLGE_TQP_MEM_SIZE 0x10000 +#define HCLGE_MEM_BAR 4 +/* in the bar4, the first half is for roce, and the second half is for nic */ +#define HCLGE_NIC_MEM_OFFSET(hdev) \ + (pci_resource_len((hdev)->pdev, HCLGE_MEM_BAR) >> 1) +#define HCLGE_TQP_MEM_OFFSET(hdev, i) \ + (HCLGE_NIC_MEM_OFFSET(hdev) + HCLGE_TQP_MEM_SIZE * (i)) + #define HCLGE_MAC_DEFAULT_FRAME \ (ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN + ETH_DATA_LEN) #define HCLGE_MAC_MIN_FRAME 64 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 21442a9bb996..93389bec8d89 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -321,6 +321,7 @@ static int hclgevf_get_pf_media_type(struct hclgevf_dev *hdev) static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev) { + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); struct hclge_comm_tqp *tqp; int i; @@ -354,6 +355,14 @@ static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev) (i - HCLGEVF_TQP_MAX_SIZE_DEV_V2) * HCLGEVF_TQP_REG_SIZE; + /* when device supports tx push and has device memory, + * the queue can execute push mode or doorbell mode on + * device memory. 
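
The PF and VF variants share the same BAR 4 layout: the first half belongs to RoCE, the second to the NIC, and each queue then owns a fixed 64 KiB (0x10000) slice of the NIC half. A small sketch of the address arithmetic, with a hypothetical helper name::

    #include <linux/pci.h>

    #define TQP_MEM_SIZE 0x10000    /* mirrors HCLGE(VF)_TQP_MEM_SIZE */

    /* Sketch: per-queue device-memory window inside BAR 4.  For a
     * hypothetical 4 MiB BAR, the NIC half starts at 2 MiB and queue i
     * gets [2 MiB + i * 64 KiB, 2 MiB + (i + 1) * 64 KiB).
     */
    static void __iomem *tqp_mem_base(struct pci_dev *pdev,
                                      void __iomem *bar4, int qid)
    {
        resource_size_t nic_off = pci_resource_len(pdev, 4) >> 1;

        return bar4 + nic_off + (resource_size_t)qid * TQP_MEM_SIZE;
    }
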
+ */ + if (test_bit(HNAE3_DEV_SUPPORT_TX_PUSH_B, ae_dev->caps)) + tqp->q.mem_base = hdev->hw.hw.mem_base + + HCLGEVF_TQP_MEM_OFFSET(hdev, i); + tqp++; } @@ -2546,8 +2555,6 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client, static int hclgevf_dev_mem_map(struct hclgevf_dev *hdev) { -#define HCLGEVF_MEM_BAR 4 - struct pci_dev *pdev = hdev->pdev; struct hclgevf_hw *hw = &hdev->hw; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 502ca1ce1a90..4b00fd44f118 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -96,6 +96,14 @@ #define HCLGEVF_RSS_IND_TBL_SIZE 512 +#define HCLGEVF_TQP_MEM_SIZE 0x10000 +#define HCLGEVF_MEM_BAR 4 +/* in the bar4, the first half is for roce, and the second half is for nic */ +#define HCLGEVF_NIC_MEM_OFFSET(hdev) \ + (pci_resource_len((hdev)->pdev, HCLGEVF_MEM_BAR) >> 1) +#define HCLGEVF_TQP_MEM_OFFSET(hdev, i) \ + (HCLGEVF_NIC_MEM_OFFSET(hdev) + HCLGEVF_TQP_MEM_SIZE * (i)) + #define HCLGEVF_MAC_MAX_FRAME 9728 #define HCLGEVF_STATS_TIMER_INTERVAL 36U diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 80c5cecaf2b5..55c6bce5da61 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -854,6 +854,10 @@ struct i40e_vsi { u64 tx_force_wb; u64 rx_buf_failed; u64 rx_page_failed; + u64 rx_page_reuse; + u64 rx_page_alloc; + u64 rx_page_waive; + u64 rx_page_busy; /* These are containers of ring pointers, allocated at run-time */ struct i40e_ring **rx_rings; diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 7abef88801fb..42439f725aa4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -769,7 +769,7 @@ static bool i40e_asq_done(struct i40e_hw *hw) } /** - * i40e_asq_send_command_atomic - send command to Admin Queue + * i40e_asq_send_command_atomic_exec - send command to Admin Queue * @hw: pointer to the hw struct * @desc: prefilled descriptor describing the command (non DMA mem) * @buff: buffer to use for indirect commands @@ -780,11 +780,13 @@ static bool i40e_asq_done(struct i40e_hw *hw) * This is the main send command driver routine for the Admin Queue send * queue. 
It runs the queue, cleans the queue, etc **/ -i40e_status -i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, - void *buff, /* can be NULL */ u16 buff_size, - struct i40e_asq_cmd_details *cmd_details, - bool is_atomic_context) +static i40e_status +i40e_asq_send_command_atomic_exec(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + bool is_atomic_context) { i40e_status status = 0; struct i40e_dma_mem *dma_buff = NULL; @@ -794,8 +796,6 @@ i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, u16 retval = 0; u32 val = 0; - mutex_lock(&hw->aq.asq_mutex); - if (hw->aq.asq.count == 0) { i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQTX: Admin queue not initialized.\n"); @@ -969,6 +969,36 @@ i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, } asq_send_command_error: + return status; +} + +/** + * i40e_asq_send_command_atomic - send command to Admin Queue + * @hw: pointer to the hw struct + * @desc: prefilled descriptor describing the command (non DMA mem) + * @buff: buffer to use for indirect commands + * @buff_size: size of buffer for indirect commands + * @cmd_details: pointer to command details structure + * @is_atomic_context: is the function called in an atomic context? + * + * Acquires the lock and calls the main send command execution + * routine. + **/ +i40e_status +i40e_asq_send_command_atomic(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + bool is_atomic_context) +{ + i40e_status status; + + mutex_lock(&hw->aq.asq_mutex); + status = i40e_asq_send_command_atomic_exec(hw, desc, buff, buff_size, + cmd_details, + is_atomic_context); + mutex_unlock(&hw->aq.asq_mutex); return status; } @@ -983,6 +1013,52 @@ i40e_asq_send_command(struct i40e_hw *hw, struct i40e_aq_desc *desc, } /** + * i40e_asq_send_command_atomic_v2 - send command to Admin Queue + * @hw: pointer to the hw struct + * @desc: prefilled descriptor describing the command (non DMA mem) + * @buff: buffer to use for indirect commands + * @buff_size: size of buffer for indirect commands + * @cmd_details: pointer to command details structure + * @is_atomic_context: is the function called in an atomic context? + * @aq_status: pointer to Admin Queue status return value + * + * Acquires the lock and calls the main send command execution + * routine. Returns the last Admin Queue status in aq_status + * to avoid race conditions in access to hw->aq.asq_last_status. 
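
The race these _v2 variants close is easy to reconstruct: asq_mutex used to be dropped inside the sender, while callers read hw->aq.asq_last_status afterwards, unlocked. A compressed before/after illustration (a hypothetical caller, assuming the driver's i40e_prototype.h declarations)::

    static void aq_status_example(struct i40e_hw *hw, struct i40e_aq_desc *desc)
    {
        enum i40e_admin_queue_err aq_status;
        i40e_status ret;

        /* Racy pattern being removed: asq_last_status is read after
         * asq_mutex was released inside the sender, so a concurrent
         * command may already have overwritten it.
         */
        ret = i40e_asq_send_command(hw, desc, NULL, 0, NULL);
        if (ret && hw->aq.asq_last_status == I40E_AQ_RC_ENOENT) {
            /* might be somebody else's ENOENT */
        }

        /* Fixed pattern: the _v2 sender snapshots the status while
         * still holding asq_mutex and returns it by out parameter.
         */
        ret = i40e_asq_send_command_v2(hw, desc, NULL, 0, NULL, &aq_status);
        if (ret && aq_status == I40E_AQ_RC_ENOENT) {
            /* guaranteed to be this command's status */
        }
    }
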
+ **/ +i40e_status +i40e_asq_send_command_atomic_v2(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + bool is_atomic_context, + enum i40e_admin_queue_err *aq_status) +{ + i40e_status status; + + mutex_lock(&hw->aq.asq_mutex); + status = i40e_asq_send_command_atomic_exec(hw, desc, buff, + buff_size, + cmd_details, + is_atomic_context); + if (aq_status) + *aq_status = hw->aq.asq_last_status; + mutex_unlock(&hw->aq.asq_mutex); + return status; +} + +i40e_status +i40e_asq_send_command_v2(struct i40e_hw *hw, struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status) +{ + return i40e_asq_send_command_atomic_v2(hw, desc, buff, buff_size, + cmd_details, true, aq_status); +} + +/** * i40e_fill_default_direct_cmd_desc - AQ descriptor helper function * @desc: pointer to the temp descriptor (non DMA mem) * @opcode: the opcode can be used to decide which flags to turn off or on diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 9ddeb015eb7e..6aefffd83615 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1899,8 +1899,9 @@ i40e_status i40e_aq_add_vsi(struct i40e_hw *hw, desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); - status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info, - sizeof(vsi_ctx->info), cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, &vsi_ctx->info, + sizeof(vsi_ctx->info), + cmd_details, true); if (status) goto aq_add_vsi_exit; @@ -2287,8 +2288,9 @@ i40e_status i40e_aq_update_vsi_params(struct i40e_hw *hw, desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); - status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info, - sizeof(vsi_ctx->info), cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, &vsi_ctx->info, + sizeof(vsi_ctx->info), + cmd_details, true); vsi_ctx->vsis_allocated = le16_to_cpu(resp->vsi_used); vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free); @@ -2632,33 +2634,28 @@ get_veb_exit: } /** - * i40e_aq_add_macvlan - * @hw: pointer to the hw struct - * @seid: VSI for the mac address + * i40e_prepare_add_macvlan * @mv_list: list of macvlans to be added + * @desc: pointer to AQ descriptor structure * @count: length of the list - * @cmd_details: pointer to command details structure or NULL + * @seid: VSI for the mac address * - * Add MAC/VLAN addresses to the HW filtering + * Internal helper function that prepares the add macvlan request + * and returns the buffer size. 
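
One detail the prepare helper hides: indirect AQ commands must advertise their buffer in the descriptor flags, and buffers larger than I40E_AQ_LARGE_BUF (512 bytes) additionally need the large-buffer flag. Isolated as a sketch (hypothetical helper, same flag logic as the hunk below)::

    /* Sketch: indirect AQ commands carry their payload in a separate
     * buffer; the descriptor records the buffer and, above 512 bytes,
     * must also set the "large buffer" flag.
     */
    static u16 prep_indirect_buf(struct i40e_aq_desc *desc, u16 count,
                                 u16 elem_size)
    {
        u16 buf_size = count * elem_size;

        desc->flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
        if (buf_size > I40E_AQ_LARGE_BUF)    /* 512 bytes */
            desc->flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);

        return buf_size;
    }
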
**/ -i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, - struct i40e_aqc_add_macvlan_element_data *mv_list, - u16 count, struct i40e_asq_cmd_details *cmd_details) +static u16 +i40e_prepare_add_macvlan(struct i40e_aqc_add_macvlan_element_data *mv_list, + struct i40e_aq_desc *desc, u16 count, u16 seid) { - struct i40e_aq_desc desc; struct i40e_aqc_macvlan *cmd = - (struct i40e_aqc_macvlan *)&desc.params.raw; - i40e_status status; + (struct i40e_aqc_macvlan *)&desc->params.raw; u16 buf_size; int i; - if (count == 0 || !mv_list || !hw) - return I40E_ERR_PARAM; - buf_size = count * sizeof(*mv_list); /* prep the rest of the request */ - i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_macvlan); + i40e_fill_default_direct_cmd_desc(desc, i40e_aqc_opc_add_macvlan); cmd->num_addresses = cpu_to_le16(count); cmd->seid[0] = cpu_to_le16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid); cmd->seid[1] = 0; @@ -2669,14 +2666,71 @@ i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, mv_list[i].flags |= cpu_to_le16(I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC); - desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + desc->flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); if (buf_size > I40E_AQ_LARGE_BUF) - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + desc->flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); - status = i40e_asq_send_command(hw, &desc, mv_list, buf_size, - cmd_details); + return buf_size; +} - return status; +/** + * i40e_aq_add_macvlan + * @hw: pointer to the hw struct + * @seid: VSI for the mac address + * @mv_list: list of macvlans to be added + * @count: length of the list + * @cmd_details: pointer to command details structure or NULL + * + * Add MAC/VLAN addresses to the HW filtering + **/ +i40e_status +i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_add_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + u16 buf_size; + + if (count == 0 || !mv_list || !hw) + return I40E_ERR_PARAM; + + buf_size = i40e_prepare_add_macvlan(mv_list, &desc, count, seid); + + return i40e_asq_send_command_atomic(hw, &desc, mv_list, buf_size, + cmd_details, true); +} + +/** + * i40e_aq_add_macvlan_v2 + * @hw: pointer to the hw struct + * @seid: VSI for the mac address + * @mv_list: list of macvlans to be added + * @count: length of the list + * @cmd_details: pointer to command details structure or NULL + * @aq_status: pointer to Admin Queue status return value + * + * Add MAC/VLAN addresses to the HW filtering. + * The _v2 version returns the last Admin Queue status in aq_status + * to avoid race conditions in access to hw->aq.asq_last_status. + * It also calls _v2 versions of asq_send_command functions to + * get the aq_status on the stack. 
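
The filter add/remove callers converted further down consume the out parameter directly; as a usage sketch (a hypothetical wrapper mirroring the i40e_aqc_del_filters() conversion below)::

    /* Sketch: deleting filters with the _v2 API.  A firmware ENOENT
     * (filter already gone) is not treated as an error.
     */
    static int del_filters(struct i40e_hw *hw, u16 seid,
                           struct i40e_aqc_remove_macvlan_element_data *list,
                           u16 num_del)
    {
        enum i40e_admin_queue_err aq_status;
        i40e_status aq_ret;

        aq_ret = i40e_aq_remove_macvlan_v2(hw, seid, list, num_del, NULL,
                                           &aq_status);
        if (aq_ret && aq_status != I40E_AQ_RC_ENOENT)
            return -EIO;
        return 0;
    }
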
+ **/ +i40e_status +i40e_aq_add_macvlan_v2(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_add_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status) +{ + struct i40e_aq_desc desc; + u16 buf_size; + + if (count == 0 || !mv_list || !hw) + return I40E_ERR_PARAM; + + buf_size = i40e_prepare_add_macvlan(mv_list, &desc, count, seid); + + return i40e_asq_send_command_atomic_v2(hw, &desc, mv_list, buf_size, + cmd_details, true, aq_status); } /** @@ -2715,13 +2769,59 @@ i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid, if (buf_size > I40E_AQ_LARGE_BUF) desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); - status = i40e_asq_send_command(hw, &desc, mv_list, buf_size, - cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, mv_list, buf_size, + cmd_details, true); return status; } /** + * i40e_aq_remove_macvlan_v2 + * @hw: pointer to the hw struct + * @seid: VSI for the mac address + * @mv_list: list of macvlans to be removed + * @count: length of the list + * @cmd_details: pointer to command details structure or NULL + * @aq_status: pointer to Admin Queue status return value + * + * Remove MAC/VLAN addresses from the HW filtering. + * The _v2 version returns the last Admin Queue status in aq_status + * to avoid race conditions in access to hw->aq.asq_last_status. + * It also calls _v2 versions of asq_send_command functions to + * get the aq_status on the stack. + **/ +i40e_status +i40e_aq_remove_macvlan_v2(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_remove_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status) +{ + struct i40e_aqc_macvlan *cmd; + struct i40e_aq_desc desc; + u16 buf_size; + + if (count == 0 || !mv_list || !hw) + return I40E_ERR_PARAM; + + buf_size = count * sizeof(*mv_list); + + /* prep the rest of the request */ + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_remove_macvlan); + cmd = (struct i40e_aqc_macvlan *)&desc.params.raw; + cmd->num_addresses = cpu_to_le16(count); + cmd->seid[0] = cpu_to_le16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid); + cmd->seid[1] = 0; + cmd->seid[2] = 0; + + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + if (buf_size > I40E_AQ_LARGE_BUF) + desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + + return i40e_asq_send_command_atomic_v2(hw, &desc, mv_list, buf_size, + cmd_details, true, aq_status); +} + +/** * i40e_mirrorrule_op - Internal helper function to add/delete mirror rule * @hw: pointer to the hw struct * @opcode: AQ opcode for add or delete mirror rule @@ -3868,7 +3968,8 @@ i40e_status i40e_aq_delete_element(struct i40e_hw *hw, u16 seid, cmd->seid = cpu_to_le16(seid); - status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, NULL, 0, + cmd_details, true); return status; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 1e57cc8c47d7..90fff05fbd2b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -275,9 +275,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) rx_ring->rx_stats.alloc_page_failed, rx_ring->rx_stats.alloc_buff_failed); dev_info(&pf->pdev->dev, - " rx_rings[%i]: rx_stats: realloc_count = %lld, page_reuse_count = %lld\n", + " rx_rings[%i]: rx_stats: realloc_count = 0, page_reuse_count = %lld\n", i, - rx_ring->rx_stats.realloc_count, 
rx_ring->rx_stats.page_reuse_count); dev_info(&pf->pdev->dev, " rx_rings[%i]: size = %i\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 091f36adbbe1..e48499624d22 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -295,6 +295,10 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = { I40E_VSI_STAT("tx_busy", tx_busy), I40E_VSI_STAT("rx_alloc_fail", rx_buf_failed), I40E_VSI_STAT("rx_pg_alloc_fail", rx_page_failed), + I40E_VSI_STAT("rx_cache_reuse", rx_page_reuse), + I40E_VSI_STAT("rx_cache_alloc", rx_page_alloc), + I40E_VSI_STAT("rx_cache_waive", rx_page_waive), + I40E_VSI_STAT("rx_cache_busy", rx_page_busy), }; /* These PF_STATs might look like duplicates of some NETDEV_STATs, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f531bc1df338..9b7ce6d9a92b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -773,6 +773,7 @@ void i40e_update_veb_stats(struct i40e_veb *veb) **/ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) { + u64 rx_page, rx_buf, rx_reuse, rx_alloc, rx_waive, rx_busy; struct i40e_pf *pf = vsi->back; struct rtnl_link_stats64 *ons; struct rtnl_link_stats64 *ns; /* netdev stats */ @@ -780,7 +781,6 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) struct i40e_eth_stats *es; /* device's eth stats */ u64 tx_restart, tx_busy; struct i40e_ring *p; - u64 rx_page, rx_buf; u64 bytes, packets; unsigned int start; u64 tx_linearize; @@ -806,6 +806,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) tx_restart = tx_busy = tx_linearize = tx_force_wb = 0; rx_page = 0; rx_buf = 0; + rx_reuse = 0; + rx_alloc = 0; + rx_waive = 0; + rx_busy = 0; rcu_read_lock(); for (q = 0; q < vsi->num_queue_pairs; q++) { /* locate Tx ring */ @@ -839,6 +843,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) rx_p += packets; rx_buf += p->rx_stats.alloc_buff_failed; rx_page += p->rx_stats.alloc_page_failed; + rx_reuse += p->rx_stats.page_reuse_count; + rx_alloc += p->rx_stats.page_alloc_count; + rx_waive += p->rx_stats.page_waive_count; + rx_busy += p->rx_stats.page_busy_count; if (i40e_enabled_xdp_vsi(vsi)) { /* locate XDP ring */ @@ -866,6 +874,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) vsi->tx_force_wb = tx_force_wb; vsi->rx_page_failed = rx_page; vsi->rx_buf_failed = rx_buf; + vsi->rx_page_reuse = rx_reuse; + vsi->rx_page_alloc = rx_alloc; + vsi->rx_page_waive = rx_waive; + vsi->rx_page_busy = rx_busy; ns->rx_packets = rx_p; ns->rx_bytes = rx_b; @@ -2143,19 +2155,19 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name, int num_del, int *retval) { struct i40e_hw *hw = &vsi->back->hw; + enum i40e_admin_queue_err aq_status; i40e_status aq_ret; - int aq_err; - aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid, list, num_del, NULL); - aq_err = hw->aq.asq_last_status; + aq_ret = i40e_aq_remove_macvlan_v2(hw, vsi->seid, list, num_del, NULL, + &aq_status); /* Explicitly ignore and do not report when firmware returns ENOENT */ - if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) { + if (aq_ret && !(aq_status == I40E_AQ_RC_ENOENT)) { *retval = -EIO; dev_info(&vsi->back->pdev->dev, "ignoring delete macvlan error on %s, err %s, aq_err %s\n", vsi_name, i40e_stat_str(hw, aq_ret), - i40e_aq_str(hw, aq_err)); + i40e_aq_str(hw, aq_status)); } } @@ -2178,10 +2190,10 @@ void i40e_aqc_add_filters(struct 
i40e_vsi *vsi, const char *vsi_name, int num_add) { struct i40e_hw *hw = &vsi->back->hw; - int aq_err, fcnt; + enum i40e_admin_queue_err aq_status; + int fcnt; - i40e_aq_add_macvlan(hw, vsi->seid, list, num_add, NULL); - aq_err = hw->aq.asq_last_status; + i40e_aq_add_macvlan_v2(hw, vsi->seid, list, num_add, NULL, &aq_status); fcnt = i40e_update_filter_state(num_add, list, add_head); if (fcnt != num_add) { @@ -2189,17 +2201,19 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, promiscuous mode forced on\n", - i40e_aq_str(hw, aq_err), vsi_name); + i40e_aq_str(hw, aq_status), vsi_name); } else if (vsi->type == I40E_VSI_SRIOV || vsi->type == I40E_VSI_VMDQ1 || vsi->type == I40E_VSI_VMDQ2) { dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, please set promiscuous on manually for %s\n", - i40e_aq_str(hw, aq_err), vsi_name, vsi_name); + i40e_aq_str(hw, aq_status), vsi_name, + vsi_name); } else { dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, incorrect VSI type: %i.\n", - i40e_aq_str(hw, aq_err), vsi_name, vsi->type); + i40e_aq_str(hw, aq_status), vsi_name, + vsi->type); } } } @@ -12722,7 +12736,8 @@ static int i40e_set_features(struct net_device *netdev, else i40e_vlan_stripping_disable(vsi); - if (!(features & NETIF_F_HW_TC) && pf->num_cloud_filters) { + if (!(features & NETIF_F_HW_TC) && + (netdev->features & NETIF_F_HW_TC) && pf->num_cloud_filters) { dev_err(&pf->pdev->dev, "Offloaded tc filters active, can't turn hw_tc_offload off"); return -EINVAL; @@ -13478,6 +13493,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) netdev->features |= hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID; + netdev->features &= ~NETIF_F_HW_TC; + if (vsi->type == I40E_VSI_MAIN) { SET_NETDEV_DEV(netdev, &pf->pdev->dev); ether_addr_copy(mac_addr, hw->mac.perm_addr); diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 9241b6005ad3..ebdcde6f1aeb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -27,10 +27,25 @@ i40e_asq_send_command(struct i40e_hw *hw, struct i40e_aq_desc *desc, void *buff, /* can be NULL */ u16 buff_size, struct i40e_asq_cmd_details *cmd_details); i40e_status +i40e_asq_send_command_v2(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status); +i40e_status i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, void *buff, /* can be NULL */ u16 buff_size, struct i40e_asq_cmd_details *cmd_details, bool is_atomic_context); +i40e_status +i40e_asq_send_command_atomic_v2(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + bool is_atomic_context, + enum i40e_admin_queue_err *aq_status); /* debug function for adminq */ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, @@ -150,9 +165,19 @@ i40e_status i40e_aq_get_veb_parameters(struct i40e_hw *hw, i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 vsi_id, struct i40e_aqc_add_macvlan_element_data *mv_list, u16 count, struct i40e_asq_cmd_details *cmd_details); +i40e_status +i40e_aq_add_macvlan_v2(struct i40e_hw *hw, u16 seid, + struct 
i40e_aqc_add_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status); i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 vsi_id, struct i40e_aqc_remove_macvlan_element_data *mv_list, u16 count, struct i40e_asq_cmd_details *cmd_details); +i40e_status +i40e_aq_remove_macvlan_v2(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_remove_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status); i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid, u16 rule_type, u16 dest_vsi, u16 count, __le16 *mr_list, struct i40e_asq_cmd_details *cmd_details, diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 66cc79500c10..0eae5858f2fe 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -830,8 +830,6 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) i40e_clean_tx_ring(tx_ring); kfree(tx_ring->tx_bi); tx_ring->tx_bi = NULL; - kfree(tx_ring->xsk_descs); - tx_ring->xsk_descs = NULL; if (tx_ring->desc) { dma_free_coherent(tx_ring->dev, tx_ring->size, @@ -1382,8 +1380,6 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, new_buff->page_offset = old_buff->page_offset; new_buff->pagecnt_bias = old_buff->pagecnt_bias; - rx_ring->rx_stats.page_reuse_count++; - /* clear contents of buffer_info */ old_buff->page = NULL; } @@ -1433,13 +1429,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) if (!tx_ring->tx_bi) goto err; - if (ring_is_xdp(tx_ring)) { - tx_ring->xsk_descs = kcalloc(I40E_MAX_NUM_DESCRIPTORS, sizeof(*tx_ring->xsk_descs), - GFP_KERNEL); - if (!tx_ring->xsk_descs) - goto err; - } - u64_stats_init(&tx_ring->syncp); /* round up to nearest 4K */ @@ -1463,8 +1452,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) return 0; err: - kfree(tx_ring->xsk_descs); - tx_ring->xsk_descs = NULL; kfree(tx_ring->tx_bi); tx_ring->tx_bi = NULL; return -ENOMEM; @@ -1675,6 +1662,8 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring, return false; } + rx_ring->rx_stats.page_alloc_count++; + /* map page for use */ dma = dma_map_page_attrs(rx_ring->dev, page, 0, i40e_rx_pg_size(rx_ring), @@ -1982,32 +1971,43 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb, /** * i40e_can_reuse_rx_page - Determine if page can be reused for another Rx * @rx_buffer: buffer containing the page + * @rx_stats: rx stats structure for the rx ring * @rx_buffer_pgcnt: buffer page refcount pre xdp_do_redirect() call * * If page is reusable, we have a green light for calling i40e_reuse_rx_page, * which will assign the current buffer to the buffer that next_to_alloc is * pointing to; otherwise, the DMA mapping needs to be destroyed and - * page freed + * page freed. + * + * rx_stats will be updated to indicate whether the page was waived + * or busy if it could not be reused. */ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, + struct i40e_rx_queue_stats *rx_stats, int rx_buffer_pgcnt) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; /* Is any reuse possible? 
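
The waive/busy accounting below slots into an existing refcount trick that the diff takes for granted: the driver pre-charges the page refcount and remembers its own share in pagecnt_bias, so comparing the two reveals whether the network stack still holds the other half-page. A simplified sketch, with the stats passed as bare counters::

    #include <linux/skbuff.h>

    /* Sketch: half-page recycling accounting.  The stack owns
     * (page refcount - pagecnt_bias) references; if that difference
     * exceeds 1, the other half of the page is still in flight and
     * the buffer is "busy".  Pages that left the reusable state
     * entirely (wrong NUMA node, pfmemalloc) are "waived".
     */
    static bool can_recycle(struct page *page, unsigned int pagecnt_bias,
                            int pgcnt, u64 *waive, u64 *busy)
    {
        if (!dev_page_is_reusable(page)) {
            (*waive)++;
            return false;
        }
        if (pgcnt - (int)pagecnt_bias > 1) {
            (*busy)++;
            return false;
        }
        return true;
    }
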
*/ - if (!dev_page_is_reusable(page)) + if (!dev_page_is_reusable(page)) { + rx_stats->page_waive_count++; return false; + } #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) + if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) { + rx_stats->page_busy_count++; return false; + } #else #define I40E_LAST_OFFSET \ (SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048) - if (rx_buffer->page_offset > I40E_LAST_OFFSET) + if (rx_buffer->page_offset > I40E_LAST_OFFSET) { + rx_stats->page_busy_count++; return false; + } #endif /* If we have drained the page fragment pool we need to update @@ -2237,7 +2237,7 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, int rx_buffer_pgcnt) { - if (i40e_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { + if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats, rx_buffer_pgcnt)) { /* hand second half of page back to the ring */ i40e_reuse_rx_page(rx_ring, rx_buffer); } else { diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index bfc2845c99d1..c471c2da313c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -298,7 +298,9 @@ struct i40e_rx_queue_stats { u64 alloc_page_failed; u64 alloc_buff_failed; u64 page_reuse_count; - u64 realloc_count; + u64 page_alloc_count; + u64 page_waive_count; + u64 page_busy_count; }; enum i40e_ring_state_t { @@ -390,7 +392,6 @@ struct i40e_ring { u16 rx_offset; struct xdp_rxq_info xdp_rxq; struct xsk_buff_pool *xsk_pool; - struct xdp_desc *xsk_descs; /* For storing descriptors in the AF_XDP ZC path */ } ____cacheline_internodealigned_in_smp; static inline bool ring_uses_build_skb(struct i40e_ring *ring) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 67e9844e2076..5a997b0d07d8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -471,11 +471,11 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring) **/ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) { - struct xdp_desc *descs = xdp_ring->xsk_descs; + struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; u32 nb_pkts, nb_processed = 0; unsigned int total_bytes = 0; - nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, descs, budget); + nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); if (!nb_pkts) return true; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index e2e3ef7fba7f..e3df0134dc77 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -2803,6 +2803,8 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, /* clone ring and setup updated count */ xdp_rings[i] = *vsi->xdp_rings[i]; xdp_rings[i].count = new_tx_cnt; + xdp_rings[i].next_dd = ICE_RING_QUARTER(&xdp_rings[i]) - 1; + xdp_rings[i].next_rs = ICE_RING_QUARTER(&xdp_rings[i]) - 1; xdp_rings[i].desc = NULL; xdp_rings[i].tx_buf = NULL; err = ice_setup_tx_ring(&xdp_rings[i]); diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index fc3580167e7b..263a2e7577a2 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -227,6 +227,11 @@ void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos) 
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) qos->tc_info[i].rel_bw = dcbx_cfg->etscfg.tcbwtable[i]; + + qos->pfc_mode = dcbx_cfg->pfc_mode; + if (qos->pfc_mode == IIDC_DSCP_PFC_MODE) + for (i = 0; i < IIDC_MAX_DSCP_MAPPING; i++) + qos->dscp_map[i] = dcbx_cfg->dscp_map[i]; } EXPORT_SYMBOL_GPL(ice_get_qos_params); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f46af3b34074..63f43400a146 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2495,10 +2495,10 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx]; xdp_ring->vsi = vsi; xdp_ring->netdev = NULL; - xdp_ring->next_dd = ICE_TX_THRESH - 1; - xdp_ring->next_rs = ICE_TX_THRESH - 1; xdp_ring->dev = dev; xdp_ring->count = vsi->num_tx_desc; + xdp_ring->next_dd = ICE_RING_QUARTER(xdp_ring) - 1; + xdp_ring->next_rs = ICE_RING_QUARTER(xdp_ring) - 1; WRITE_ONCE(vsi->xdp_rings[i], xdp_ring); if (ice_setup_tx_ring(xdp_ring)) goto free_xdp_rings; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index c2258bee8ecb..7b9b3b750bf0 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -173,6 +173,8 @@ tx_skip_free: tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; + tx_ring->next_dd = ICE_RING_QUARTER(tx_ring) - 1; + tx_ring->next_rs = ICE_RING_QUARTER(tx_ring) - 1; if (!tx_ring->netdev) return; @@ -1467,7 +1469,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) bool wd; if (tx_ring->xsk_pool) - wd = ice_clean_tx_irq_zc(tx_ring, budget); + wd = ice_xmit_zc(tx_ring, ICE_DESC_UNUSED(tx_ring), budget); else if (ice_ring_is_xdp(tx_ring)) wd = true; else @@ -1520,7 +1522,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) /* Exit the polling mode, but don't re-enable interrupts if stack might * poll us due to busy-polling */ - if (likely(napi_complete_done(napi, work_done))) { + if (napi_complete_done(napi, work_done)) { ice_net_dim(q_vector); ice_enable_interrupt(q_vector); } else { diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index b7b3bd4816f0..466253ac2ee1 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -13,7 +13,6 @@ #define ICE_MAX_CHAINED_RX_BUFS 5 #define ICE_MAX_BUF_TXD 8 #define ICE_MIN_TX_LEN 17 -#define ICE_TX_THRESH 32 /* The size limit for a transmit buffer in a descriptor is (16K - 1). * In order to align with the read requests we will align the value to @@ -111,6 +110,8 @@ static inline int ice_skb_pad(void) (u16)((((R)->next_to_clean > (R)->next_to_use) ? 
0 : (R)->count) + \ (R)->next_to_clean - (R)->next_to_use - 1) +#define ICE_RING_QUARTER(R) ((R)->count >> 2) + #define ICE_TX_FLAGS_TSO BIT(0) #define ICE_TX_FLAGS_HW_VLAN BIT(1) #define ICE_TX_FLAGS_SW_VLAN BIT(2) @@ -321,17 +322,18 @@ struct ice_tx_ring { u16 count; /* Number of descriptors */ u16 q_index; /* Queue number of ring */ /* stats structs */ - struct ice_q_stats stats; - struct u64_stats_sync syncp; struct ice_txq_stats tx_stats; - /* CL3 - 3rd cacheline starts here */ + struct ice_q_stats stats; + struct u64_stats_sync syncp; struct rcu_head rcu; /* to avoid race on free */ DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ struct ice_channel *ch; struct ice_ptp_tx *tx_tstamps; spinlock_t tx_lock; u32 txq_teid; /* Added Tx queue TEID */ + /* CL4 - 4th cacheline starts here */ + u16 xdp_tx_active; #define ICE_TX_FLAGS_RING_XDP BIT(0) u8 flags; u8 dcb_tc; /* Traffic class of ring */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 0e87b98e0966..eb21cec1d772 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -222,6 +222,7 @@ ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) { unsigned int total_bytes = 0, total_pkts = 0; + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); u16 ntc = xdp_ring->next_to_clean; struct ice_tx_desc *next_dd_desc; u16 next_dd = xdp_ring->next_dd; @@ -233,7 +234,7 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) return; - for (i = 0; i < ICE_TX_THRESH; i++) { + for (i = 0; i < tx_thresh; i++) { tx_buf = &xdp_ring->tx_buf[ntc]; total_bytes += tx_buf->bytecount; @@ -254,9 +255,9 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) } next_dd_desc->cmd_type_offset_bsz = 0; - xdp_ring->next_dd = xdp_ring->next_dd + ICE_TX_THRESH; + xdp_ring->next_dd = xdp_ring->next_dd + tx_thresh; if (xdp_ring->next_dd > xdp_ring->count) - xdp_ring->next_dd = ICE_TX_THRESH - 1; + xdp_ring->next_dd = tx_thresh - 1; xdp_ring->next_to_clean = ntc; ice_update_tx_ring_stats(xdp_ring, total_pkts, total_bytes); } @@ -269,12 +270,13 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) */ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) { + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); u16 i = xdp_ring->next_to_use; struct ice_tx_desc *tx_desc; struct ice_tx_buf *tx_buf; dma_addr_t dma; - if (ICE_DESC_UNUSED(xdp_ring) < ICE_TX_THRESH) + if (ICE_DESC_UNUSED(xdp_ring) < tx_thresh) ice_clean_xdp_irq(xdp_ring); if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) { @@ -300,13 +302,14 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 0, size, 0); + xdp_ring->xdp_tx_active++; i++; if (i == xdp_ring->count) { i = 0; tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); tx_desc->cmd_type_offset_bsz |= cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); - xdp_ring->next_rs = ICE_TX_THRESH - 1; + xdp_ring->next_rs = tx_thresh - 1; } xdp_ring->next_to_use = i; @@ -314,7 +317,7 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); tx_desc->cmd_type_offset_bsz |= cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); - xdp_ring->next_rs += ICE_TX_THRESH; + xdp_ring->next_rs += tx_thresh; } return ICE_XDP_TX; diff --git 
a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index feb874bde171..ed430d566274 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -327,6 +327,13 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) bool if_running, pool_present = !!pool; int ret = 0, pool_failure = 0; + if (!is_power_of_2(vsi->rx_rings[qid]->count) || + !is_power_of_2(vsi->tx_rings[qid]->count)) { + netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n"); + pool_failure = -EINVAL; + goto failure; + } + if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi); if (if_running) { @@ -349,6 +356,7 @@ xsk_pool_if_up: netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret); } +failure: if (pool_failure) { netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n", pool_present ? "en" : "dis", pool_failure); @@ -359,33 +367,28 @@ xsk_pool_if_up: } /** - * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers - * @rx_ring: Rx ring + * ice_fill_rx_descs - pick buffers from XSK buffer pool and use it + * @pool: XSK Buffer pool to pull the buffers from + * @xdp: SW ring of xdp_buff that will hold the buffers + * @rx_desc: Pointer to Rx descriptors that will be filled * @count: The number of buffers to allocate * * This function allocates a number of Rx buffers from the fill ring * or the internal recycle mechanism and places them on the Rx ring. * - * Returns true if all allocations were successful, false if any fail. + * Note that ring wrap should be handled by caller of this function. + * + * Returns the amount of allocated Rx descriptors */ -bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, + union ice_32b_rx_flex_desc *rx_desc, u16 count) { - union ice_32b_rx_flex_desc *rx_desc; - u16 ntu = rx_ring->next_to_use; - struct xdp_buff **xdp; - u32 nb_buffs, i; dma_addr_t dma; + u16 buffs; + int i; - rx_desc = ICE_RX_DESC(rx_ring, ntu); - xdp = ice_xdp_buf(rx_ring, ntu); - - nb_buffs = min_t(u16, count, rx_ring->count - ntu); - nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs); - if (!nb_buffs) - return false; - - i = nb_buffs; - while (i--) { + buffs = xsk_buff_alloc_batch(pool, xdp, count); + for (i = 0; i < buffs; i++) { dma = xsk_buff_xdp_get_dma(*xdp); rx_desc->read.pkt_addr = cpu_to_le64(dma); rx_desc->wb.status_error0 = 0; @@ -394,13 +397,77 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) xdp++; } + return buffs; +} + +/** + * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers + * @rx_ring: Rx ring + * @count: The number of buffers to allocate + * + * Place the @count of descriptors onto Rx ring. Handle the ring wrap + * for case where space from next_to_use up to the end of ring is less + * than @count. Finally do a tail bump. + * + * Returns true if all allocations were successful, false if any fail. 
+ */ +static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +{ + union ice_32b_rx_flex_desc *rx_desc; + u32 nb_buffs_extra = 0, nb_buffs; + u16 ntu = rx_ring->next_to_use; + u16 total_count = count; + struct xdp_buff **xdp; + + rx_desc = ICE_RX_DESC(rx_ring, ntu); + xdp = ice_xdp_buf(rx_ring, ntu); + + if (ntu + count >= rx_ring->count) { + nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, + rx_desc, + rx_ring->count - ntu); + rx_desc = ICE_RX_DESC(rx_ring, 0); + xdp = ice_xdp_buf(rx_ring, 0); + ntu = 0; + count -= nb_buffs_extra; + ice_release_rx_desc(rx_ring, 0); + } + + nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count); + ntu += nb_buffs; if (ntu == rx_ring->count) ntu = 0; - ice_release_rx_desc(rx_ring, ntu); + if (rx_ring->next_to_use != ntu) + ice_release_rx_desc(rx_ring, ntu); - return count == nb_buffs; + return total_count == (nb_buffs_extra + nb_buffs); +} + +/** + * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers + * @rx_ring: Rx ring + * @count: The number of buffers to allocate + * + * Wrapper for internal allocation routine; figure out how many tail + * bumps should take place based on the given threshold + * + * Returns true if all calls to internal alloc routine succeeded + */ +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +{ + u16 rx_thresh = ICE_RING_QUARTER(rx_ring); + u16 batched, leftover, i, tail_bumps; + + batched = ALIGN_DOWN(count, rx_thresh); + tail_bumps = batched / rx_thresh; + leftover = count & (rx_thresh - 1); + + for (i = 0; i < tail_bumps; i++) + if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh)) + return false; + return __ice_alloc_rx_bufs_zc(rx_ring, leftover); } /** @@ -616,134 +683,221 @@ construct_skb: } /** - * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries + * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer * @xdp_ring: XDP Tx ring - * @budget: max number of frames to xmit + * @tx_buf: Tx buffer to clean + */ +static void +ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) +{ + xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf); + xdp_ring->xdp_tx_active--; + dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); +} + +/** + * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring + * @xdp_ring: XDP ring to clean + * @napi_budget: amount of descriptors that NAPI allows us to clean * - * Returns true if cleanup/transmission is done. 
+ * Returns count of cleaned descriptors */ -static bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, int budget) +static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget) { - struct ice_tx_desc *tx_desc = NULL; - bool work_done = true; - struct xdp_desc desc; - dma_addr_t dma; + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); + int budget = napi_budget / tx_thresh; + u16 next_dd = xdp_ring->next_dd; + u16 ntc, cleared_dds = 0; - while (likely(budget-- > 0)) { + do { + struct ice_tx_desc *next_dd_desc; + u16 desc_cnt = xdp_ring->count; struct ice_tx_buf *tx_buf; + u32 xsk_frames; + u16 i; - if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) { - xdp_ring->tx_stats.tx_busy++; - work_done = false; + next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd); + if (!(next_dd_desc->cmd_type_offset_bsz & + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) break; - } - - tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use]; - if (!xsk_tx_peek_desc(xdp_ring->xsk_pool, &desc)) - break; - - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc.addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, - desc.len); + cleared_dds++; + xsk_frames = 0; + if (likely(!xdp_ring->xdp_tx_active)) { + xsk_frames = tx_thresh; + goto skip; + } - tx_buf->bytecount = desc.len; + ntc = xdp_ring->next_to_clean; - tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use); - tx_desc->buf_addr = cpu_to_le64(dma); - tx_desc->cmd_type_offset_bsz = - ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0, desc.len, 0); + for (i = 0; i < tx_thresh; i++) { + tx_buf = &xdp_ring->tx_buf[ntc]; - xdp_ring->next_to_use++; - if (xdp_ring->next_to_use == xdp_ring->count) - xdp_ring->next_to_use = 0; - } - - if (tx_desc) { - ice_xdp_ring_update_tail(xdp_ring); - xsk_tx_release(xdp_ring->xsk_pool); - } + if (tx_buf->raw_buf) { + ice_clean_xdp_tx_buf(xdp_ring, tx_buf); + tx_buf->raw_buf = NULL; + } else { + xsk_frames++; + } - return budget > 0 && work_done; + ntc++; + if (ntc >= xdp_ring->count) + ntc = 0; + } +skip: + xdp_ring->next_to_clean += tx_thresh; + if (xdp_ring->next_to_clean >= desc_cnt) + xdp_ring->next_to_clean -= desc_cnt; + if (xsk_frames) + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); + next_dd_desc->cmd_type_offset_bsz = 0; + next_dd = next_dd + tx_thresh; + if (next_dd >= desc_cnt) + next_dd = tx_thresh - 1; + } while (budget--); + + xdp_ring->next_dd = next_dd; + + return cleared_dds * tx_thresh; } /** - * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer - * @xdp_ring: XDP Tx ring - * @tx_buf: Tx buffer to clean + * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor + * @xdp_ring: XDP ring to produce the HW Tx descriptor on + * @desc: AF_XDP descriptor to pull the DMA address and length from + * @total_bytes: bytes accumulator that will be used for stats update */ -static void -ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) +static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc, + unsigned int *total_bytes) { - xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf); - dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); - dma_unmap_len_set(tx_buf, len, 0); + struct ice_tx_desc *tx_desc; + dma_addr_t dma; + + dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len); + + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++); + tx_desc->buf_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 
+ 0, desc->len, 0); + + *total_bytes += desc->len; } /** - * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries - * @xdp_ring: XDP Tx ring - * @budget: NAPI budget - * - * Returns true if cleanup/tranmission is done. + * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from + * @total_bytes: bytes accumulator that will be used for stats update */ -bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget) +static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, + unsigned int *total_bytes) { - int total_packets = 0, total_bytes = 0; - s16 ntc = xdp_ring->next_to_clean; + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); + u16 ntu = xdp_ring->next_to_use; struct ice_tx_desc *tx_desc; - struct ice_tx_buf *tx_buf; - u32 xsk_frames = 0; - bool xmit_done; + u32 i; - tx_desc = ICE_TX_DESC(xdp_ring, ntc); - tx_buf = &xdp_ring->tx_buf[ntc]; - ntc -= xdp_ring->count; + loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { + dma_addr_t dma; - do { - if (!(tx_desc->cmd_type_offset_bsz & - cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) - break; + dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len); - total_bytes += tx_buf->bytecount; - total_packets++; + tx_desc = ICE_TX_DESC(xdp_ring, ntu++); + tx_desc->buf_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, + 0, descs[i].len, 0); - if (tx_buf->raw_buf) { - ice_clean_xdp_tx_buf(xdp_ring, tx_buf); - tx_buf->raw_buf = NULL; - } else { - xsk_frames++; - } + *total_bytes += descs[i].len; + } - tx_desc->cmd_type_offset_bsz = 0; - tx_buf++; - tx_desc++; - ntc++; + xdp_ring->next_to_use = ntu; - if (unlikely(!ntc)) { - ntc -= xdp_ring->count; - tx_buf = xdp_ring->tx_buf; - tx_desc = ICE_TX_DESC(xdp_ring, 0); - } + if (xdp_ring->next_to_use > xdp_ring->next_rs) { + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + xdp_ring->next_rs += tx_thresh; + } +} - prefetch(tx_desc); +/** + * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from + * @nb_pkts: count of packets to be send + * @total_bytes: bytes accumulator that will be used for stats update + */ +static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, + u32 nb_pkts, unsigned int *total_bytes) +{ + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); + u32 batched, leftover, i; + + batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH); + leftover = nb_pkts & (PKTS_PER_BATCH - 1); + for (i = 0; i < batched; i += PKTS_PER_BATCH) + ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes); + for (; i < batched + leftover; i++) + ice_xmit_pkt(xdp_ring, &descs[i], total_bytes); + + if (xdp_ring->next_to_use > xdp_ring->next_rs) { + struct ice_tx_desc *tx_desc; + + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + xdp_ring->next_rs += tx_thresh; + } +} - } while (likely(--budget)); +/** + * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @budget: number 
of free descriptors on HW Tx ring that can be used + * @napi_budget: amount of descriptors that NAPI allows us to clean + * + * Returns true if there is no more work that needs to be done, false otherwise + */ +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget) +{ + struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); + u32 nb_pkts, nb_processed = 0; + unsigned int total_bytes = 0; + + if (budget < tx_thresh) + budget += ice_clean_xdp_irq_zc(xdp_ring, napi_budget); + + nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); + if (!nb_pkts) + return true; + + if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { + struct ice_tx_desc *tx_desc; + + nb_processed = xdp_ring->count - xdp_ring->next_to_use; + ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes); + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + xdp_ring->next_rs = tx_thresh - 1; + xdp_ring->next_to_use = 0; + } - ntc += xdp_ring->count; - xdp_ring->next_to_clean = ntc; + ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed, + &total_bytes); - if (xsk_frames) - xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); + ice_xdp_ring_update_tail(xdp_ring); + ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes); if (xsk_uses_need_wakeup(xdp_ring->xsk_pool)) xsk_set_tx_need_wakeup(xdp_ring->xsk_pool); - ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes); - xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK); - - return budget > 0 && xmit_done; + return nb_pkts < budget; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 4c7bd8e9dfc4..0cbb5793b5b8 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -6,19 +6,37 @@ #include "ice_txrx.h" #include "ice.h" +#define PKTS_PER_BATCH 8 + +#ifdef __clang__ +#define loop_unrolled_for _Pragma("clang loop unroll_count(8)") for +#elif __GNUC__ >= 4 +#define loop_unrolled_for _Pragma("GCC unroll 8") for +#else +#define loop_unrolled_for for +#endif + struct ice_vsi; #ifdef CONFIG_XDP_SOCKETS int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid); int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget); -bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget); int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count); bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget); #else +static inline bool +ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring, + u32 __always_unused budget, + int __always_unused napi_budget) +{ + return false; +} + static inline int ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi, struct xsk_buff_pool __always_unused *pool, @@ -35,13 +53,6 @@ ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, } static inline bool -ice_clean_tx_irq_zc(struct ice_tx_ring __always_unused *xdp_ring, - int __always_unused budget) -{ - return false; -} - -static inline bool ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring, u16 __always_unused count) { diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c 
b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 51a2dcaf553d..2a5782063f4c 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -965,10 +965,6 @@ static int igb_set_ringparam(struct net_device *netdev, memcpy(&temp_ring[i], adapter->rx_ring[i], sizeof(struct igb_ring)); - /* Clear copied XDP RX-queue info */ - memset(&temp_ring[i].xdp_rxq, 0, - sizeof(temp_ring[i].xdp_rxq)); - temp_ring[i].count = new_rx_count; err = igb_setup_rx_resources(&temp_ring[i]); if (err) { diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index bfa321e4003f..34b33b21e0dc 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4345,7 +4345,18 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) { struct igb_adapter *adapter = netdev_priv(rx_ring->netdev); struct device *dev = rx_ring->dev; - int size; + int size, res; + + /* XDP RX-queue info */ + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->queue_index, 0); + if (res < 0) { + dev_err(dev, "Failed to register xdp_rxq index %u\n", + rx_ring->queue_index); + return res; + } size = sizeof(struct igb_rx_buffer) * rx_ring->count; @@ -4368,14 +4379,10 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) rx_ring->xdp_prog = adapter->xdp_prog; - /* XDP RX-queue info */ - if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, - rx_ring->queue_index, 0) < 0) - goto err; - return 0; err: + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index b965fb809d84..74b2c590ed5d 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -505,6 +505,9 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring) u8 index = rx_ring->queue_index; int size, desc_len, res; + /* XDP RX-queue info */ + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index, rx_ring->q_vector->napi.napi_id); if (res < 0) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 3631d612aaca..25491edc35ce 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -578,31 +578,78 @@ void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable) } } +static int cgx_lmac_get_pause_frm_status(void *cgxd, int lmac_id, + u8 *tx_pause, u8 *rx_pause) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (is_dev_rpm(cgx)) + return 0; + + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); + *rx_pause = !!(cfg & CGX_SMUX_RX_FRM_CTL_CTL_BCK); + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL); + *tx_pause = !!(cfg & CGX_SMUX_TX_CTL_L2P_BP_CONV); + return 0; +} + /* Enable or disable forwarding received pause frames to Tx block */ void cgx_lmac_enadis_rx_pause_fwding(void *cgxd, int lmac_id, bool enable) { struct cgx *cgx = cgxd; + u8 rx_pause, tx_pause; + bool is_pfc_enabled; + struct lmac *lmac; u64 cfg; if (!cgx) return; - if (enable) { - cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); - cfg |= 
CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + lmac = lmac_pdata(lmac_id, cgx); + if (!lmac) + return; - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); - cfg |= CGX_SMUX_RX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + /* Pause frames are not enabled just return */ + if (!bitmap_weight(lmac->rx_fc_pfvf_bmap.bmap, lmac->rx_fc_pfvf_bmap.max)) + return; + + cgx_lmac_get_pause_frm_status(cgx, lmac_id, &rx_pause, &tx_pause); + is_pfc_enabled = rx_pause ? false : true; + + if (enable) { + if (!is_pfc_enabled) { + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg |= CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); + cfg |= CGX_SMUX_RX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + } else { + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_CBFC_CTL); + cfg |= CGXX_SMUX_CBFC_CTL_BCK_EN; + cgx_write(cgx, lmac_id, CGXX_SMUX_CBFC_CTL, cfg); + } } else { - cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); - cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); - cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + if (!is_pfc_enabled) { + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); + cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + } else { + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_CBFC_CTL); + cfg &= ~CGXX_SMUX_CBFC_CTL_BCK_EN; + cgx_write(cgx, lmac_id, CGXX_SMUX_CBFC_CTL, cfg); + } } } @@ -722,26 +769,6 @@ int cgx_lmac_tx_enable(void *cgxd, int lmac_id, bool enable) return !!(last & DATA_PKT_TX_EN); } -static int cgx_lmac_get_pause_frm_status(void *cgxd, int lmac_id, - u8 *tx_pause, u8 *rx_pause) -{ - struct cgx *cgx = cgxd; - u64 cfg; - - if (is_dev_rpm(cgx)) - return 0; - - if (!is_lmac_valid(cgx, lmac_id)) - return -ENODEV; - - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); - *rx_pause = !!(cfg & CGX_SMUX_RX_FRM_CTL_CTL_BCK); - - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL); - *tx_pause = !!(cfg & CGX_SMUX_TX_CTL_L2P_BP_CONV); - return 0; -} - static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause) { @@ -782,21 +809,8 @@ static void cgx_lmac_pause_frm_config(void *cgxd, int lmac_id, bool enable) if (!is_lmac_valid(cgx, lmac_id)) return; - if (enable) { - /* Enable receive pause frames */ - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); - cfg |= CGX_SMUX_RX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); - - cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); - cfg |= CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); - - /* Enable pause frames transmission */ - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL); - cfg |= CGX_SMUX_TX_CTL_L2P_BP_CONV; - cgx_write(cgx, lmac_id, CGXX_SMUX_TX_CTL, cfg); + if (enable) { /* Set pause time and interval */ cgx_write(cgx, lmac_id, CGXX_SMUX_TX_PAUSE_PKT_TIME, DEFAULT_PAUSE_TIME); @@ -813,21 +827,120 @@ static void cgx_lmac_pause_frm_config(void *cgxd, int lmac_id, bool enable) cfg &= ~0xFFFFULL; cgx_write(cgx, lmac_id, CGXX_GMP_GMI_TX_PAUSE_PKT_INTERVAL, cfg | (DEFAULT_PAUSE_TIME / 2)); - } else { - 
/* ALL pause frames received are completely ignored */ - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); - cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + } - cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); - cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + /* ALL pause frames received are completely ignored */ + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); + cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + + /* Disable pause frames transmission */ + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL); + cfg &= ~CGX_SMUX_TX_CTL_L2P_BP_CONV; + cgx_write(cgx, lmac_id, CGXX_SMUX_TX_CTL, cfg); + + cfg = cgx_read(cgx, 0, CGXX_CMR_RX_OVR_BP); + cfg |= CGX_CMR_RX_OVR_BP_EN(lmac_id); + cfg &= ~CGX_CMR_RX_OVR_BP_BP(lmac_id); + cgx_write(cgx, 0, CGXX_CMR_RX_OVR_BP, cfg); +} + +int verify_lmac_fc_cfg(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause, + int pfvf_idx) +{ + struct cgx *cgx = cgxd; + struct lmac *lmac; + + lmac = lmac_pdata(lmac_id, cgx); + if (!lmac) + return -ENODEV; + + if (!rx_pause) + clear_bit(pfvf_idx, lmac->rx_fc_pfvf_bmap.bmap); + else + set_bit(pfvf_idx, lmac->rx_fc_pfvf_bmap.bmap); + + if (!tx_pause) + clear_bit(pfvf_idx, lmac->tx_fc_pfvf_bmap.bmap); + else + set_bit(pfvf_idx, lmac->tx_fc_pfvf_bmap.bmap); + + /* check if other pfvfs are using flow control */ + if (!rx_pause && bitmap_weight(lmac->rx_fc_pfvf_bmap.bmap, lmac->rx_fc_pfvf_bmap.max)) { + dev_warn(&cgx->pdev->dev, + "Receive Flow control disable not permitted as its used by other PFVFs\n"); + return -EPERM; + } + + if (!tx_pause && bitmap_weight(lmac->tx_fc_pfvf_bmap.bmap, lmac->tx_fc_pfvf_bmap.max)) { + dev_warn(&cgx->pdev->dev, + "Transmit Flow control disable not permitted as its used by other PFVFs\n"); + return -EPERM; + } + + return 0; +} + +int cgx_lmac_pfc_config(void *cgxd, int lmac_id, u8 tx_pause, + u8 rx_pause, u16 pfc_en) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + + /* Return as no traffic classes are requested */ + if (tx_pause && !pfc_en) + return 0; + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_CBFC_CTL); - /* Disable pause frames transmission */ - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL); - cfg &= ~CGX_SMUX_TX_CTL_L2P_BP_CONV; - cgx_write(cgx, lmac_id, CGXX_SMUX_TX_CTL, cfg); + if (rx_pause) { + cfg |= (CGXX_SMUX_CBFC_CTL_RX_EN | + CGXX_SMUX_CBFC_CTL_BCK_EN | + CGXX_SMUX_CBFC_CTL_DRP_EN); + } else { + cfg &= ~(CGXX_SMUX_CBFC_CTL_RX_EN | + CGXX_SMUX_CBFC_CTL_BCK_EN | + CGXX_SMUX_CBFC_CTL_DRP_EN); } + + if (tx_pause) + cfg |= CGXX_SMUX_CBFC_CTL_TX_EN; + else + cfg &= ~CGXX_SMUX_CBFC_CTL_TX_EN; + + cfg = FIELD_SET(CGX_PFC_CLASS_MASK, pfc_en, cfg); + + cgx_write(cgx, lmac_id, CGXX_SMUX_CBFC_CTL, cfg); + + /* Write source MAC address which will be filled into PFC packet */ + cfg = cgx_lmac_addr_get(cgx->cgx_id, lmac_id); + cgx_write(cgx, lmac_id, CGXX_SMUX_SMAC, cfg); + + return 0; +} + +int cgx_lmac_get_pfc_frm_cfg(void *cgxd, int lmac_id, u8 *tx_pause, + u8 *rx_pause) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_CBFC_CTL); + + *rx_pause = !!(cfg & CGXX_SMUX_CBFC_CTL_RX_EN); + *tx_pause = !!(cfg & CGXX_SMUX_CBFC_CTL_TX_EN); + + return 0; } void 
cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable) @@ -1489,6 +1602,16 @@ static int cgx_lmac_init(struct cgx *cgx) /* Reserve first entry for default MAC address */ set_bit(0, lmac->mac_to_index_bmap.bmap); + lmac->rx_fc_pfvf_bmap.max = 128; + err = rvu_alloc_bitmap(&lmac->rx_fc_pfvf_bmap); + if (err) + goto err_dmac_bmap_free; + + lmac->tx_fc_pfvf_bmap.max = 128; + err = rvu_alloc_bitmap(&lmac->tx_fc_pfvf_bmap); + if (err) + goto err_rx_fc_bmap_free; + init_waitqueue_head(&lmac->wq_cmd_cmplt); mutex_init(&lmac->cmd_lock); spin_lock_init(&lmac->event_cb_lock); @@ -1505,6 +1628,10 @@ static int cgx_lmac_init(struct cgx *cgx) return cgx_lmac_verify_fwi_version(cgx); err_bitmap_free: + rvu_free_bitmap(&lmac->tx_fc_pfvf_bmap); +err_rx_fc_bmap_free: + rvu_free_bitmap(&lmac->rx_fc_pfvf_bmap); +err_dmac_bmap_free: rvu_free_bitmap(&lmac->mac_to_index_bmap); err_name_free: kfree(lmac->name); @@ -1572,6 +1699,8 @@ static struct mac_ops cgx_mac_ops = { .mac_enadis_ptp_config = cgx_lmac_ptp_config, .mac_rx_tx_enable = cgx_lmac_rx_tx_enable, .mac_tx_enable = cgx_lmac_tx_enable, + .pfc_config = cgx_lmac_pfc_config, + .mac_get_pfc_frm_cfg = cgx_lmac_get_pfc_frm_cfg, }; static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index ab1e4abdea38..bd2f33a26eee 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -76,6 +76,13 @@ #define CGXX_SMUX_TX_CTL 0x20178 #define CGXX_SMUX_TX_PAUSE_PKT_TIME 0x20110 #define CGXX_SMUX_TX_PAUSE_PKT_INTERVAL 0x20120 +#define CGXX_SMUX_SMAC 0x20108 +#define CGXX_SMUX_CBFC_CTL 0x20218 +#define CGXX_SMUX_CBFC_CTL_RX_EN BIT_ULL(0) +#define CGXX_SMUX_CBFC_CTL_TX_EN BIT_ULL(1) +#define CGXX_SMUX_CBFC_CTL_DRP_EN BIT_ULL(2) +#define CGXX_SMUX_CBFC_CTL_BCK_EN BIT_ULL(3) +#define CGX_PFC_CLASS_MASK GENMASK_ULL(47, 32) #define CGXX_GMP_GMI_TX_PAUSE_PKT_TIME 0x38230 #define CGXX_GMP_GMI_TX_PAUSE_PKT_INTERVAL 0x38248 #define CGX_SMUX_TX_CTL_L2P_BP_CONV BIT_ULL(7) @@ -172,4 +179,10 @@ u64 cgx_lmac_read(int cgx_id, int lmac_id, u64 offset); int cgx_lmac_addr_update(u8 cgx_id, u8 lmac_id, u8 *mac_addr, u8 index); u64 cgx_read_dmac_ctrl(void *cgxd, int lmac_id); u64 cgx_read_dmac_entry(void *cgxd, int index); +int cgx_lmac_pfc_config(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause, + u16 pfc_en); +int cgx_lmac_get_pfc_frm_cfg(void *cgxd, int lmac_id, u8 *tx_pause, + u8 *rx_pause); +int verify_lmac_fc_cfg(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause, + int pfvf_idx); #endif /* CGX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h index b33e7d1d0851..f30581bf0688 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -17,6 +17,8 @@ * @resp: command response * @link_info: link related information * @mac_to_index_bmap: Mac address to CGX table index mapping + * @rx_fc_pfvf_bmap: Receive flow control enabled netdev mapping + * @tx_fc_pfvf_bmap: Transmit flow control enabled netdev mapping * @event_cb: callback for linkchange events * @event_cb_lock: lock for serializing callback with unregister * @cgx: parent cgx port @@ -33,6 +35,8 @@ struct lmac { u64 resp; struct cgx_link_user_info link_info; struct rsrc_bmap mac_to_index_bmap; + struct rsrc_bmap rx_fc_pfvf_bmap; + struct rsrc_bmap tx_fc_pfvf_bmap; struct cgx_event_cb event_cb; 
/* lock for serializing callback with unregister */ spinlock_t event_cb_lock; @@ -110,6 +114,12 @@ struct mac_ops { int (*mac_rx_tx_enable)(void *cgxd, int lmac_id, bool enable); int (*mac_tx_enable)(void *cgxd, int lmac_id, bool enable); + int (*pfc_config)(void *cgxd, int lmac_id, + u8 tx_pause, u8 rx_pause, u16 pfc_en); + + int (*mac_get_pfc_frm_cfg)(void *cgxd, int lmac_id, + u8 *tx_pause, u8 *rx_pause); + }; struct cgx { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 58e2aeebc14f..550cb11197bf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -172,6 +172,8 @@ M(RPM_STATS, 0x21C, rpm_stats, msg_req, rpm_stats_rsp) \ M(CGX_MAC_ADDR_RESET, 0x21D, cgx_mac_addr_reset, msg_req, msg_rsp) \ M(CGX_MAC_ADDR_UPDATE, 0x21E, cgx_mac_addr_update, cgx_mac_addr_update_req, \ msg_rsp) \ +M(CGX_PRIO_FLOW_CTRL_CFG, 0x21F, cgx_prio_flow_ctrl_cfg, cgx_pfc_cfg, \ + cgx_pfc_rsp) \ /* NPA mbox IDs (range 0x400 - 0x5FF) */ \ M(NPA_LF_ALLOC, 0x400, npa_lf_alloc, \ npa_lf_alloc_req, npa_lf_alloc_rsp) \ @@ -609,6 +611,21 @@ struct rpm_stats_rsp { u64 tx_stats[RPM_TX_STATS_COUNT]; }; +struct cgx_pfc_cfg { + struct mbox_msghdr hdr; + u8 rx_pause; + u8 tx_pause; + u16 pfc_en; /* bitmap indicating pfc enabled traffic classes */ +}; + +struct cgx_pfc_rsp { + struct mbox_msghdr hdr; + u8 rx_pause; + u8 tx_pause; +}; + + /* NPA mbox message formats */ + struct npc_set_pkind { struct mbox_msghdr hdr; #define OTX2_PRIV_FLAGS_DEFAULT BIT_ULL(0) @@ -1603,6 +1620,8 @@ enum cgx_af_status { LMAC_AF_ERR_INVALID_PARAM = -1101, LMAC_AF_ERR_PF_NOT_MAPPED = -1102, LMAC_AF_ERR_PERM_DENIED = -1103, + LMAC_AF_ERR_PFC_ENADIS_PERM_DENIED = -1104, + LMAC_AF_ERR_8023PAUSE_ENADIS_PERM_DENIED = -1105, }; #endif /* MBOX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index 9ea2f6ac38ec..d7a8aad46e12 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -32,6 +32,8 @@ static struct mac_ops rpm_mac_ops = { .mac_enadis_ptp_config = rpm_lmac_ptp_config, .mac_rx_tx_enable = rpm_lmac_rx_tx_enable, .mac_tx_enable = rpm_lmac_tx_enable, + .pfc_config = rpm_lmac_pfc_config, + .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, }; struct mac_ops *rpm_get_mac_ops(void) @@ -96,11 +98,20 @@ int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable) void rpm_lmac_enadis_rx_pause_fwding(void *rpmd, int lmac_id, bool enable) { rpm_t *rpm = rpmd; + struct lmac *lmac; u64 cfg; if (!rpm) return; + lmac = lmac_pdata(lmac_id, rpm); + if (!lmac) + return; + + /* Pause frames are not enabled just return */ + if (!bitmap_weight(lmac->rx_fc_pfvf_bmap.bmap, lmac->rx_fc_pfvf_bmap.max)) + return; + if (enable) { cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; @@ -122,13 +133,93 @@ int rpm_lmac_get_pause_frm_status(void *rpmd, int lmac_id, return -ENODEV; cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - *rx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE); + if (!(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE)) { + *rx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE); + *tx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE); + } - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - *tx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE); return 0; } 
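The RPM hunk above ends with rpm_lmac_get_pause_frm_status() reporting 802.3x pause state only while RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE is clear; once the MAC is switched into PFC mode, the same RX/TX pause-disable bits describe per-class (802.1Qbb) behaviour instead, so the legacy status is left at the caller's defaults. The standalone sketch below is a minimal illustration of that decode, not driver code: cmd_cfg stands in for an rpm_read() of RPMX_MTI_MAC100X_COMMAND_CONFIG, PFC_MODE at bit 19 is quoted from the rpm.h hunk in this series, while the RX_P_DISABLE/TX_P_DISABLE positions (bits 29 and 28) are assumptions based on rpm.h rather than anything quoted in this patch.

    /* Illustrative sketch only: decode 802.3x pause status from a
     * COMMAND_CONFIG snapshot the way rpm_lmac_get_pause_frm_status()
     * does after this series. Bits 28/29 are assumed from rpm.h.
     */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CMD_CFG_PFC_MODE     (1ULL << 19) /* quoted from rpm.h above */
    #define CMD_CFG_TX_P_DISABLE (1ULL << 28) /* assumed bit position */
    #define CMD_CFG_RX_P_DISABLE (1ULL << 29) /* assumed bit position */

    static void decode_pause_status(uint64_t cmd_cfg, bool *tx_pause,
                                    bool *rx_pause)
    {
            /* In PFC mode the disable bits act as per-class controls, so
             * 802.3x status is only meaningful when PFC_MODE is clear --
             * the same gate this patch adds to the status readout.
             */
            if (cmd_cfg & CMD_CFG_PFC_MODE)
                    return; /* keep the caller's zero-initialized defaults */

            /* The hardware bits are *disable* bits, hence the inversion */
            *rx_pause = !(cmd_cfg & CMD_CFG_RX_P_DISABLE);
            *tx_pause = !(cmd_cfg & CMD_CFG_TX_P_DISABLE);
    }

    int main(void)
    {
            bool tx = false, rx = false;

            /* PFC off, both disable bits clear: pause enabled both ways */
            decode_pause_status(0, &tx, &rx);
            printf("802.3x pause: tx=%d rx=%d\n", tx, rx);

            /* PFC on: status stays at the defaults, as in the driver */
            tx = false;
            rx = false;
            decode_pause_status(CMD_CFG_PFC_MODE, &tx, &rx);
            printf("PFC mode:     tx=%d rx=%d\n", tx, rx);
            return 0;
    }

The shared-register design is also why, further down in rvu_cgx.c, rvu_cgx_cfg_pause_frm() refuses 802.3x configuration whenever mac_get_pfc_frm_cfg() reports PFC active in either direction, and rvu_cgx_prio_flow_ctrl_cfg() does the reverse check: both modes are driven from COMMAND_CONFIG, so the AF treats them as mutually exclusive.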
+static void rpm_cfg_pfc_quanta_thresh(rpm_t *rpm, int lmac_id, u16 pfc_en, + bool enable) +{ + u64 quanta_offset = 0, quanta_thresh = 0, cfg; + int i, shift; + + /* Set pause time and interval */ + for_each_set_bit(i, (unsigned long *)&pfc_en, 16) { + switch (i) { + case 0: + case 1: + quanta_offset = RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL01_QUANTA_THRESH; + break; + case 2: + case 3: + quanta_offset = RPMX_MTI_MAC100X_CL23_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL23_QUANTA_THRESH; + break; + case 4: + case 5: + quanta_offset = RPMX_MTI_MAC100X_CL45_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL45_QUANTA_THRESH; + break; + case 6: + case 7: + quanta_offset = RPMX_MTI_MAC100X_CL67_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL67_QUANTA_THRESH; + break; + case 8: + case 9: + quanta_offset = RPMX_MTI_MAC100X_CL89_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL89_QUANTA_THRESH; + break; + case 10: + case 11: + quanta_offset = RPMX_MTI_MAC100X_CL1011_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL1011_QUANTA_THRESH; + break; + case 12: + case 13: + quanta_offset = RPMX_MTI_MAC100X_CL1213_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL1213_QUANTA_THRESH; + break; + case 14: + case 15: + quanta_offset = RPMX_MTI_MAC100X_CL1415_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL1415_QUANTA_THRESH; + break; + } + + if (!quanta_offset || !quanta_thresh) + continue; + + shift = (i % 2) ? 1 : 0; + cfg = rpm_read(rpm, lmac_id, quanta_offset); + if (enable) { + cfg |= ((u64)RPM_DEFAULT_PAUSE_TIME << shift * 16); + } else { + if (!shift) + cfg &= ~GENMASK_ULL(15, 0); + else + cfg &= ~GENMASK_ULL(31, 16); + } + rpm_write(rpm, lmac_id, quanta_offset, cfg); + + cfg = rpm_read(rpm, lmac_id, quanta_thresh); + if (enable) { + cfg |= ((u64)(RPM_DEFAULT_PAUSE_TIME / 2) << shift * 16); + } else { + if (!shift) + cfg &= ~GENMASK_ULL(15, 0); + else + cfg &= ~GENMASK_ULL(31, 16); + } + rpm_write(rpm, lmac_id, quanta_thresh, cfg); + } +} + int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause) { @@ -152,8 +243,12 @@ int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause, cfg = rpm_read(rpm, 0, RPMX_CMR_RX_OVR_BP); if (tx_pause) { + /* Configure CL0 Pause Quanta & threshold for 802.3X frames */ + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 1, true); cfg &= ~RPMX_CMR_RX_OVR_BP_EN(lmac_id); } else { + /* Disable all Pause Quanta & threshold values */ + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xffff, false); cfg |= RPMX_CMR_RX_OVR_BP_EN(lmac_id); cfg &= ~RPMX_CMR_RX_OVR_BP_BP(lmac_id); } @@ -166,56 +261,20 @@ void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable) rpm_t *rpm = rpmd; u64 cfg; - if (enable) { - /* Enable 802.3 pause frame mode */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - - /* Enable receive pause frames */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - - /* Enable forward pause to TX block */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - - /* Enable pause frames transmission */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg &= 
~RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - - /* Set pause time and interval */ - cfg = rpm_read(rpm, lmac_id, - RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA); - cfg &= ~0xFFFFULL; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA, - cfg | RPM_DEFAULT_PAUSE_TIME); - /* Set pause interval as the hardware default is too short */ - cfg = rpm_read(rpm, lmac_id, - RPMX_MTI_MAC100X_CL01_QUANTA_THRESH); - cfg &= ~0xFFFFULL; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_CL01_QUANTA_THRESH, - cfg | (RPM_DEFAULT_PAUSE_TIME / 2)); - - } else { - /* ALL pause frames received are completely ignored */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + /* ALL pause frames received are completely ignored */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - /* Disable forward pause to TX block */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + /* Disable forward pause to TX block */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - /* Disable pause frames transmission */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - } + /* Disable pause frames transmission */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); } int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat) @@ -323,3 +382,65 @@ void rpm_lmac_ptp_config(void *rpmd, int lmac_id, bool enable) cfg &= ~RPMX_RX_TS_PREPEND; rpm_write(rpm, lmac_id, RPMX_CMRX_CFG, cfg); } + +int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 pfc_en) +{ + rpm_t *rpm = rpmd; + u64 cfg; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + /* reset PFC class quanta and threshold */ + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xffff, false); + + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + + if (rx_pause) { + cfg &= ~(RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE | + RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE | + RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD); + } else { + cfg |= (RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE | + RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE | + RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD); + } + + if (tx_pause) { + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, pfc_en, true); + cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; + } else { + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xfff, false); + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; + } + + if (!rx_pause && !tx_pause) + cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE; + else + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE; + + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + + cfg = rpm_read(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL); + cfg = FIELD_SET(RPM_PFC_CLASS_MASK, pfc_en, cfg); + rpm_write(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL, 
cfg); + + return 0; +} + +int rpm_lmac_get_pfc_frm_cfg(void *rpmd, int lmac_id, u8 *tx_pause, u8 *rx_pause) +{ + rpm_t *rpm = rpmd; + u64 cfg; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + if (cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE) { + *rx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE); + *tx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE); + } + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index ff580311edd0..9ab8d49dd180 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -33,7 +33,21 @@ #define RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE BIT_ULL(8) #define RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE BIT_ULL(19) #define RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA 0x80A8 +#define RPMX_MTI_MAC100X_CL23_PAUSE_QUANTA 0x80B0 +#define RPMX_MTI_MAC100X_CL45_PAUSE_QUANTA 0x80B8 +#define RPMX_MTI_MAC100X_CL67_PAUSE_QUANTA 0x80C0 #define RPMX_MTI_MAC100X_CL01_QUANTA_THRESH 0x80C8 +#define RPMX_MTI_MAC100X_CL23_QUANTA_THRESH 0x80D0 +#define RPMX_MTI_MAC100X_CL45_QUANTA_THRESH 0x80D8 +#define RPMX_MTI_MAC100X_CL67_QUANTA_THRESH 0x80E0 +#define RPMX_MTI_MAC100X_CL89_PAUSE_QUANTA 0x8108 +#define RPMX_MTI_MAC100X_CL1011_PAUSE_QUANTA 0x8110 +#define RPMX_MTI_MAC100X_CL1213_PAUSE_QUANTA 0x8118 +#define RPMX_MTI_MAC100X_CL1415_PAUSE_QUANTA 0x8120 +#define RPMX_MTI_MAC100X_CL89_QUANTA_THRESH 0x8128 +#define RPMX_MTI_MAC100X_CL1011_QUANTA_THRESH 0x8130 +#define RPMX_MTI_MAC100X_CL1213_QUANTA_THRESH 0x8138 +#define RPMX_MTI_MAC100X_CL1415_QUANTA_THRESH 0x8140 #define RPM_DEFAULT_PAUSE_TIME 0xFFFF #define RPMX_CMR_RX_OVR_BP 0x4120 #define RPMX_CMR_RX_OVR_BP_EN(x) BIT_ULL((x) + 8) @@ -45,6 +59,18 @@ #define RPM_LMAC_FWI 0xa #define RPM_TX_EN BIT_ULL(0) #define RPM_RX_EN BIT_ULL(1) +#define RPMX_CMRX_PRT_CBFC_CTL 0x5B08 +#define RPMX_CMRX_PRT_CBFC_CTL_LOGL_EN_RX_SHIFT 33 +#define RPMX_CMRX_PRT_CBFC_CTL_PHYS_BP_SHIFT 16 +#define RPMX_CMRX_PRT_CBFC_CTL_LOGL_EN_TX_SHIFT 0 +#define RPM_PFC_CLASS_MASK GENMASK_ULL(48, 33) +#define RPMX_MTI_MAC100X_CL89_QUANTA_THRESH 0x8128 +#define RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_PAD_EN BIT_ULL(11) +#define RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE BIT_ULL(8) +#define RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD BIT_ULL(7) +#define RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA 0x80A8 +#define RPMX_MTI_MAC100X_CL89_PAUSE_QUANTA 0x8108 +#define RPM_DEFAULT_PAUSE_TIME 0xFFFF /* Function Declarations */ int rpm_get_nr_lmacs(void *rpmd); @@ -61,4 +87,8 @@ int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat); void rpm_lmac_ptp_config(void *rpmd, int lmac_id, bool enable); int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable); int rpm_lmac_tx_enable(void *rpmd, int lmac_id, bool enable); +int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, + u16 pfc_en); +int rpm_lmac_get_pfc_frm_cfg(void *rpmd, int lmac_id, u8 *tx_pause, + u8 *rx_pause); #endif /* RPM_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 5ed94cfb47d2..513b43ecd5be 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -807,6 +807,9 @@ u32 rvu_cgx_get_fifolen(struct rvu *rvu); void *rvu_first_cgx_pdata(struct rvu *rvu); int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id); int rvu_cgx_config_tx(void 
*cgxd, int lmac_id, bool enable); +int rvu_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause, + u16 pfc_en); +int rvu_cgx_cfg_pause_frm(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause); int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, int type); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 8a7ac5a8b821..9ffe99830e34 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -863,6 +863,45 @@ int rvu_mbox_handler_cgx_intlbk_disable(struct rvu *rvu, struct msg_req *req, return 0; } +int rvu_cgx_cfg_pause_frm(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause) +{ + int pf = rvu_get_pf(pcifunc); + u8 rx_pfc = 0, tx_pfc = 0; + struct mac_ops *mac_ops; + u8 cgx_id, lmac_id; + void *cgxd; + + if (!is_mac_feature_supported(rvu, pf, RVU_LMAC_FEAT_FC)) + return 0; + + /* This msg is expected only from PF/VFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. + */ + if (!is_pf_cgxmapped(rvu, pf)) + return LMAC_AF_ERR_PF_NOT_MAPPED; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + cgxd = rvu_cgx_pdata(cgx_id, rvu); + mac_ops = get_mac_ops(cgxd); + + mac_ops->mac_get_pfc_frm_cfg(cgxd, lmac_id, &tx_pfc, &rx_pfc); + if (tx_pfc || rx_pfc) { + dev_warn(rvu->dev, + "Can not configure 802.3X flow control as PFC frames are enabled"); + return LMAC_AF_ERR_8023PAUSE_ENADIS_PERM_DENIED; + } + + mutex_lock(&rvu->rsrc_lock); + if (verify_lmac_fc_cfg(cgxd, lmac_id, tx_pause, rx_pause, + pcifunc & RVU_PFVF_FUNC_MASK)) { + mutex_unlock(&rvu->rsrc_lock); + return LMAC_AF_ERR_PERM_DENIED; + } + mutex_unlock(&rvu->rsrc_lock); + + return mac_ops->mac_enadis_pause_frm(cgxd, lmac_id, tx_pause, rx_pause); +} + int rvu_mbox_handler_cgx_cfg_pause_frm(struct rvu *rvu, struct cgx_pause_frm_cfg *req, struct cgx_pause_frm_cfg *rsp) @@ -870,11 +909,9 @@ int rvu_mbox_handler_cgx_cfg_pause_frm(struct rvu *rvu, int pf = rvu_get_pf(req->hdr.pcifunc); struct mac_ops *mac_ops; u8 cgx_id, lmac_id; + int err = 0; void *cgxd; - if (!is_mac_feature_supported(rvu, pf, RVU_LMAC_FEAT_FC)) - return 0; - /* This msg is expected only from PF/VFs that are mapped to CGX LMACs, * if received from other PF/VF simply ACK, nothing to do. */ @@ -886,13 +923,11 @@ int rvu_mbox_handler_cgx_cfg_pause_frm(struct rvu *rvu, mac_ops = get_mac_ops(cgxd); if (req->set) - mac_ops->mac_enadis_pause_frm(cgxd, lmac_id, - req->tx_pause, req->rx_pause); + err = rvu_cgx_cfg_pause_frm(rvu, req->hdr.pcifunc, req->tx_pause, req->rx_pause); else - mac_ops->mac_get_pause_frm_status(cgxd, lmac_id, - &rsp->tx_pause, - &rsp->rx_pause); - return 0; + mac_ops->mac_get_pause_frm_status(cgxd, lmac_id, &rsp->tx_pause, &rsp->rx_pause); + + return err; } int rvu_mbox_handler_cgx_get_phy_fec_stats(struct rvu *rvu, struct msg_req *req, @@ -1079,3 +1114,67 @@ int rvu_mbox_handler_cgx_mac_addr_update(struct rvu *rvu, rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); return cgx_lmac_addr_update(cgx_id, lmac_id, req->mac_addr, req->index); } + +int rvu_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, u16 pcifunc, u8 tx_pause, + u8 rx_pause, u16 pfc_en) +{ + int pf = rvu_get_pf(pcifunc); + u8 rx_8023 = 0, tx_8023 = 0; + struct mac_ops *mac_ops; + u8 cgx_id, lmac_id; + void *cgxd; + + /* This msg is expected only from PF/VFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. 
+ */ + if (!is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + cgxd = rvu_cgx_pdata(cgx_id, rvu); + mac_ops = get_mac_ops(cgxd); + + mac_ops->mac_get_pause_frm_status(cgxd, lmac_id, &tx_8023, &rx_8023); + if (tx_8023 || rx_8023) { + dev_warn(rvu->dev, + "Can not configure PFC as 802.3X pause frames are enabled"); + return LMAC_AF_ERR_PFC_ENADIS_PERM_DENIED; + } + + mutex_lock(&rvu->rsrc_lock); + if (verify_lmac_fc_cfg(cgxd, lmac_id, tx_pause, rx_pause, + pcifunc & RVU_PFVF_FUNC_MASK)) { + mutex_unlock(&rvu->rsrc_lock); + return LMAC_AF_ERR_PERM_DENIED; + } + mutex_unlock(&rvu->rsrc_lock); + + return mac_ops->pfc_config(cgxd, lmac_id, tx_pause, rx_pause, pfc_en); +} + +int rvu_mbox_handler_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, + struct cgx_pfc_cfg *req, + struct cgx_pfc_rsp *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + struct mac_ops *mac_ops; + u8 cgx_id, lmac_id; + void *cgxd; + int err; + + /* This msg is expected only from PF/VFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. + */ + if (!is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + cgxd = rvu_cgx_pdata(cgx_id, rvu); + mac_ops = get_mac_ops(cgxd); + + err = rvu_cgx_prio_flow_ctrl_cfg(rvu, req->hdr.pcifunc, req->tx_pause, + req->rx_pause, req->pfc_en); + + mac_ops->mac_get_pfc_frm_cfg(cgxd, lmac_id, &rsp->tx_pause, &rsp->rx_pause); + return err; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 97fb61915379..73cd39af0090 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -296,7 +296,6 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf, struct rvu_hwinfo *hw = rvu->hw; struct sdp_node_info *sdp_info; int pkind, pf, vf, lbkid, vfid; - struct mac_ops *mac_ops; u8 cgx_id, lmac_id; bool from_vf; int err; @@ -326,13 +325,6 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf, cgx_set_pkind(rvu_cgx_pdata(cgx_id, rvu), lmac_id, pkind); rvu_npc_set_pkind(rvu, pkind, pfvf); - mac_ops = get_mac_ops(rvu_cgx_pdata(cgx_id, rvu)); - - /* By default we enable pause frames */ - if ((pcifunc & RVU_PFVF_FUNC_MASK) == 0) - mac_ops->mac_enadis_pause_frm(rvu_cgx_pdata(cgx_id, - rvu), - lmac_id, true, true); break; case NIX_INTF_TYPE_LBK: vf = (pcifunc & RVU_PFVF_FUNC_MASK) - 1; @@ -533,7 +525,7 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req, */ switch (type) { case NIX_INTF_TYPE_CGX: - if ((req->chan_base + req->chan_cnt) > 15) + if ((req->chan_base + req->chan_cnt) > 16) return -EINVAL; rvu_get_cgx_lmac_id(pfvf->cgx_lmac, &cgx_id, &lmac_id); /* Assign bpid based on cgx, lmac and chan id */ @@ -4578,6 +4570,12 @@ void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf) pfvf->hw_rx_tstamp_en = false; } + /* reset priority flow control config */ + rvu_cgx_prio_flow_ctrl_cfg(rvu, pcifunc, 0, 0, 0); + + /* reset 802.3x flow control config */ + rvu_cgx_cfg_pause_frm(rvu, pcifunc, 0, 0); + nix_ctx_free(rvu, pfvf); nix_free_all_bandprof(rvu, pcifunc); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index 0048b5946712..d463dc72d80a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ 
-11,4 +11,7 @@ rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ otx2_devlink.o rvu_nicvf-y := otx2_vf.o otx2_devlink.o +rvu_nicpf-$(CONFIG_DCB) += otx2_dcbnl.o +rvu_nicvf-$(CONFIG_DCB) += otx2_dcbnl.o + ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 92c0ddb602ca..358e5b216698 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -269,6 +269,7 @@ unlock: mutex_unlock(&pfvf->mbox.lock); return err; } +EXPORT_SYMBOL(otx2_config_pause_frm); int otx2_set_flowkey_cfg(struct otx2_nic *pfvf) { @@ -938,7 +939,11 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) if (!is_otx2_lbkvf(pfvf->pdev)) { /* Enable receive CQ backpressure */ aq->cq.bp_ena = 1; +#ifdef CONFIG_DCB + aq->cq.bpid = pfvf->bpid[pfvf->queue_to_pfc_map[qidx]]; +#else aq->cq.bpid = pfvf->bpid[0]; +#endif /* Set backpressure level is same as cq pass level */ aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt); @@ -1218,7 +1223,11 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, */ if (pfvf->nix_blkaddr == BLKADDR_NIX1) aq->aura.bp_ena = 1; +#ifdef CONFIG_DCB + aq->aura.nix0_bpid = pfvf->bpid[pfvf->queue_to_pfc_map[aura_id]]; +#else aq->aura.nix0_bpid = pfvf->bpid[0]; +#endif /* Set backpressure level for RQ's Aura */ aq->aura.bp = RQ_BP_LVL_AURA; @@ -1545,11 +1554,18 @@ int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable) return -ENOMEM; req->chan_base = 0; - req->chan_cnt = 1; +#ifdef CONFIG_DCB + req->chan_cnt = pfvf->pfc_en ? IEEE_8021QAZ_MAX_TCS : 1; + req->bpid_per_chan = pfvf->pfc_en ? 1 : 0; +#else + req->chan_cnt = 1; req->bpid_per_chan = 0; +#endif + return otx2_sync_mbox_msg(&pfvf->mbox); } +EXPORT_SYMBOL(otx2_nix_config_bp); /* Mbox message handlers */ void mbox_handler_cgx_stats(struct otx2_nic *pfvf, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 56be20053931..b6be9784ea36 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -399,6 +399,11 @@ struct otx2_nic { /* Devlink */ struct otx2_devlink *dl; +#ifdef CONFIG_DCB + /* PFC */ + u8 pfc_en; + u8 *queue_to_pfc_map; +#endif }; static inline bool is_otx2_lbkvf(struct pci_dev *pdev) @@ -879,4 +884,11 @@ int otx2_dmacflt_remove(struct otx2_nic *pf, const u8 *mac, u8 bit_pos); int otx2_dmacflt_update(struct otx2_nic *pf, u8 *mac, u8 bit_pos); void otx2_dmacflt_reinstall_flows(struct otx2_nic *pf); void otx2_dmacflt_update_pfmac_flow(struct otx2_nic *pfvf); + +#ifdef CONFIG_DCB +/* DCB support*/ +void otx2_update_bpid_in_rqctx(struct otx2_nic *pfvf, int vlan_prio, int qidx, bool pfc_enable); +int otx2_config_priority_flow_ctrl(struct otx2_nic *pfvf); +int otx2_dcbnl_set_ops(struct net_device *dev); +#endif #endif /* OTX2_COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c new file mode 100644 index 000000000000..723d2506d309 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RVU Ethernet driver + * + * Copyright (C) 2021 Marvell. 
+ * + */ + +#include "otx2_common.h" + +int otx2_config_priority_flow_ctrl(struct otx2_nic *pfvf) +{ + struct cgx_pfc_cfg *req; + struct cgx_pfc_rsp *rsp; + int err = 0; + + if (is_otx2_lbkvf(pfvf->pdev)) + return 0; + + mutex_lock(&pfvf->mbox.lock); + req = otx2_mbox_alloc_msg_cgx_prio_flow_ctrl_cfg(&pfvf->mbox); + if (!req) { + err = -ENOMEM; + goto unlock; + } + + if (pfvf->pfc_en) { + req->rx_pause = true; + req->tx_pause = true; + } else { + req->rx_pause = false; + req->tx_pause = false; + } + req->pfc_en = pfvf->pfc_en; + + if (!otx2_sync_mbox_msg(&pfvf->mbox)) { + rsp = (struct cgx_pfc_rsp *) + otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr); + if (req->rx_pause != rsp->rx_pause || req->tx_pause != rsp->tx_pause) { + dev_warn(pfvf->dev, + "Failed to config PFC\n"); + err = -EPERM; + } + } +unlock: + mutex_unlock(&pfvf->mbox.lock); + return err; +} + +void otx2_update_bpid_in_rqctx(struct otx2_nic *pfvf, int vlan_prio, int qidx, + bool pfc_enable) +{ + bool if_up = netif_running(pfvf->netdev); + struct npa_aq_enq_req *npa_aq; + struct nix_aq_enq_req *aq; + int err = 0; + + if (pfvf->queue_to_pfc_map[qidx] && pfc_enable) { + dev_warn(pfvf->dev, + "PFC enable not permitted as Priority %d already mapped to Queue %d\n", + pfvf->queue_to_pfc_map[qidx], qidx); + return; + } + + if (if_up) { + netif_tx_stop_all_queues(pfvf->netdev); + netif_carrier_off(pfvf->netdev); + } + + pfvf->queue_to_pfc_map[qidx] = vlan_prio; + + aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox); + if (!aq) { + err = -ENOMEM; + goto out; + } + + aq->cq.bpid = pfvf->bpid[vlan_prio]; + aq->cq_mask.bpid = GENMASK(8, 0); + + /* Fill AQ info */ + aq->qidx = qidx; + aq->ctype = NIX_AQ_CTYPE_CQ; + aq->op = NIX_AQ_INSTOP_WRITE; + + otx2_sync_mbox_msg(&pfvf->mbox); + + npa_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); + if (!npa_aq) { + err = -ENOMEM; + goto out; + } + npa_aq->aura.nix0_bpid = pfvf->bpid[vlan_prio]; + npa_aq->aura_mask.nix0_bpid = GENMASK(8, 0); + + /* Fill NPA AQ info */ + npa_aq->aura_id = qidx; + npa_aq->ctype = NPA_AQ_CTYPE_AURA; + npa_aq->op = NPA_AQ_INSTOP_WRITE; + otx2_sync_mbox_msg(&pfvf->mbox); + +out: + if (if_up) { + netif_carrier_on(pfvf->netdev); + netif_tx_start_all_queues(pfvf->netdev); + } + + if (err) + dev_warn(pfvf->dev, + "Updating BPIDs in CQ and Aura contexts of RQ%d failed with err %d\n", + qidx, err); +} + +static int otx2_dcbnl_ieee_getpfc(struct net_device *dev, struct ieee_pfc *pfc) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + + pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS; + pfc->pfc_en = pfvf->pfc_en; + + return 0; +} + +static int otx2_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + int err; + + /* Save PFC configuration to interface */ + pfvf->pfc_en = pfc->pfc_en; + + err = otx2_config_priority_flow_ctrl(pfvf); + if (err) + return err; + + /* Request Per channel Bpids */ + if (pfc->pfc_en) + otx2_nix_config_bp(pfvf, true); + + return 0; +} + +static u8 otx2_dcbnl_getdcbx(struct net_device __always_unused *dev) +{ + return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; +} + +static u8 otx2_dcbnl_setdcbx(struct net_device __always_unused *dev, u8 mode) +{ + return (mode != (DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE)) ? 
1 : 0; +} + +static const struct dcbnl_rtnl_ops otx2_dcbnl_ops = { + .ieee_getpfc = otx2_dcbnl_ieee_getpfc, + .ieee_setpfc = otx2_dcbnl_ieee_setpfc, + .getdcbx = otx2_dcbnl_getdcbx, + .setdcbx = otx2_dcbnl_setdcbx, +}; + +int otx2_dcbnl_set_ops(struct net_device *dev) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + + pfvf->queue_to_pfc_map = devm_kzalloc(pfvf->dev, pfvf->hw.rx_queues, + GFP_KERNEL); + if (!pfvf->queue_to_pfc_map) + return -ENOMEM; + dev->dcbnl_ops = &otx2_dcbnl_ops; + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 77a13fb555fb..54f235c216a9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -21,8 +21,10 @@ struct otx2_flow { u16 entry; bool is_vf; u8 rss_ctx_id; +#define DMAC_FILTER_RULE BIT(0) +#define PFC_FLOWCTRL_RULE BIT(1) + u16 rule_type; int vf; - bool dmac_filter; }; enum dmac_req { @@ -899,6 +901,9 @@ static int otx2_is_flow_rule_dmacfilter(struct otx2_nic *pfvf, static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow) { u64 ring_cookie = flow->flow_spec.ring_cookie; +#ifdef CONFIG_DCB + int vlan_prio, qidx, pfc_rule = 0; +#endif struct npc_install_flow_req *req; int err, vf = 0; @@ -940,6 +945,24 @@ static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow) mutex_unlock(&pfvf->mbox.lock); return -EINVAL; } + +#ifdef CONFIG_DCB + /* Identify PFC rule if PFC enabled and ntuple rule is vlan */ + if (!vf && (req->features & BIT_ULL(NPC_OUTER_VID)) && + pfvf->pfc_en && req->op != NIX_RX_ACTIONOP_RSS) { + vlan_prio = ntohs(req->packet.vlan_tci) & + ntohs(req->mask.vlan_tci); + + /* Get the priority */ + vlan_prio >>= 13; + flow->rule_type |= PFC_FLOWCTRL_RULE; + /* Check if PFC enabled for this priority */ + if (pfvf->pfc_en & BIT(vlan_prio)) { + pfc_rule = true; + qidx = req->index; + } + } +#endif } /* ethtool ring_cookie has (VF + 1) for VF */ @@ -951,6 +974,12 @@ static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow) /* Send message to AF */ err = otx2_sync_mbox_msg(&pfvf->mbox); + +#ifdef CONFIG_DCB + if (!err && pfc_rule) + otx2_update_bpid_in_rqctx(pfvf, vlan_prio, qidx, true); +#endif + mutex_unlock(&pfvf->mbox.lock); return err; } @@ -966,7 +995,7 @@ static int otx2_add_flow_with_pfmac(struct otx2_nic *pfvf, return -ENOMEM; pf_mac->entry = 0; - pf_mac->dmac_filter = true; + pf_mac->rule_type |= DMAC_FILTER_RULE; pf_mac->location = pfvf->flow_cfg->max_flows; memcpy(&pf_mac->flow_spec, &flow->flow_spec, sizeof(struct ethtool_rx_flow_spec)); @@ -1031,7 +1060,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) eth_hdr = &flow->flow_spec.h_u.ether_spec; /* Sync dmac filter table with updated fields */ - if (flow->dmac_filter) + if (flow->rule_type & DMAC_FILTER_RULE) return otx2_dmacflt_update(pfvf, eth_hdr->h_dest, flow->entry); @@ -1052,7 +1081,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) if (!test_bit(0, &flow_cfg->dmacflt_bmap)) otx2_add_flow_with_pfmac(pfvf, flow); - flow->dmac_filter = true; + flow->rule_type |= DMAC_FILTER_RULE; flow->entry = find_first_zero_bit(&flow_cfg->dmacflt_bmap, flow_cfg->dmacflt_max_flows); fsp->location = flow_cfg->max_flows + flow->entry; @@ -1120,7 +1149,7 @@ static void otx2_update_rem_pfmac(struct otx2_nic *pfvf, int req) bool found = false; list_for_each_entry(iter, &pfvf->flow_cfg->flow_list, list) { - if (iter->dmac_filter && iter->entry 
== 0) { + if ((iter->rule_type & DMAC_FILTER_RULE) && iter->entry == 0) { eth_hdr = &iter->flow_spec.h_u.ether_spec; if (req == DMAC_ADDR_DEL) { otx2_dmacflt_remove(pfvf, eth_hdr->h_dest, @@ -1156,7 +1185,7 @@ int otx2_remove_flow(struct otx2_nic *pfvf, u32 location) if (!flow) return -ENOENT; - if (flow->dmac_filter) { + if (flow->rule_type & DMAC_FILTER_RULE) { struct ethhdr *eth_hdr = &flow->flow_spec.h_u.ether_spec; /* user not allowed to remove dmac filter with interface mac */ @@ -1174,6 +1203,13 @@ int otx2_remove_flow(struct otx2_nic *pfvf, u32 location) flow_cfg->dmacflt_max_flows) == 1) otx2_update_rem_pfmac(pfvf, DMAC_ADDR_DEL); } else { +#ifdef CONFIG_DCB + if (flow->rule_type & PFC_FLOWCTRL_RULE) + otx2_update_bpid_in_rqctx(pfvf, 0, + flow->flow_spec.ring_cookie, + false); +#endif + err = otx2_remove_flow_msg(pfvf, flow->entry, false); } @@ -1383,7 +1419,7 @@ void otx2_dmacflt_reinstall_flows(struct otx2_nic *pf) struct ethhdr *eth_hdr; list_for_each_entry(iter, &pf->flow_cfg->flow_list, list) { - if (iter->dmac_filter) { + if (iter->rule_type & DMAC_FILTER_RULE) { eth_hdr = &iter->flow_spec.h_u.ether_spec; otx2_dmacflt_add(pf, eth_hdr->h_dest, iter->entry); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 86c1c2f77bd7..ede4df51648b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1697,9 +1697,6 @@ int otx2_open(struct net_device *netdev) if (pf->linfo.link_up && !(pf->pcifunc & RVU_PFVF_FUNC_MASK)) otx2_handle_link_event(pf); - /* Restore pause frame settings */ - otx2_config_pause_frm(pf); - /* Install DMAC Filters */ if (pf->flags & OTX2_FLAG_DMACFLTR_SUPPORT) otx2_dmacflt_reinstall_flows(pf); @@ -2782,9 +2779,11 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Enable link notifications */ otx2_cgx_config_linkevents(pf, true); - /* Enable pause frames by default */ - pf->flags |= OTX2_FLAG_RX_PAUSE_ENABLED; - pf->flags |= OTX2_FLAG_TX_PAUSE_ENABLED; +#ifdef CONFIG_DCB + err = otx2_dcbnl_set_ops(netdev); + if (err) + goto err_pf_sriov_init; +#endif return 0; @@ -2929,6 +2928,21 @@ static void otx2_remove(struct pci_dev *pdev) if (pf->flags & OTX2_FLAG_RX_TSTAMP_ENABLED) otx2_config_hw_rx_tstamp(pf, false); + /* Disable 802.3x pause frames */ + if (pf->flags & OTX2_FLAG_RX_PAUSE_ENABLED || + (pf->flags & OTX2_FLAG_TX_PAUSE_ENABLED)) { + pf->flags &= ~OTX2_FLAG_RX_PAUSE_ENABLED; + pf->flags &= ~OTX2_FLAG_TX_PAUSE_ENABLED; + otx2_config_pause_frm(pf); + } + +#ifdef CONFIG_DCB + /* Disable PFC config */ + if (pf->pfc_en) { + pf->pfc_en = 0; + otx2_config_priority_flow_ctrl(pf); + } +#endif cancel_work_sync(&pf->reset_task); /* Disable link notifications */ otx2_cgx_config_linkevents(pf, false); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index d96c8903c67e..78142498d046 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -702,9 +702,11 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_unreg_netdev; - /* Enable pause frames by default */ - vf->flags |= OTX2_FLAG_RX_PAUSE_ENABLED; - vf->flags |= OTX2_FLAG_TX_PAUSE_ENABLED; +#ifdef CONFIG_DCB + err = otx2_dcbnl_set_ops(netdev); + if (err) + goto err_unreg_netdev; +#endif return 0; @@ -740,6 +742,22 @@ static void otx2vf_remove(struct pci_dev 
*pdev) vf = netdev_priv(netdev); + /* Disable 802.3x pause frames */ + if (vf->flags & OTX2_FLAG_RX_PAUSE_ENABLED || + (vf->flags & OTX2_FLAG_TX_PAUSE_ENABLED)) { + vf->flags &= ~OTX2_FLAG_RX_PAUSE_ENABLED; + vf->flags &= ~OTX2_FLAG_TX_PAUSE_ENABLED; + otx2_config_pause_frm(vf); + } + +#ifdef CONFIG_DCB + /* Disable PFC config */ + if (vf->pfc_en) { + vf->pfc_en = 0; + otx2_config_priority_flow_ctrl(vf); + } +#endif + cancel_work_sync(&vf->reset_task); otx2_unregister_dl(vf); unregister_netdev(netdev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 77e82e6cf6e8..fa33caecc91d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -1957,6 +1957,83 @@ int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, } EXPORT_SYMBOL(mlxsw_afa_block_append_mcrouter); +/* SIP DIP Action + * -------------- + * The SIP_DIP_ACTION is used for modifying the SIP and DIP fields of the + * packet, e.g. for NAT. The L3 checksum is updated. Also, if the L4 is TCP or + * if the L4 is UDP and the checksum field is not zero, then the L4 checksum is + * updated. + */ + +#define MLXSW_AFA_IP_CODE 0x11 +#define MLXSW_AFA_IP_SIZE 2 + +enum mlxsw_afa_ip_s_d { + /* ip refers to dip */ + MLXSW_AFA_IP_S_D_DIP, + /* ip refers to sip */ + MLXSW_AFA_IP_S_D_SIP, +}; + +/* afa_ip_s_d + * Source or destination. + */ +MLXSW_ITEM32(afa, ip, s_d, 0x00, 31, 1); + +enum mlxsw_afa_ip_m_l { + /* LSB: ip[63:0] refers to ip[63:0] */ + MLXSW_AFA_IP_M_L_LSB, + /* MSB: ip[63:0] refers to ip[127:64] */ + MLXSW_AFA_IP_M_L_MSB, +}; + +/* afa_ip_m_l + * MSB or LSB. + */ +MLXSW_ITEM32(afa, ip, m_l, 0x00, 30, 1); + +/* afa_ip_ip_63_32 + * Bits [63:32] in the IP address to change to. + */ +MLXSW_ITEM32(afa, ip, ip_63_32, 0x08, 0, 32); + +/* afa_ip_ip_31_0 + * Bits [31:0] in the IP address to change to. + */ +MLXSW_ITEM32(afa, ip, ip_31_0, 0x0C, 0, 32); + +static void mlxsw_afa_ip_pack(char *payload, enum mlxsw_afa_ip_s_d s_d, + enum mlxsw_afa_ip_m_l m_l, u32 ip_31_0, + u32 ip_63_32) +{ + mlxsw_afa_ip_s_d_set(payload, s_d); + mlxsw_afa_ip_m_l_set(payload, m_l); + mlxsw_afa_ip_ip_31_0_set(payload, ip_31_0); + mlxsw_afa_ip_ip_63_32_set(payload, ip_63_32); +} + +int mlxsw_afa_block_append_ip(struct mlxsw_afa_block *block, bool is_dip, + bool is_lsb, u32 val_31_0, u32 val_63_32, + struct netlink_ext_ack *extack) +{ + enum mlxsw_afa_ip_s_d s_d = is_dip ? MLXSW_AFA_IP_S_D_DIP : + MLXSW_AFA_IP_S_D_SIP; + enum mlxsw_afa_ip_m_l m_l = is_lsb ? MLXSW_AFA_IP_M_L_LSB : + MLXSW_AFA_IP_M_L_MSB; + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_IP_CODE, + MLXSW_AFA_IP_SIZE); + + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append IP action"); + return PTR_ERR(act); + } + + mlxsw_afa_ip_pack(act, s_d, m_l, val_31_0, val_63_32); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_ip); + /* L4 Port Action * -------------- * The L4_PORT_ACTION is used for modifying the sport and dport fields of the packet, e.g. for NAT. 
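The per-priority backpressure plumbing in the otx2 hunks above all keys off the pfc_en byte that dcbnl hands to ieee_setpfc(): bit N set means priority N is PFC-enabled, and otx2_nix_config_bp() switches from one shared backpressure ID to one per channel (chan_cnt = IEEE_8021QAZ_MAX_TCS, bpid_per_chan = 1) as soon as any bit is set. A minimal standalone sketch of that bitmap convention, assuming nothing beyond the hunks above (the EXAMPLE_* name is invented, not from the driver):

    #include <stdint.h>
    #include <stdio.h>

    #define EXAMPLE_MAX_TCS 8   /* IEEE 802.1Qaz traffic classes */

    int main(void)
    {
        uint8_t pfc_en = 0x09;  /* priorities 0 and 3 enabled */
        int prio;

        for (prio = 0; prio < EXAMPLE_MAX_TCS; prio++)
            printf("prio %d: PFC %s\n", prio,
                   (pfc_en & (1u << prio)) ? "on" : "off");

        /* As in otx2_nix_config_bp(): any enabled priority requests one
         * backpressure ID per channel instead of a single shared one. */
        printf("chan_cnt = %d, bpid_per_chan = %d\n",
               pfc_en ? EXAMPLE_MAX_TCS : 1, pfc_en ? 1 : 0);
        return 0;
    }

The same pfc_en bitmap is what otx2_add_flow_msg() later consults (pfvf->pfc_en & BIT(vlan_prio)) when deciding whether a VLAN ntuple rule should also update the per-queue backpressure ID.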
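The SIP_DIP action layout just defined is small enough to restate in plain C. The sketch below fills the same four 32-bit words that the MLXSW_ITEM32 helpers target: s_d in bit 31 and m_l in bit 30 of word 0, ip[63:32] at byte offset 0x08, ip[31:0] at 0x0C. It is illustrative only; it skips the big-endian item encoding the real helpers take care of, and example_afa_ip_pack() is an invented name:

    #include <stdint.h>
    #include <stdio.h>

    /* Word layout of the SIP_DIP payload, per the item definitions
     * above; big-endian item encoding elided for clarity. */
    static void example_afa_ip_pack(uint32_t payload[4], int is_dip,
                                    int is_lsb, uint32_t ip_31_0,
                                    uint32_t ip_63_32)
    {
        /* s_d: 0 = DIP, 1 = SIP; m_l: 0 = LSB half, 1 = MSB half */
        payload[0] = ((is_dip ? 0u : 1u) << 31) |
                     ((is_lsb ? 0u : 1u) << 30);
        payload[1] = 0;
        payload[2] = ip_63_32;  /* byte offset 0x08 */
        payload[3] = ip_31_0;   /* byte offset 0x0C */
    }

    int main(void)
    {
        uint32_t p[4];

        /* IPv4 DIP rewrite: one action, LSB half, upper word zero,
         * matching the IP4_DIP case added later in spectrum_acl.c. */
        example_afa_ip_pack(p, 1, 1, 0xc0a80001u, 0);
        printf("dip lsb: word0=0x%08x ip=0x%08x\n", p[0], p[3]);

        /* Upper half of an IPv6 source: SIP plus MSB selector. */
        example_afa_ip_pack(p, 0, 0, 0x22334455u, 0x00112233u);
        printf("sip msb: word0=0x%08x\n", p[0]);
        return 0;
    }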
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 16cbd6acbb01..db58037be46e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -92,6 +92,9 @@ int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid, int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, u16 expected_irif, u16 min_mtu, bool rmid_valid, u32 kvdl_index); +int mlxsw_afa_block_append_ip(struct mlxsw_afa_block *block, bool is_dip, + bool is_lsb, u32 val_31_0, u32 val_63_32, + struct netlink_ext_ack *extack); int mlxsw_afa_block_append_l4port(struct mlxsw_afa_block *block, bool is_dport, u16 l4_port, struct netlink_ext_ack *extack); int mlxsw_afa_block_append_police(struct mlxsw_afa_block *block, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 30942b6ffcf9..20588e699588 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -820,6 +820,24 @@ int mlxsw_sp1_kvdl_resources_register(struct mlxsw_core *mlxsw_core); /* spectrum2_kvdl.c */ extern const struct mlxsw_sp_kvdl_ops mlxsw_sp2_kvdl_ops; +enum mlxsw_sp_acl_mangle_field { + MLXSW_SP_ACL_MANGLE_FIELD_IP_DSFIELD, + MLXSW_SP_ACL_MANGLE_FIELD_IP_DSCP, + MLXSW_SP_ACL_MANGLE_FIELD_IP_ECN, + MLXSW_SP_ACL_MANGLE_FIELD_IP_SPORT, + MLXSW_SP_ACL_MANGLE_FIELD_IP_DPORT, + MLXSW_SP_ACL_MANGLE_FIELD_IP4_SIP, + MLXSW_SP_ACL_MANGLE_FIELD_IP4_DIP, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_1, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_2, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_3, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_4, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_1, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_2, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_3, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_4, +}; + struct mlxsw_sp_acl_rule_info { unsigned int priority; struct mlxsw_afk_element_values values; @@ -828,9 +846,14 @@ struct mlxsw_sp_acl_rule_info { ingress_bind_blocker:1, egress_bind_blocker:1, counter_valid:1, - policer_index_valid:1; + policer_index_valid:1, + ipv6_valid:1; unsigned int counter_index; u16 policer_index; + struct { + u32 prev_val; + enum mlxsw_sp_acl_mangle_field prev_field; + } ipv6; }; /* spectrum_flow.c */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 70c11bfac08f..6c5af018546f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -505,14 +505,6 @@ int mlxsw_sp_acl_rulei_act_priority(struct mlxsw_sp *mlxsw_sp, extack); } -enum mlxsw_sp_acl_mangle_field { - MLXSW_SP_ACL_MANGLE_FIELD_IP_DSFIELD, - MLXSW_SP_ACL_MANGLE_FIELD_IP_DSCP, - MLXSW_SP_ACL_MANGLE_FIELD_IP_ECN, - MLXSW_SP_ACL_MANGLE_FIELD_IP_SPORT, - MLXSW_SP_ACL_MANGLE_FIELD_IP_DPORT, -}; - struct mlxsw_sp_acl_mangle_action { enum flow_action_mangle_base htype; /* Offset is u32-aligned. 
*/ @@ -561,6 +553,18 @@ static struct mlxsw_sp_acl_mangle_action mlxsw_sp_acl_mangle_actions[] = { MLXSW_SP_ACL_MANGLE_ACTION_UDP(0, 0x0000ffff, 16, IP_SPORT), MLXSW_SP_ACL_MANGLE_ACTION_UDP(0, 0xffff0000, 0, IP_DPORT), + + MLXSW_SP_ACL_MANGLE_ACTION_IP4(12, 0x00000000, 0, IP4_SIP), + MLXSW_SP_ACL_MANGLE_ACTION_IP4(16, 0x00000000, 0, IP4_DIP), + + MLXSW_SP_ACL_MANGLE_ACTION_IP6(8, 0x00000000, 0, IP6_SIP_1), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(12, 0x00000000, 0, IP6_SIP_2), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(16, 0x00000000, 0, IP6_SIP_3), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(20, 0x00000000, 0, IP6_SIP_4), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(24, 0x00000000, 0, IP6_DIP_1), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(28, 0x00000000, 0, IP6_DIP_2), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(32, 0x00000000, 0, IP6_DIP_3), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(36, 0x00000000, 0, IP6_DIP_4), }; static int @@ -599,6 +603,22 @@ static int mlxsw_sp1_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp, return err; } +static int +mlxsw_sp2_acl_rulei_act_mangle_field_ip_odd(struct mlxsw_sp_acl_rule_info *rulei, + enum mlxsw_sp_acl_mangle_field field, + u32 val, struct netlink_ext_ack *extack) +{ + if (!rulei->ipv6_valid) { + rulei->ipv6.prev_val = val; + rulei->ipv6_valid = true; + rulei->ipv6.prev_field = field; + return 0; + } + + NL_SET_ERR_MSG_MOD(extack, "Unsupported mangle field order"); + return -EOPNOTSUPP; +} + static int mlxsw_sp2_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct mlxsw_sp_acl_mangle_action *mact, @@ -615,6 +635,61 @@ static int mlxsw_sp2_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp, return mlxsw_afa_block_append_l4port(rulei->act_block, false, val, extack); case MLXSW_SP_ACL_MANGLE_FIELD_IP_DPORT: return mlxsw_afa_block_append_l4port(rulei->act_block, true, val, extack); + /* IPv4 fields */ + case MLXSW_SP_ACL_MANGLE_FIELD_IP4_SIP: + return mlxsw_afa_block_append_ip(rulei->act_block, false, + true, val, 0, extack); + case MLXSW_SP_ACL_MANGLE_FIELD_IP4_DIP: + return mlxsw_afa_block_append_ip(rulei->act_block, true, + true, val, 0, extack); + /* IPv6 fields */ + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_1: + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_3: + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_1: + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_3: + return mlxsw_sp2_acl_rulei_act_mangle_field_ip_odd(rulei, + mact->field, + val, extack); + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_2: + if (rulei->ipv6_valid && + rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_1) { + rulei->ipv6_valid = false; + return mlxsw_afa_block_append_ip(rulei->act_block, + false, false, val, + rulei->ipv6.prev_val, + extack); + } + break; + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_4: + if (rulei->ipv6_valid && + rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_3) { + rulei->ipv6_valid = false; + return mlxsw_afa_block_append_ip(rulei->act_block, + false, true, val, + rulei->ipv6.prev_val, + extack); + } + break; + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_2: + if (rulei->ipv6_valid && + rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_1) { + rulei->ipv6_valid = false; + return mlxsw_afa_block_append_ip(rulei->act_block, + true, false, val, + rulei->ipv6.prev_val, + extack); + } + break; + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_4: + if (rulei->ipv6_valid && + rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_3) { + rulei->ipv6_valid = false; + return mlxsw_afa_block_append_ip(rulei->act_block, + true, true, val, + rulei->ipv6.prev_val, + extack); + } + break; 
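/*
 * How the IPv6 cases above fit together: tc pedit delivers a 128-bit
 * address as four 32-bit words, but the SIP_DIP action rewrites 64
 * bits at a time. Odd words (*_SIP_1/_3 and *_DIP_1/_3) are only
 * buffered in rulei->ipv6 by the _ip_odd() helper; the matching even
 * word then clears ipv6_valid and emits a single
 * mlxsw_afa_block_append_ip() call carrying both halves, with the
 * buffered word as the upper 32 bits. Any other ordering breaks out
 * of the switch without emitting an action and the mangle is refused
 * as unsupported, while a dangling odd word is caught by the
 * ipv6_valid check added to mlxsw_sp_flower_parse_actions() below.
 */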
default: break; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index bb417db773b9..f54af3d9a03b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -233,6 +233,12 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return -EOPNOTSUPP; } } + + if (rulei->ipv6_valid) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported mangle field"); + return -EOPNOTSUPP; + } + return 0; } diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 8eaaa7559407..d62484f14564 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -4,11 +4,13 @@ #include <linux/if_bridge.h> #include <linux/if_vlan.h> #include <linux/iopoll.h> +#include <linux/ip.h> #include <linux/of_platform.h> #include <linux/of_net.h> #include <linux/packing.h> #include <linux/phy/phy.h> #include <linux/reset.h> +#include <net/addrconf.h> #include "lan966x_main.h" @@ -419,6 +421,32 @@ bool lan966x_netdevice_check(const struct net_device *dev) return dev->netdev_ops == &lan966x_port_netdev_ops; } +static bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, + struct sk_buff *skb) +{ + u32 val; + + /* The IGMP and MLD frames are not forward by the HW if + * multicast snooping is enabled, therefor don't mark as + * offload to allow the SW to forward the frames accordingly. + */ + val = lan_rd(lan966x, ANA_CPU_FWD_CFG(port)); + if (!(val & (ANA_CPU_FWD_CFG_IGMP_REDIR_ENA | + ANA_CPU_FWD_CFG_MLD_REDIR_ENA))) + return true; + + if (skb->protocol == htons(ETH_P_IP) && + ip_hdr(skb)->protocol == IPPROTO_IGMP) + return false; + + if (skb->protocol == htons(ETH_P_IPV6) && + ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) && + !ipv6_mc_check_mld(skb)) + return false; + + return true; +} + static int lan966x_port_xtr_status(struct lan966x *lan966x, u8 grp) { return lan_rd(lan966x, QS_XTR_RD(grp)); @@ -563,9 +591,14 @@ static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args) lan966x_ptp_rxtstamp(lan966x, skb, timestamp); skb->protocol = eth_type_trans(skb, dev); - if (lan966x->bridge_mask & BIT(src_port)) + if (lan966x->bridge_mask & BIT(src_port)) { skb->offload_fwd_mark = 1; + skb_reset_network_header(skb); + if (!lan966x_hw_offload(lan966x, src_port, skb)) + skb->offload_fwd_mark = 0; + } + netif_rx_ni(skb); dev->stats.rx_bytes += len; dev->stats.rx_packets++; @@ -767,7 +800,7 @@ static void lan966x_init(struct lan966x *lan966x) /* Setup flooding PGIDs */ lan_wr(ANA_FLOODING_IPMC_FLD_MC4_DATA_SET(PGID_MCIPV4) | ANA_FLOODING_IPMC_FLD_MC4_CTRL_SET(PGID_MC) | - ANA_FLOODING_IPMC_FLD_MC6_DATA_SET(PGID_MC) | + ANA_FLOODING_IPMC_FLD_MC6_DATA_SET(PGID_MCIPV6) | ANA_FLOODING_IPMC_FLD_MC6_CTRL_SET(PGID_MC), lan966x, ANA_FLOODING_IPMC); @@ -829,6 +862,10 @@ static void lan966x_init(struct lan966x *lan966x) ANA_PGID_PGID, lan966x, ANA_PGID(PGID_MCIPV4)); + lan_rmw(GENMASK(lan966x->num_phys_ports - 1, 0), + ANA_PGID_PGID, + lan966x, ANA_PGID(PGID_MCIPV6)); + /* Unicast to all other ports */ lan_rmw(GENMASK(lan966x->num_phys_ports - 1, 0), ANA_PGID_PGID, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 026474c609ea..058e43531818 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -170,6 +170,7 
@@ struct lan966x_port { bool vlan_aware; bool learn_ena; + bool mcast_ena; struct phylink_config phylink_config; struct phylink_pcs phylink_pcs; @@ -266,6 +267,8 @@ int lan966x_handle_port_mdb_del(struct lan966x_port *port, const struct switchdev_obj *obj); void lan966x_mdb_erase_entries(struct lan966x *lan966x, u16 vid); void lan966x_mdb_write_entries(struct lan966x *lan966x, u16 vid); +void lan966x_mdb_clear_entries(struct lan966x *lan966x); +void lan966x_mdb_restore_entries(struct lan966x *lan966x); int lan966x_ptp_init(struct lan966x *lan966x); void lan966x_ptp_deinit(struct lan966x *lan966x); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c index c68d0a99d292..2af55268bf4d 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c @@ -504,3 +504,48 @@ void lan966x_mdb_erase_entries(struct lan966x *lan966x, u16 vid) lan966x_mdb_l2_cpu_remove(lan966x, mdb_entry, type); } } + +void lan966x_mdb_clear_entries(struct lan966x *lan966x) +{ + struct lan966x_mdb_entry *mdb_entry; + enum macaccess_entry_type type; + unsigned char mac[ETH_ALEN]; + + list_for_each_entry(mdb_entry, &lan966x->mdb_entries, list) { + type = lan966x_mdb_classify(mdb_entry->mac); + + lan966x_mdb_encode_mac(mac, mdb_entry, type); + /* Remove just the MAC entry, still keep the PGID in case of L2 + * entries because this can be restored at later point + */ + lan966x_mac_forget(lan966x, mac, mdb_entry->vid, type); + } +} + +void lan966x_mdb_restore_entries(struct lan966x *lan966x) +{ + struct lan966x_mdb_entry *mdb_entry; + enum macaccess_entry_type type; + unsigned char mac[ETH_ALEN]; + bool cpu_copy = false; + + list_for_each_entry(mdb_entry, &lan966x->mdb_entries, list) { + type = lan966x_mdb_classify(mdb_entry->mac); + + lan966x_mdb_encode_mac(mac, mdb_entry, type); + if (type == ENTRYTYPE_MACV4 || type == ENTRYTYPE_MACV6) { + /* Copy the frame to CPU only if the CPU is in the VLAN */ + if (lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x, + mdb_entry->vid) && + mdb_entry->cpu_copy) + cpu_copy = true; + + lan966x_mac_ip_learn(lan966x, cpu_copy, mac, + mdb_entry->vid, type); + } else { + lan966x_mac_learn(lan966x, mdb_entry->pgid->index, + mdb_entry->mac, + mdb_entry->vid, type); + } + } +} diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h index 37a5d7e63cb6..0c0b3e173d53 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h @@ -299,6 +299,24 @@ enum lan966x_target { /* ANA:PORT:CPU_FWD_CFG */ #define ANA_CPU_FWD_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 96, 0, 1, 4) +#define ANA_CPU_FWD_CFG_MLD_REDIR_ENA BIT(6) +#define ANA_CPU_FWD_CFG_MLD_REDIR_ENA_SET(x)\ + FIELD_PREP(ANA_CPU_FWD_CFG_MLD_REDIR_ENA, x) +#define ANA_CPU_FWD_CFG_MLD_REDIR_ENA_GET(x)\ + FIELD_GET(ANA_CPU_FWD_CFG_MLD_REDIR_ENA, x) + +#define ANA_CPU_FWD_CFG_IGMP_REDIR_ENA BIT(5) +#define ANA_CPU_FWD_CFG_IGMP_REDIR_ENA_SET(x)\ + FIELD_PREP(ANA_CPU_FWD_CFG_IGMP_REDIR_ENA, x) +#define ANA_CPU_FWD_CFG_IGMP_REDIR_ENA_GET(x)\ + FIELD_GET(ANA_CPU_FWD_CFG_IGMP_REDIR_ENA, x) + +#define ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA BIT(4) +#define ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA_SET(x)\ + FIELD_PREP(ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA, x) +#define ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA_GET(x)\ + FIELD_GET(ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA, x) + #define ANA_CPU_FWD_CFG_SRC_COPY_ENA BIT(3) 
#define ANA_CPU_FWD_CFG_SRC_COPY_ENA_SET(x)\ FIELD_PREP(ANA_CPU_FWD_CFG_SRC_COPY_ENA, x) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c index 7de55f6a4da8..9fce865287e7 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c @@ -9,6 +9,37 @@ static struct notifier_block lan966x_netdevice_nb __read_mostly; static struct notifier_block lan966x_switchdev_nb __read_mostly; static struct notifier_block lan966x_switchdev_blocking_nb __read_mostly; +static void lan966x_port_set_mcast_ip_flood(struct lan966x_port *port, + u32 pgid_ip) +{ + struct lan966x *lan966x = port->lan966x; + u32 flood_mask_ip; + + flood_mask_ip = lan_rd(lan966x, ANA_PGID(pgid_ip)); + flood_mask_ip = ANA_PGID_PGID_GET(flood_mask_ip); + + /* If mcast snooping is not enabled then use mcast flood mask + * to decide to enable multicast flooding or not. + */ + if (!port->mcast_ena) { + u32 flood_mask; + + flood_mask = lan_rd(lan966x, ANA_PGID(PGID_MC)); + flood_mask = ANA_PGID_PGID_GET(flood_mask); + + if (flood_mask & BIT(port->chip_port)) + flood_mask_ip |= BIT(port->chip_port); + else + flood_mask_ip &= ~BIT(port->chip_port); + } else { + flood_mask_ip &= ~BIT(port->chip_port); + } + + lan_rmw(ANA_PGID_PGID_SET(flood_mask_ip), + ANA_PGID_PGID, + lan966x, ANA_PGID(pgid_ip)); +} + static void lan966x_port_set_mcast_flood(struct lan966x_port *port, bool enabled) { @@ -23,6 +54,11 @@ static void lan966x_port_set_mcast_flood(struct lan966x_port *port, lan_rmw(ANA_PGID_PGID_SET(val), ANA_PGID_PGID, port->lan966x, ANA_PGID(PGID_MC)); + + if (!port->mcast_ena) { + lan966x_port_set_mcast_ip_flood(port, PGID_MCIPV4); + lan966x_port_set_mcast_ip_flood(port, PGID_MCIPV6); + } } static void lan966x_port_set_ucast_flood(struct lan966x_port *port, @@ -144,6 +180,28 @@ static void lan966x_port_ageing_set(struct lan966x_port *port, lan966x_mac_set_ageing(port->lan966x, ageing_time); } +static void lan966x_port_mc_set(struct lan966x_port *port, bool mcast_ena) +{ + struct lan966x *lan966x = port->lan966x; + + port->mcast_ena = mcast_ena; + if (mcast_ena) + lan966x_mdb_restore_entries(lan966x); + else + lan966x_mdb_clear_entries(lan966x); + + lan_rmw(ANA_CPU_FWD_CFG_IGMP_REDIR_ENA_SET(mcast_ena) | + ANA_CPU_FWD_CFG_MLD_REDIR_ENA_SET(mcast_ena) | + ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA_SET(mcast_ena), + ANA_CPU_FWD_CFG_IGMP_REDIR_ENA | + ANA_CPU_FWD_CFG_MLD_REDIR_ENA | + ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA, + lan966x, ANA_CPU_FWD_CFG(port->chip_port)); + + lan966x_port_set_mcast_ip_flood(port, PGID_MCIPV4); + lan966x_port_set_mcast_ip_flood(port, PGID_MCIPV6); +} + static int lan966x_port_attr_set(struct net_device *dev, const void *ctx, const struct switchdev_attr *attr, struct netlink_ext_ack *extack) @@ -171,6 +229,9 @@ static int lan966x_port_attr_set(struct net_device *dev, const void *ctx, lan966x_vlan_port_set_vlan_aware(port, attr->u.vlan_filtering); lan966x_vlan_port_apply(port); break; + case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED: + lan966x_port_mc_set(port, !attr->u.mc_disabled); + break; default: err = -EOPNOTSUPP; break; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 35689b5e212c..394de85d360d 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -291,7 +291,6 @@ static int sparx5_create_port(struct sparx5 *sparx5, /* Create a 
phylink for PHY management. Also handles SFPs */ spx5_port->phylink_config.dev = &spx5_port->ndev->dev; spx5_port->phylink_config.type = PHYLINK_NETDEV; - spx5_port->phylink_config.pcs_poll = true; spx5_port->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD | MAC_2500FD | MAC_5000FD | MAC_10000FD | MAC_25000FD; diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 69e791e6abc4..b7d3ba1b4d17 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1085,8 +1085,10 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, break; case CQE_RX_TRUNCATED: - netdev_err(ndev, "Dropped a truncated packet\n"); - return; + ++ndev->stats.rx_dropped; + rxbuf_oob = &rxq->rx_oobs[rxq->buf_index]; + netdev_warn_once(ndev, "Dropped a truncated packet\n"); + goto drop; case CQE_RX_COALESCED_4: netdev_err(ndev, "RX coalescing is unsupported\n"); @@ -1102,9 +1104,6 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, return; } - if (oob->cqe_hdr.cqe_type != CQE_RX_OKAY) - return; - pktlen = oob->ppi[0].pkt_len; if (pktlen == 0) { @@ -1154,6 +1153,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, mana_rx_skb(old_buf, oob, rxq); +drop: mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu); mana_post_pkt_rxq(rxq); diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c index f7ad5487879b..15c295f90196 100644 --- a/drivers/net/ethernet/realtek/r8169_phy_config.c +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -429,15 +429,6 @@ static const struct phy_reg rtl8168d_1_phy_reg_init_0[] = { { 0x0d, 0xf880 } }; -static const struct phy_reg rtl8168d_1_phy_reg_init_1[] = { - { 0x1f, 0x0002 }, - { 0x05, 0x669a }, - { 0x1f, 0x0005 }, - { 0x05, 0x8330 }, - { 0x06, 0x669a }, - { 0x1f, 0x0002 } -}; - static void rtl8168d_apply_firmware_cond(struct rtl8169_private *tp, struct phy_device *phydev, u16 val) @@ -455,6 +446,29 @@ static void rtl8168d_apply_firmware_cond(struct rtl8169_private *tp, r8169_apply_firmware(tp); } +static void rtl8168d_1_common(struct phy_device *phydev) +{ + u16 val; + + phy_write_paged(phydev, 0x0002, 0x05, 0x669a); + r8168d_phy_param(phydev, 0x8330, 0xffff, 0x669a); + phy_write(phydev, 0x1f, 0x0002); + + val = phy_read(phydev, 0x0d); + + if ((val & 0x00ff) != 0x006c) { + static const u16 set[] = { + 0x0065, 0x0066, 0x0067, 0x0068, + 0x0069, 0x006a, 0x006b, 0x006c + }; + int i; + + val &= 0xff00; + for (i = 0; i < ARRAY_SIZE(set); i++) + phy_write(phydev, 0x0d, val | set[i]); + } +} + static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev) { @@ -469,25 +483,7 @@ static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp, phy_modify(phydev, 0x0c, 0x5d00, 0xa200); if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { - int val; - - rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_1); - - val = phy_read(phydev, 0x0d); - - if ((val & 0x00ff) != 0x006c) { - static const u32 set[] = { - 0x0065, 0x0066, 0x0067, 0x0068, - 0x0069, 0x006a, 0x006b, 0x006c - }; - int i; - - phy_write(phydev, 0x1f, 0x0002); - - val &= 0xff00; - for (i = 0; i < ARRAY_SIZE(set); i++) - phy_write(phydev, 0x0d, val | set[i]); - } + rtl8168d_1_common(phydev); } else { phy_write_paged(phydev, 0x0002, 0x05, 0x6662); r8168d_phy_param(phydev, 0x8330, 0xffff, 0x6662); @@ -513,24 +509,7 @@ static void 
rtl8168d_2_hw_phy_config(struct rtl8169_private *tp, rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_0); if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { - int val; - - rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_1); - - val = phy_read(phydev, 0x0d); - if ((val & 0x00ff) != 0x006c) { - static const u32 set[] = { - 0x0065, 0x0066, 0x0067, 0x0068, - 0x0069, 0x006a, 0x006b, 0x006c - }; - int i; - - phy_write(phydev, 0x1f, 0x0002); - - val &= 0xff00; - for (i = 0; i < ARRAY_SIZE(set); i++) - phy_write(phydev, 0x0d, val | set[i]); - } + rtl8168d_1_common(phydev); } else { phy_write_paged(phydev, 0x0002, 0x05, 0x2642); r8168d_phy_param(phydev, 0x8330, 0xffff, 0x2642); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 8e8778cfbbad..5943ff9f21c2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -383,10 +383,10 @@ static int intel_crosststamp(ktime_t *device, /* Repeat until the timestamps are from the FIFO last segment */ for (i = 0; i < num_snapshot; i++) { - spin_lock_irqsave(&priv->ptp_lock, flags); + read_lock_irqsave(&priv->ptp_lock, flags); stmmac_get_ptptime(priv, ptpaddr, &ptp_time); *device = ns_to_ktime(ptp_time); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + read_unlock_irqrestore(&priv->ptp_lock, flags); get_arttime(priv->mii, intel_priv->mdio_adhoc_addr, &art_time); *system = convert_art_to_tsc(art_time); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 5b195d5051d6..57970ae2178d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -263,7 +263,7 @@ struct stmmac_priv { u32 adv_ts; int use_riwt; int irq_wake; - spinlock_t ptp_lock; + rwlock_t ptp_lock; /* Protects auxiliary snapshot registers from concurrent access. */ struct mutex aux_ts_lock; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index a7ec9f4d46ce..22fea0f67245 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -196,9 +196,9 @@ static void timestamp_interrupt(struct stmmac_priv *priv) GMAC_TIMESTAMP_ATSNS_SHIFT; for (i = 0; i < num_snapshot; i++) { - spin_lock_irqsave(&priv->ptp_lock, flags); + read_lock_irqsave(&priv->ptp_lock, flags); get_ptptime(priv->ptpaddr, &ptp_time); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + read_unlock_irqrestore(&priv->ptp_lock, flags); event.type = PTP_CLOCK_EXTTS; event.index = 0; event.timestamp = ptp_time; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 1c9f02f9c317..e45fb191d8e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -39,9 +39,9 @@ static int stmmac_adjust_freq(struct ptp_clock_info *ptp, s32 ppb) diff = div_u64(adj, 1000000000ULL); addend = neg_adj ? 
(addend - diff) : (addend + diff); - spin_lock_irqsave(&priv->ptp_lock, flags); + write_lock_irqsave(&priv->ptp_lock, flags); stmmac_config_addend(priv, priv->ptpaddr, addend); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + write_unlock_irqrestore(&priv->ptp_lock, flags); return 0; } @@ -86,9 +86,9 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) mutex_unlock(&priv->plat->est->lock); } - spin_lock_irqsave(&priv->ptp_lock, flags); + write_lock_irqsave(&priv->ptp_lock, flags); stmmac_adjust_systime(priv, priv->ptpaddr, sec, nsec, neg_adj, xmac); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + write_unlock_irqrestore(&priv->ptp_lock, flags); /* Caculate new basetime and re-configured EST after PTP time adjust. */ if (est_rst) { @@ -137,9 +137,9 @@ static int stmmac_get_time(struct ptp_clock_info *ptp, struct timespec64 *ts) unsigned long flags; u64 ns = 0; - spin_lock_irqsave(&priv->ptp_lock, flags); + read_lock_irqsave(&priv->ptp_lock, flags); stmmac_get_systime(priv, priv->ptpaddr, &ns); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + read_unlock_irqrestore(&priv->ptp_lock, flags); *ts = ns_to_timespec64(ns); @@ -162,9 +162,9 @@ static int stmmac_set_time(struct ptp_clock_info *ptp, container_of(ptp, struct stmmac_priv, ptp_clock_ops); unsigned long flags; - spin_lock_irqsave(&priv->ptp_lock, flags); + write_lock_irqsave(&priv->ptp_lock, flags); stmmac_init_systime(priv, priv->ptpaddr, ts->tv_sec, ts->tv_nsec); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + write_unlock_irqrestore(&priv->ptp_lock, flags); return 0; } @@ -194,12 +194,12 @@ static int stmmac_enable(struct ptp_clock_info *ptp, cfg->period.tv_sec = rq->perout.period.sec; cfg->period.tv_nsec = rq->perout.period.nsec; - spin_lock_irqsave(&priv->ptp_lock, flags); + write_lock_irqsave(&priv->ptp_lock, flags); ret = stmmac_flex_pps_config(priv, priv->ioaddr, rq->perout.index, cfg, on, priv->sub_second_inc, priv->systime_flags); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + write_unlock_irqrestore(&priv->ptp_lock, flags); break; case PTP_CLK_REQ_EXTTS: priv->plat->ext_snapshot_en = on; @@ -314,7 +314,7 @@ void stmmac_ptp_register(struct stmmac_priv *priv) stmmac_ptp_clock_ops.n_per_out = priv->dma_cap.pps_out_num; stmmac_ptp_clock_ops.n_ext_ts = priv->dma_cap.aux_snapshot_n; - spin_lock_init(&priv->ptp_lock); + rwlock_init(&priv->ptp_lock); mutex_init(&priv->aux_ts_lock); priv->ptp_clock_ops = stmmac_ptp_clock_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index be3cb63675a5..9f1759593b94 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -1777,9 +1777,9 @@ static int stmmac_test_tbs(struct stmmac_priv *priv) if (ret) return ret; - spin_lock_irqsave(&priv->ptp_lock, flags); + read_lock_irqsave(&priv->ptp_lock, flags); stmmac_get_systime(priv, priv->ptpaddr, &curr_time); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + read_unlock_irqrestore(&priv->ptp_lock, flags); if (!curr_time) { ret = -EOPNOTSUPP; @@ -1799,9 +1799,9 @@ static int stmmac_test_tbs(struct stmmac_priv *priv) goto fail_disable; /* Check if expected time has elapsed */ - spin_lock_irqsave(&priv->ptp_lock, flags); + read_lock_irqsave(&priv->ptp_lock, flags); stmmac_get_systime(priv, priv->ptpaddr, &curr_time); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + read_unlock_irqrestore(&priv->ptp_lock, flags); if ((curr_time - start_time) < STMMAC_TBS_LT_OFFSET) 
ret = -EINVAL; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index afa81a9480cc..e675d1016c3c 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -154,19 +154,15 @@ static void free_netvsc_device(struct rcu_head *head) kfree(nvdev->extension); - if (nvdev->recv_original_buf) { - hv_unmap_memory(nvdev->recv_buf); + if (nvdev->recv_original_buf) vfree(nvdev->recv_original_buf); - } else { + else vfree(nvdev->recv_buf); - } - if (nvdev->send_original_buf) { - hv_unmap_memory(nvdev->send_buf); + if (nvdev->send_original_buf) vfree(nvdev->send_original_buf); - } else { + else vfree(nvdev->send_buf); - } bitmap_free(nvdev->send_section_map); @@ -765,6 +761,12 @@ void netvsc_device_remove(struct hv_device *device) netvsc_teardown_send_gpadl(device, net_device, ndev); } + if (net_device->recv_original_buf) + hv_unmap_memory(net_device->recv_buf); + + if (net_device->send_original_buf) + hv_unmap_memory(net_device->send_buf); + /* Release all resources */ free_netvsc_device_rcu(net_device); } @@ -1821,6 +1823,12 @@ cleanup: netif_napi_del(&net_device->chan_table[0].napi); cleanup2: + if (net_device->recv_original_buf) + hv_unmap_memory(net_device->recv_buf); + + if (net_device->send_original_buf) + hv_unmap_memory(net_device->send_buf); + free_netvsc_device(&net_device->rcu); return ERR_PTR(ret); diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c index 1544564bc283..87e1d43c118c 100644 --- a/drivers/net/ipa/gsi_trans.c +++ b/drivers/net/ipa/gsi_trans.c @@ -320,6 +320,17 @@ gsi_trans_tre_release(struct gsi_trans_info *trans_info, u32 tre_count) atomic_add(tre_count, &trans_info->tre_avail); } +/* Return true if no transactions are allocated, false otherwise */ +bool gsi_channel_trans_idle(struct gsi *gsi, u32 channel_id) +{ + u32 tre_max = gsi_channel_tre_max(gsi, channel_id); + struct gsi_trans_info *trans_info; + + trans_info = &gsi->channel[channel_id].trans_info; + + return atomic_read(&trans_info->tre_avail) == tre_max; +} + /* Allocate a GSI transaction on a channel */ struct gsi_trans *gsi_channel_trans_alloc(struct gsi *gsi, u32 channel_id, u32 tre_count, diff --git a/drivers/net/ipa/gsi_trans.h b/drivers/net/ipa/gsi_trans.h index 17fd1822d8a9..af379b49299e 100644 --- a/drivers/net/ipa/gsi_trans.h +++ b/drivers/net/ipa/gsi_trans.h @@ -130,6 +130,16 @@ void *gsi_trans_pool_alloc_dma(struct gsi_trans_pool *pool, dma_addr_t *addr); void gsi_trans_pool_exit_dma(struct device *dev, struct gsi_trans_pool *pool); /** + * gsi_channel_trans_idle() - Return whether no transactions are allocated + * @gsi: GSI pointer + * @channel_id: Channel the transaction is associated with + * + * Return: True if no transactions are allocated, false otherwise + * + */ +bool gsi_channel_trans_idle(struct gsi *gsi, u32 channel_id); + +/** * gsi_channel_trans_alloc() - Allocate a GSI transaction on a channel * @gsi: GSI pointer * @channel_id: Channel the transaction is associated with diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index fffd0a784ef2..888e94278a84 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -25,7 +25,8 @@ #define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0) -#define IPA_REPLENISH_BATCH 16 +/* Hardware is told about receive buffers once a "batch" has been queued */ +#define IPA_REPLENISH_BATCH 16 /* Must be non-zero */ /* The amount of RX buffer space consumed by standard skb overhead */ #define IPA_RX_BUFFER_OVERHEAD (PAGE_SIZE - SKB_MAX_ORDER(NET_SKB_PAD, 
0)) @@ -1036,10 +1037,9 @@ static void ipa_endpoint_status(struct ipa_endpoint *endpoint) iowrite32(val, ipa->reg_virt + offset); } -static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint) +static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint, + struct gsi_trans *trans) { - struct gsi_trans *trans; - bool doorbell = false; struct page *page; u32 buffer_size; u32 offset; @@ -1051,121 +1051,84 @@ static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint) if (!page) return -ENOMEM; - trans = ipa_endpoint_trans_alloc(endpoint, 1); - if (!trans) - goto err_free_pages; - /* Offset the buffer to make space for skb headroom */ offset = NET_SKB_PAD; len = buffer_size - offset; ret = gsi_trans_page_add(trans, page, len, offset); if (ret) - goto err_trans_free; - trans->data = page; /* transaction owns page now */ - - if (++endpoint->replenish_ready == IPA_REPLENISH_BATCH) { - doorbell = true; - endpoint->replenish_ready = 0; - } - - gsi_trans_commit(trans, doorbell); - - return 0; - -err_trans_free: - gsi_trans_free(trans); -err_free_pages: - __free_pages(page, get_order(buffer_size)); + __free_pages(page, get_order(buffer_size)); + else + trans->data = page; /* transaction owns page now */ - return -ENOMEM; + return ret; } /** * ipa_endpoint_replenish() - Replenish endpoint receive buffers * @endpoint: Endpoint to be replenished - * @add_one: Whether this is replacing a just-consumed buffer * * The IPA hardware can hold a fixed number of receive buffers for an RX * endpoint, based on the number of entries in the underlying channel ring * buffer. If an endpoint's "backlog" is non-zero, it indicates how many * more receive buffers can be supplied to the hardware. Replenishing for - * an endpoint can be disabled, in which case requests to replenish a - * buffer are "saved", and transferred to the backlog once it is re-enabled - * again. + * an endpoint can be disabled, in which case buffers are not queued to + * the hardware. */ -static void ipa_endpoint_replenish(struct ipa_endpoint *endpoint, bool add_one) +static void ipa_endpoint_replenish(struct ipa_endpoint *endpoint) { - struct gsi *gsi; - u32 backlog; - int delta; + struct gsi_trans *trans; - if (!test_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags)) { - if (add_one) - atomic_inc(&endpoint->replenish_saved); + if (!test_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags)) return; - } - /* If already active, just update the backlog */ - if (test_and_set_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags)) { - if (add_one) - atomic_inc(&endpoint->replenish_backlog); + /* Skip it if it's already active */ + if (test_and_set_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags)) return; - } - while (atomic_dec_not_zero(&endpoint->replenish_backlog)) - if (ipa_endpoint_replenish_one(endpoint)) + while ((trans = ipa_endpoint_trans_alloc(endpoint, 1))) { + bool doorbell; + + if (ipa_endpoint_replenish_one(endpoint, trans)) goto try_again_later; - clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); - if (add_one) - atomic_inc(&endpoint->replenish_backlog); + /* Ring the doorbell if we've got a full batch */ + doorbell = !(++endpoint->replenish_count % IPA_REPLENISH_BATCH); + gsi_trans_commit(trans, doorbell); + } + + clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); return; try_again_later: + gsi_trans_free(trans); clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); - /* The last one didn't succeed, so fix the backlog */ - delta = add_one ? 
2 : 1; - backlog = atomic_add_return(delta, &endpoint->replenish_backlog); - /* Whenever a receive buffer transaction completes we'll try to * replenish again. It's unlikely, but if we fail to supply even * one buffer, nothing will trigger another replenish attempt. - * Receive buffer transactions use one TRE, so schedule work to - * try replenishing again if our backlog is *all* available TREs. + * If the hardware has no receive buffers queued, schedule work to + * try replenishing again. */ - gsi = &endpoint->ipa->gsi; - if (backlog == gsi_channel_tre_max(gsi, endpoint->channel_id)) + if (gsi_channel_trans_idle(&endpoint->ipa->gsi, endpoint->channel_id)) schedule_delayed_work(&endpoint->replenish_work, msecs_to_jiffies(1)); } static void ipa_endpoint_replenish_enable(struct ipa_endpoint *endpoint) { - struct gsi *gsi = &endpoint->ipa->gsi; - u32 max_backlog; - u32 saved; - set_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); - while ((saved = atomic_xchg(&endpoint->replenish_saved, 0))) - atomic_add(saved, &endpoint->replenish_backlog); /* Start replenishing if hardware currently has no buffers */ - max_backlog = gsi_channel_tre_max(gsi, endpoint->channel_id); - if (atomic_read(&endpoint->replenish_backlog) == max_backlog) - ipa_endpoint_replenish(endpoint, false); + if (gsi_channel_trans_idle(&endpoint->ipa->gsi, endpoint->channel_id)) + ipa_endpoint_replenish(endpoint); } static void ipa_endpoint_replenish_disable(struct ipa_endpoint *endpoint) { - u32 backlog; - clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); - while ((backlog = atomic_xchg(&endpoint->replenish_backlog, 0))) - atomic_add(backlog, &endpoint->replenish_saved); } static void ipa_endpoint_replenish_work(struct work_struct *work) @@ -1175,7 +1138,7 @@ static void ipa_endpoint_replenish_work(struct work_struct *work) endpoint = container_of(dwork, struct ipa_endpoint, replenish_work); - ipa_endpoint_replenish(endpoint, false); + ipa_endpoint_replenish(endpoint); } static void ipa_endpoint_skb_copy(struct ipa_endpoint *endpoint, @@ -1380,10 +1343,8 @@ static void ipa_endpoint_rx_complete(struct ipa_endpoint *endpoint, { struct page *page; - ipa_endpoint_replenish(endpoint, true); - if (trans->cancelled) - return; + goto done; /* Parse or build a socket buffer using the actual received length */ page = trans->data; @@ -1391,6 +1352,8 @@ static void ipa_endpoint_rx_complete(struct ipa_endpoint *endpoint, ipa_endpoint_status_parse(endpoint, page, trans->len); else if (ipa_endpoint_skb_build(endpoint, page, trans->len)) trans->data = NULL; /* Pages have been consumed */ +done: + ipa_endpoint_replenish(endpoint); } void ipa_endpoint_trans_complete(struct ipa_endpoint *endpoint, @@ -1727,9 +1690,6 @@ static void ipa_endpoint_setup_one(struct ipa_endpoint *endpoint) */ clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); - atomic_set(&endpoint->replenish_saved, - gsi_channel_tre_max(gsi, endpoint->channel_id)); - atomic_set(&endpoint->replenish_backlog, 0); INIT_DELAYED_WORK(&endpoint->replenish_work, ipa_endpoint_replenish_work); } @@ -1905,6 +1865,8 @@ u32 ipa_endpoint_init(struct ipa *ipa, u32 count, enum ipa_endpoint_name name; u32 filter_map; + BUILD_BUG_ON(!IPA_REPLENISH_BATCH); + if (!ipa_endpoint_data_valid(ipa, count, data)) return 0; /* Error */ diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h index 0313cdc607de..12fd5b16c18e 100644 --- a/drivers/net/ipa/ipa_endpoint.h +++ b/drivers/net/ipa/ipa_endpoint.h @@ 
-65,9 +65,7 @@ enum ipa_replenish_flag { * @evt_ring_id: GSI event ring used by the endpoint * @netdev: Network device pointer, if endpoint uses one * @replenish_flags: Replenishing state flags - * @replenish_ready: Number of replenish transactions without doorbell - * @replenish_saved: Replenish requests held while disabled - * @replenish_backlog: Number of buffers needed to fill hardware queue + * @replenish_count: Total number of replenish transactions committed * @replenish_work: Work item used for repeated replenish failures */ struct ipa_endpoint { @@ -86,9 +84,7 @@ struct ipa_endpoint { /* Receive buffer replenishing for RX endpoints */ DECLARE_BITMAP(replenish_flags, IPA_REPLENISH_COUNT); - u32 replenish_ready; - atomic_t replenish_saved; - atomic_t replenish_backlog; + u64 replenish_count; struct delayed_work replenish_work; /* global wq */ }; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 420201858564..5b53a3e23c89 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -132,17 +132,6 @@ void phylink_set_port_modes(unsigned long *mask) } EXPORT_SYMBOL_GPL(phylink_set_port_modes); -void phylink_set_10g_modes(unsigned long *mask) -{ - phylink_set(mask, 10000baseT_Full); - phylink_set(mask, 10000baseCR_Full); - phylink_set(mask, 10000baseSR_Full); - phylink_set(mask, 10000baseLR_Full); - phylink_set(mask, 10000baseLRM_Full); - phylink_set(mask, 10000baseER_Full); -} -EXPORT_SYMBOL_GPL(phylink_set_10g_modes); - static int phylink_is_empty_linkmode(const unsigned long *linkmode) { __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp) = { 0, }; diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index b554054a7560..e62fc4f2aee0 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -358,6 +358,7 @@ config USB_NET_SMSC95XX select BITREVERSE select CRC16 select CRC32 + imply NET_SELFTESTS help This option adds support for SMSC LAN95XX based USB 2.0 10/100 Ethernet adapters. diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h index 2a1e31defe71..4334aafab59a 100644 --- a/drivers/net/usb/asix.h +++ b/drivers/net/usb/asix.h @@ -192,8 +192,8 @@ extern const struct driver_info ax88172a_info; /* ASIX specific flags */ #define FLAG_EEPROM_MAC (1UL << 0) /* init device MAC from eeprom */ -int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data, int in_pm); +int __must_check asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, + u16 size, void *data, int in_pm); int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data, int in_pm); diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 71682970be58..524805285019 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -11,8 +11,8 @@ #define AX_HOST_EN_RETRIES 30 -int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data, int in_pm) +int __must_check asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, + u16 size, void *data, int in_pm) { int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16); @@ -27,9 +27,12 @@ int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, ret = fn(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, size); - if (unlikely(ret < 0)) + if (unlikely(ret < size)) { + ret = ret < 0 ? 
ret : -ENODATA; + netdev_warn(dev->net, "Failed to read reg index 0x%04x: %d\n", index, ret); + } return ret; } @@ -79,7 +82,7 @@ static int asix_check_host_enable(struct usbnet *dev, int in_pm) 0, 0, 1, &smsr, in_pm); if (ret == -ENODEV) break; - else if (ret < sizeof(smsr)) + else if (ret < 0) continue; else if (smsr & AX_HOST_EN) break; @@ -579,8 +582,12 @@ int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc) return ret; } - asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, - (__u16)loc, 2, &res, 1); + ret = asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, + (__u16)loc, 2, &res, 1); + if (ret < 0) { + mutex_unlock(&dev->phy_mutex); + return ret; + } asix_set_hw_mii(dev, 1); mutex_unlock(&dev->phy_mutex); diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 9b72334aabb6..6ea44e53713a 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -755,7 +755,12 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) priv->phy_addr = ret; priv->embd_phy = ((priv->phy_addr & 0x1f) == 0x10); - asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0); + ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0); + if (ret < 0) { + netdev_dbg(dev->net, "Failed to read STATMNGSTS_REG: %d\n", ret); + return ret; + } + chipcode &= AX_CHIPCODE_MASK; ret = (chipcode == AX_AX88772_CHIPCODE) ? ax88772_hw_reset(dev, 0) : @@ -919,11 +924,21 @@ static int ax88178_reset(struct usbnet *dev) int gpio0 = 0; u32 phyid; - asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status, 0); + ret = asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status, 0); + if (ret < 0) { + netdev_dbg(dev->net, "Failed to read GPIOS: %d\n", ret); + return ret; + } + netdev_dbg(dev->net, "GPIO Status: 0x%04x\n", status); asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0, 0, 0, NULL, 0); - asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom, 0); + ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom, 0); + if (ret < 0) { + netdev_dbg(dev->net, "Failed to read EEPROM: %d\n", ret); + return ret; + } + asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0, 0, 0, NULL, 0); netdev_dbg(dev->net, "EEPROM index 0x17 is 0x%04x\n", eeprom); diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 82bb5ed94c48..a7c1434fe2da 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -21,6 +21,7 @@ #include <net/ipv6.h> #include <net/addrconf.h> #include <net/ipv6_stubs.h> +#include <net/ndisc.h> /* alternative VLAN for IP session 0 if not untagged */ #define MBIM_IPS0_VID 4094 diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index bc1e3dd67c04..5567220e9d16 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -20,6 +20,8 @@ #include <linux/of_net.h> #include <linux/mdio.h> #include <linux/phy.h> +#include <net/selftests.h> + #include "smsc95xx.h" #define SMSC_CHIPNAME "smsc95xx" @@ -727,6 +729,26 @@ static u32 smsc95xx_get_link(struct net_device *net) return net->phydev->link; } +static void smsc95xx_ethtool_get_strings(struct net_device *netdev, u32 sset, + u8 *data) +{ + switch (sset) { + case ETH_SS_TEST: + net_selftest_get_strings(data); + break; + } +} + +static int smsc95xx_ethtool_get_sset_count(struct net_device *ndev, int sset) +{ + switch (sset) { + case ETH_SS_TEST: + return net_selftest_get_count(); + default: + return -EOPNOTSUPP; + } +} + static const struct ethtool_ops smsc95xx_ethtool_ops = { .get_link = smsc95xx_get_link, .nway_reset = 
phy_ethtool_nway_reset, @@ -743,6 +765,9 @@ static const struct ethtool_ops smsc95xx_ethtool_ops = { .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, .get_ts_info = ethtool_op_get_ts_info, + .self_test = net_selftest, + .get_strings = smsc95xx_ethtool_get_strings, + .get_sset_count = smsc95xx_ethtool_get_sset_count, }; static int smsc95xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c index 8070f3fd98f0..7d4da9e605ef 100644 --- a/drivers/ptp/ptp_pch.c +++ b/drivers/ptp/ptp_pch.c @@ -10,9 +10,10 @@ #include <linux/device.h> #include <linux/err.h> -#include <linux/init.h> #include <linux/interrupt.h> #include <linux/io.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/io-64-nonatomic-hi-lo.h> #include <linux/irq.h> #include <linux/kernel.h> #include <linux/module.h> @@ -100,7 +101,6 @@ struct pch_ts_regs { #define PCH_ECS_ETH (1 << 0) #define PCH_ECS_CAN (1 << 1) -#define PCH_STATION_BYTES 6 #define PCH_IEEE1588_ETH (1 << 0) #define PCH_IEEE1588_CAN (1 << 1) @@ -115,8 +115,6 @@ struct pch_dev { int exts0_enabled; int exts1_enabled; - u32 mem_base; - u32 mem_size; u32 irq; struct pci_dev *pdev; spinlock_t register_lock; @@ -148,28 +146,15 @@ static inline void pch_eth_enable_set(struct pch_dev *chip) static u64 pch_systime_read(struct pch_ts_regs __iomem *regs) { u64 ns; - u32 lo, hi; - lo = ioread32(®s->systime_lo); - hi = ioread32(®s->systime_hi); + ns = ioread64_lo_hi(®s->systime_lo); - ns = ((u64) hi) << 32; - ns |= lo; - ns <<= TICKS_NS_SHIFT; - - return ns; + return ns << TICKS_NS_SHIFT; } static void pch_systime_write(struct pch_ts_regs __iomem *regs, u64 ns) { - u32 hi, lo; - - ns >>= TICKS_NS_SHIFT; - hi = ns >> 32; - lo = ns & 0xffffffff; - - iowrite32(lo, ®s->systime_lo); - iowrite32(hi, ®s->systime_hi); + iowrite64_lo_hi(ns >> TICKS_NS_SHIFT, ®s->systime_lo); } static inline void pch_block_reset(struct pch_dev *chip) @@ -235,16 +220,10 @@ u64 pch_rx_snap_read(struct pci_dev *pdev) { struct pch_dev *chip = pci_get_drvdata(pdev); u64 ns; - u32 lo, hi; - lo = ioread32(&chip->regs->rx_snap_lo); - hi = ioread32(&chip->regs->rx_snap_hi); + ns = ioread64_lo_hi(&chip->regs->rx_snap_lo); - ns = ((u64) hi) << 32; - ns |= lo; - ns <<= TICKS_NS_SHIFT; - - return ns; + return ns << TICKS_NS_SHIFT; } EXPORT_SYMBOL(pch_rx_snap_read); @@ -252,16 +231,10 @@ u64 pch_tx_snap_read(struct pci_dev *pdev) { struct pch_dev *chip = pci_get_drvdata(pdev); u64 ns; - u32 lo, hi; - - lo = ioread32(&chip->regs->tx_snap_lo); - hi = ioread32(&chip->regs->tx_snap_hi); - ns = ((u64) hi) << 32; - ns |= lo; - ns <<= TICKS_NS_SHIFT; + ns = ioread64_lo_hi(&chip->regs->tx_snap_lo); - return ns; + return ns << TICKS_NS_SHIFT; } EXPORT_SYMBOL(pch_tx_snap_read); @@ -292,8 +265,9 @@ static void pch_reset(struct pch_dev *chip) */ int pch_set_station_address(u8 *addr, struct pci_dev *pdev) { - s32 i; struct pch_dev *chip = pci_get_drvdata(pdev); + bool valid; + u64 mac; /* Verify the parameter */ if ((chip->regs == NULL) || addr == (u8 *)NULL) { @@ -301,37 +275,15 @@ int pch_set_station_address(u8 *addr, struct pci_dev *pdev) "invalid params returning PCH_INVALIDPARAM\n"); return PCH_INVALIDPARAM; } - /* For all station address bytes */ - for (i = 0; i < PCH_STATION_BYTES; i++) { - u32 val; - s32 tmp; - - tmp = hex_to_bin(addr[i * 3]); - if (tmp < 0) { - dev_err(&pdev->dev, - "invalid params returning PCH_INVALIDPARAM\n"); - return PCH_INVALIDPARAM; - } - val = tmp * 16; - tmp = 
hex_to_bin(addr[(i * 3) + 1]); - if (tmp < 0) { - dev_err(&pdev->dev, - "invalid params returning PCH_INVALIDPARAM\n"); - return PCH_INVALIDPARAM; - } - val += tmp; - /* Expects ':' separated addresses */ - if ((i < 5) && (addr[(i * 3) + 2] != ':')) { - dev_err(&pdev->dev, - "invalid params returning PCH_INVALIDPARAM\n"); - return PCH_INVALIDPARAM; - } - /* Ideally we should set the address only after validating - entire string */ - dev_dbg(&pdev->dev, "invoking pch_station_set\n"); - iowrite32(val, &chip->regs->ts_st[i]); + valid = mac_pton(addr, (u8 *)&mac); + if (!valid) { + dev_err(&pdev->dev, "invalid params returning PCH_INVALIDPARAM\n"); + return PCH_INVALIDPARAM; } + + dev_dbg(&pdev->dev, "invoking pch_station_set\n"); + iowrite64_lo_hi(mac, &chip->regs->ts_st); return 0; } EXPORT_SYMBOL(pch_set_station_address); @@ -344,19 +296,16 @@ static irqreturn_t isr(int irq, void *priv) { struct pch_dev *pch_dev = priv; struct pch_ts_regs __iomem *regs = pch_dev->regs; struct ptp_clock_event event; - u32 ack = 0, lo, hi, val; + u32 ack = 0, val; val = ioread32(&regs->event); if (val & PCH_TSE_SNS) { ack |= PCH_TSE_SNS; if (pch_dev->exts0_enabled) { - hi = ioread32(&regs->asms_hi); - lo = ioread32(&regs->asms_lo); event.type = PTP_CLOCK_EXTTS; event.index = 0; - event.timestamp = ((u64) hi) << 32; - event.timestamp |= lo; + event.timestamp = ioread64_hi_lo(&regs->asms_hi); event.timestamp <<= TICKS_NS_SHIFT; ptp_clock_event(pch_dev->ptp_clock, &event); } @@ -365,12 +314,9 @@ static irqreturn_t isr(int irq, void *priv) if (val & PCH_TSE_SNM) { ack |= PCH_TSE_SNM; if (pch_dev->exts1_enabled) { - hi = ioread32(&regs->amms_hi); - lo = ioread32(&regs->amms_lo); event.type = PTP_CLOCK_EXTTS; event.index = 1; - event.timestamp = ((u64) hi) << 32; - event.timestamp |= lo; + event.timestamp = ioread64_hi_lo(&regs->amms_hi); event.timestamp <<= TICKS_NS_SHIFT; ptp_clock_event(pch_dev->ptp_clock, &event); } @@ -501,31 +447,12 @@ static const struct ptp_clock_info ptp_pch_caps = { .enable = ptp_pch_enable, }; -#define pch_suspend NULL -#define pch_resume NULL - static void pch_remove(struct pci_dev *pdev) { struct pch_dev *chip = pci_get_drvdata(pdev); + free_irq(pdev->irq, chip); ptp_clock_unregister(chip->ptp_clock); - /* free the interrupt */ - if (pdev->irq != 0) - free_irq(pdev->irq, chip); - - /* unmap the virtual IO memory space */ - if (chip->regs != NULL) { - iounmap(chip->regs); - chip->regs = NULL; - } - /* release the reserved IO memory space */ - if (chip->mem_base != 0) { - release_mem_region(chip->mem_base, chip->mem_size); - chip->mem_base = 0; - } - pci_disable_device(pdev); - kfree(chip); - dev_info(&pdev->dev, "complete\n"); } static s32 @@ -535,50 +462,29 @@ pch_probe(struct pci_dev *pdev, const struct pci_device_id *id) { s32 ret; unsigned long flags; struct pch_dev *chip; - chip = kzalloc(sizeof(struct pch_dev), GFP_KERNEL); + chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL); if (chip == NULL) return -ENOMEM; /* enable the 1588 pci device */ - ret = pci_enable_device(pdev); + ret = pcim_enable_device(pdev); if (ret != 0) { dev_err(&pdev->dev, "could not enable the pci device\n"); - goto err_pci_en; + return ret; } - chip->mem_base = pci_resource_start(pdev, IO_MEM_BAR); - if (!chip->mem_base) { + ret = pcim_iomap_regions(pdev, BIT(IO_MEM_BAR), "1588_regs"); + if (ret) { dev_err(&pdev->dev, "could not locate IO memory address\n"); - ret = -ENODEV; - goto err_pci_start; - } - - /* retrieve the available length of the IO memory space */ - chip->mem_size = pci_resource_len(pdev, IO_MEM_BAR); - - /* allocate 
the memory for the device registers */ - if (!request_mem_region(chip->mem_base, chip->mem_size, "1588_regs")) { - dev_err(&pdev->dev, - "could not allocate register memory space\n"); - ret = -EBUSY; - goto err_req_mem_region; + return ret; } /* get the virtual address to the 1588 registers */ - chip->regs = ioremap(chip->mem_base, chip->mem_size); - - if (!chip->regs) { - dev_err(&pdev->dev, "Could not get virtual address\n"); - ret = -ENOMEM; - goto err_ioremap; - } - + chip->regs = pcim_iomap_table(pdev)[IO_MEM_BAR]; chip->caps = ptp_pch_caps; chip->ptp_clock = ptp_clock_register(&chip->caps, &pdev->dev); - if (IS_ERR(chip->ptp_clock)) { - ret = PTR_ERR(chip->ptp_clock); - goto err_ptp_clock_reg; - } + if (IS_ERR(chip->ptp_clock)) + return PTR_ERR(chip->ptp_clock); spin_lock_init(&chip->register_lock); @@ -598,8 +504,7 @@ pch_probe(struct pci_dev *pdev, const struct pci_device_id *id) pch_reset(chip); iowrite32(DEFAULT_ADDEND, &chip->regs->addend); - iowrite32(1, &chip->regs->trgt_lo); - iowrite32(0, &chip->regs->trgt_hi); + iowrite64_lo_hi(1, &chip->regs->trgt_lo); iowrite32(PCH_TSE_TTIPEND, &chip->regs->event); pch_eth_enable_set(chip); @@ -617,21 +522,7 @@ pch_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_req_irq: ptp_clock_unregister(chip->ptp_clock); -err_ptp_clock_reg: - iounmap(chip->regs); - chip->regs = NULL; -err_ioremap: - release_mem_region(chip->mem_base, chip->mem_size); - -err_req_mem_region: - chip->mem_base = 0; - -err_pci_start: - pci_disable_device(pdev); - -err_pci_en: - kfree(chip); dev_err(&pdev->dev, "probe failed(ret=0x%x)\n", ret); return ret; @@ -646,33 +537,13 @@ static const struct pci_device_id pch_ieee1588_pcidev_id[] = { }; MODULE_DEVICE_TABLE(pci, pch_ieee1588_pcidev_id); -static SIMPLE_DEV_PM_OPS(pch_pm_ops, pch_suspend, pch_resume); - static struct pci_driver pch_driver = { .name = KBUILD_MODNAME, .id_table = pch_ieee1588_pcidev_id, .probe = pch_probe, .remove = pch_remove, - .driver.pm = &pch_pm_ops, }; - -static void __exit ptp_pch_exit(void) -{ - pci_unregister_driver(&pch_driver); -} - -static s32 __init ptp_pch_init(void) -{ - s32 ret; - - /* register the driver with the pci core */ - ret = pci_register_driver(&pch_driver); - - return ret; -} - -module_init(ptp_pch_init); -module_exit(ptp_pch_exit); +module_pci_driver(pch_driver); module_param_string(station, pch_param.station, sizeof(pch_param.station), 0444); diff --git a/drivers/soc/fsl/dpio/qbman-portal.c b/drivers/soc/fsl/dpio/qbman-portal.c index 058b78fac5e3..0a3fb6c115f4 100644 --- a/drivers/soc/fsl/dpio/qbman-portal.c +++ b/drivers/soc/fsl/dpio/qbman-portal.c @@ -743,8 +743,8 @@ int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s, full_mask = s->eqcr.pi_ci_mask; if (!s->eqcr.available) { eqcr_ci = s->eqcr.ci; - p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK; - s->eqcr.ci = *p & full_mask; + s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI); + s->eqcr.ci &= full_mask; s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, eqcr_ci, s->eqcr.ci); if (!s->eqcr.available) { @@ -887,8 +887,8 @@ int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s, full_mask = s->eqcr.pi_ci_mask; if (!s->eqcr.available) { eqcr_ci = s->eqcr.ci; - p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK; - s->eqcr.ci = *p & full_mask; + s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI); + s->eqcr.ci &= full_mask; s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, eqcr_ci, s->eqcr.ci); if (!s->eqcr.available) diff --git a/include/linux/bpf-cgroup.h 
b/include/linux/bpf-cgroup.h index b525d8cdc25b..88a51b242adc 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -8,6 +8,7 @@ #include <linux/jump_label.h> #include <linux/percpu.h> #include <linux/rbtree.h> +#include <net/sock.h> #include <uapi/linux/bpf.h> struct sock; @@ -165,11 +166,23 @@ int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, void *value, u64 flags); +/* Opportunistic check to see whether we have any BPF program attached */ +static inline bool cgroup_bpf_sock_enabled(struct sock *sk, + enum cgroup_bpf_attach_type type) +{ + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + struct bpf_prog_array *array; + + array = rcu_access_pointer(cgrp->bpf.effective[type]); + return array != &bpf_empty_prog_array.hdr; +} + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled(CGROUP_INET_INGRESS)) \ + if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && \ + cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS)) \ __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ CGROUP_INET_INGRESS); \ \ @@ -181,7 +194,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \ typeof(sk) __sk = sk_to_full_sk(sk); \ - if (sk_fullsock(__sk)) \ + if (sk_fullsock(__sk) && \ + cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS)) \ __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ CGROUP_INET_EGRESS); \ } \ @@ -347,7 +361,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, kernel_optval) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled(CGROUP_SETSOCKOPT)) \ + if (cgroup_bpf_enabled(CGROUP_SETSOCKOPT) && \ + cgroup_bpf_sock_enabled(sock, CGROUP_SETSOCKOPT)) \ __ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \ optname, optval, \ optlen, \ @@ -367,7 +382,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, max_optlen, retval) \ ({ \ int __ret = retval; \ - if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \ + if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT) && \ + cgroup_bpf_sock_enabled(sock, CGROUP_GETSOCKOPT)) \ if (!(sock)->sk_prot->bpf_bypass_getsockopt || \ !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \ tcp_bpf_bypass_getsockopt, \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8c92c974bd12..2fc7e5c5ef41 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -332,7 +332,10 @@ enum bpf_type_flag { */ MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS), - __BPF_TYPE_LAST_FLAG = MEM_ALLOC, + /* MEM is in user address space. */ + MEM_USER = BIT(3 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_LAST_FLAG = MEM_USER, }; /* Max number of base types. 
*/ @@ -588,7 +591,7 @@ struct bpf_verifier_ops { const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id); + u32 *next_btf_id, enum bpf_type_flag *flag); }; struct bpf_prog_offload_ops { @@ -843,8 +846,8 @@ void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); -int bpf_jit_charge_modmem(u32 pages); -void bpf_jit_uncharge_modmem(u32 pages); +int bpf_jit_charge_modmem(u32 size); +void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, @@ -950,6 +953,7 @@ struct bpf_prog_aux { bool sleepable; bool tail_call_reachable; bool xdp_has_frags; + bool use_bpf_prog_pack; struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; @@ -1233,6 +1237,19 @@ struct bpf_prog_array { struct bpf_prog_array_item items[]; }; +struct bpf_empty_prog_array { + struct bpf_prog_array hdr; + struct bpf_prog *null_prog; +}; + +/* To avoid allocating an empty bpf_prog_array for cgroups that + * don't have a bpf program attached, use one global 'bpf_empty_prog_array'. + * It will not be modified by the caller of bpf_prog_array_alloc() + * (since the caller requested prog_cnt == 0); + * that pointer should still be 'freed' by bpf_prog_array_free() + */ +extern struct bpf_empty_prog_array bpf_empty_prog_array; + struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); void bpf_prog_array_free(struct bpf_prog_array *progs); int bpf_prog_array_length(struct bpf_prog_array *progs); @@ -1767,7 +1784,7 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size, int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id); + u32 *next_btf_id, enum bpf_type_flag *flag); bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, const struct btf *need_btf, u32 need_type_id); @@ -1875,11 +1892,6 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) { return -EOPNOTSUPP; } -static inline bool dev_map_can_have_prog(struct bpf_map *map) -{ - return false; -} - static inline void __dev_flush(void) { } @@ -1943,11 +1955,6 @@ static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, return -EOPNOTSUPP; } -static inline bool cpu_map_prog_allowed(struct bpf_map *map) -{ - return false; -} - static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) { @@ -2243,6 +2250,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_strncmp_proto; +extern const struct bpf_func_proto bpf_copy_from_user_task_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); @@ -2355,6 +2363,8 @@ enum bpf_text_poke_type { int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); +void *bpf_arch_text_copy(void *dst, void *src, size_t len); + struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); diff --git a/include/linux/btf.h b/include/linux/btf.h index b12cfe3b12bb..36bc09b8e890 100644 --- 
a/include/linux/btf.h +++ b/include/linux/btf.h @@ -238,6 +238,11 @@ static inline bool btf_type_is_var(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_VAR; } +static inline bool btf_type_is_type_tag(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_TYPE_TAG; +} + /* union is only a special case of struct: * all its offsetof(member) == 0 */ @@ -322,6 +327,11 @@ static inline const struct btf_var_secinfo *btf_type_var_secinfo( return (const struct btf_var_secinfo *)(t + 1); } +static inline struct btf_param *btf_params(const struct btf_type *t) +{ + return (struct btf_param *)(t + 1); +} + #ifdef CONFIG_BPF_SYSCALL struct bpf_prog; diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 3c1795fdb568..3f31ff400432 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -31,6 +31,9 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # define __kernel # ifdef STRUCTLEAK_PLUGIN # define __user __attribute__((user)) +# elif defined(CONFIG_DEBUG_INFO_BTF) && defined(CONFIG_PAHOLE_HAS_BTF_TAG) && \ + __has_attribute(btf_type_tag) +# define __user __attribute__((btf_type_tag("user"))) # else # define __user # endif diff --git a/include/linux/filter.h b/include/linux/filter.h index d23e999dc032..1cb1af917617 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -548,7 +548,7 @@ struct sock_fprog_kern { #define BPF_IMAGE_ALIGNMENT 8 struct bpf_binary_header { - u32 pages; + u32 size; u8 image[] __aligned(BPF_IMAGE_ALIGNMENT); }; @@ -886,17 +886,8 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp) static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { set_vm_flush_reset_perms(hdr); - set_memory_ro((unsigned long)hdr, hdr->pages); - set_memory_x((unsigned long)hdr, hdr->pages); -} - -static inline struct bpf_binary_header * -bpf_jit_binary_hdr(const struct bpf_prog *fp) -{ - unsigned long real_start = (unsigned long)fp->bpf_func; - unsigned long addr = real_start & PAGE_MASK; - - return (void *)addr; + set_memory_ro((unsigned long)hdr, hdr->size >> PAGE_SHIFT); + set_memory_x((unsigned long)hdr, hdr->size >> PAGE_SHIFT); } int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); @@ -1068,6 +1059,18 @@ void *bpf_jit_alloc_exec(unsigned long size); void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp); +struct bpf_binary_header * +bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image, + unsigned int alignment, + struct bpf_binary_header **rw_hdr, + u8 **rw_image, + bpf_jit_fill_hole_t bpf_fill_ill_insns); +int bpf_jit_binary_pack_finalize(struct bpf_prog *prog, + struct bpf_binary_header *ro_header, + struct bpf_binary_header *rw_header); +void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header, + struct bpf_binary_header *rw_header); + int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, struct bpf_jit_poke_descriptor *poke); diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 1e0f8a31f3de..16870f86c74d 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -51,7 +51,7 @@ struct ipv6_devconf { __s32 use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE - __s32 mc_forwarding; + atomic_t mc_forwarding; #endif __s32 disable_ipv6; __s32 drop_unicast_in_l2_multicast; diff --git a/include/linux/net/intel/i40e_client.h b/include/linux/net/intel/i40e_client.h index 6b3267b49755..ed42bd5f639f 100644 --- a/include/linux/net/intel/i40e_client.h +++ 
b/include/linux/net/intel/i40e_client.h @@ -26,11 +26,6 @@ struct i40e_client_version { u8 rsvd; }; -enum i40e_client_state { - __I40E_CLIENT_NULL, - __I40E_CLIENT_REGISTERED -}; - enum i40e_client_instance_state { __I40E_CLIENT_INSTANCE_NONE, __I40E_CLIENT_INSTANCE_OPENED, @@ -190,11 +185,6 @@ struct i40e_client { const struct i40e_client_ops *ops; /* client ops provided by the client */ }; -static inline bool i40e_client_is_registered(struct i40e_client *client) -{ - return test_bit(__I40E_CLIENT_REGISTERED, &client->state); -} - void i40e_client_device_register(struct i40e_info *ldev, struct i40e_client *client); void i40e_client_device_unregister(struct i40e_info *ldev); diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h index 1289593411d3..1c1332e4df26 100644 --- a/include/linux/net/intel/iidc.h +++ b/include/linux/net/intel/iidc.h @@ -32,6 +32,8 @@ enum iidc_rdma_protocol { }; #define IIDC_MAX_USER_PRIORITY 8 +#define IIDC_MAX_DSCP_MAPPING 64 +#define IIDC_DSCP_PFC_MODE 0x1 /* Struct to hold per RDMA Qset info */ struct iidc_rdma_qset_params { @@ -60,6 +62,8 @@ struct iidc_qos_params { u8 vport_relative_bw; u8 vport_priority_type; u8 num_tc; + u8 pfc_mode; + u8 dscp_map[IIDC_MAX_DSCP_MAPPING]; }; struct iidc_event { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e490b84732d1..5f6e2c0b0c90 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1948,6 +1948,8 @@ enum netdev_ml_priv_type { * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @linkwatch_dev_tracker: refcount tracker used by linkwatch. * @watchdog_dev_tracker: refcount tracker used by watchdog. + * @dev_registered_tracker: tracker for reference held while + * registered * * FIXME: cleanup struct net_device such that network protocol info * moves out. @@ -2282,6 +2284,7 @@ struct net_device { u8 dev_addr_shadow[MAX_ADDR_LEN]; netdevice_tracker linkwatch_dev_tracker; netdevice_tracker watchdog_dev_tracker; + netdevice_tracker dev_registered_tracker; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -3817,14 +3820,7 @@ extern unsigned int netdev_budget_usecs; /* Called by rtnetlink.c:rtnl_unlock() */ void netdev_run_todo(void); -/** - * dev_put - release reference to device - * @dev: network device - * - * Release reference to device to allow it to be freed. - * Try using dev_put_track() instead. - */ -static inline void dev_put(struct net_device *dev) +static inline void __dev_put(struct net_device *dev) { if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT @@ -3835,14 +3831,7 @@ static inline void dev_put(struct net_device *dev) } } -/** - * dev_hold - get reference to device - * @dev: network device - * - * Hold reference to device to keep it from being freed. - * Try using dev_hold_track() instead. - */ -static inline void dev_hold(struct net_device *dev) +static inline void __dev_hold(struct net_device *dev) { if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT @@ -3853,11 +3842,24 @@ static inline void dev_hold(struct net_device *dev) } } +static inline void __netdev_tracker_alloc(struct net_device *dev, + netdevice_tracker *tracker, + gfp_t gfp) +{ +#ifdef CONFIG_NET_DEV_REFCNT_TRACKER + ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp); +#endif +} + +/* netdev_tracker_alloc() can upgrade a prior untracked reference + * taken by dev_get_by_name()/dev_get_by_index() to a tracked one. 
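+ *
+ * A minimal usage sketch (editor's illustration, not part of this patch;
+ * net, ifindex and tracker are assumed to exist in the caller):
+ *
+ *	dev = dev_get_by_index(net, ifindex);		// untracked reference
+ *	netdev_tracker_alloc(dev, &tracker, GFP_ATOMIC);	// upgrade it
+ *	...
+ *	dev_put_track(dev, &tracker);		// drop reference and tracker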
+ */ static inline void netdev_tracker_alloc(struct net_device *dev, netdevice_tracker *tracker, gfp_t gfp) { #ifdef CONFIG_NET_DEV_REFCNT_TRACKER - ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp); + refcount_dec(&dev->refcnt_tracker.no_tracker); + __netdev_tracker_alloc(dev, tracker, gfp); #endif } @@ -3873,8 +3875,8 @@ static inline void dev_hold_track(struct net_device *dev, netdevice_tracker *tracker, gfp_t gfp) { if (dev) { - dev_hold(dev); - netdev_tracker_alloc(dev, tracker, gfp); + __dev_hold(dev); + __netdev_tracker_alloc(dev, tracker, gfp); } } @@ -3883,10 +3885,34 @@ static inline void dev_put_track(struct net_device *dev, { if (dev) { netdev_tracker_free(dev, tracker); - dev_put(dev); + __dev_put(dev); } } +/** + * dev_hold - get reference to device + * @dev: network device + * + * Hold reference to device to keep it from being freed. + * Try using dev_hold_track() instead. + */ +static inline void dev_hold(struct net_device *dev) +{ + dev_hold_track(dev, NULL, GFP_ATOMIC); +} + +/** + * dev_put - release reference to device + * @dev: network device + * + * Release reference to device to allow it to be freed. + * Try using dev_put_track() instead. + */ +static inline void dev_put(struct net_device *dev) +{ + dev_put_track(dev, NULL); +} + static inline void dev_replace_track(struct net_device *odev, struct net_device *ndev, netdevice_tracker *tracker, @@ -3895,11 +3921,11 @@ static inline void dev_replace_track(struct net_device *odev, if (odev) netdev_tracker_free(odev, tracker); - dev_hold(ndev); - dev_put(odev); + __dev_hold(ndev); + __dev_put(odev); if (ndev) - netdev_tracker_alloc(ndev, tracker, gfp); + __netdev_tracker_alloc(ndev, tracker, gfp); } /* Carrier loss detection, dial on demand. The functions netif_carrier_on diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 713a0c928b7c..cca149f78d35 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -582,7 +582,6 @@ int phylink_speed_up(struct phylink *pl); #define phylink_test(bm, mode) __phylink_do_bit(test_bit, bm, mode) void phylink_set_port_modes(unsigned long *bits); -void phylink_set_10g_modes(unsigned long *mask); void phylink_helper_basex_speed(struct phylink_link_state *state); void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h index 60f3453be23e..9ca353ab712b 100644 --- a/include/linux/ref_tracker.h +++ b/include/linux/ref_tracker.h @@ -13,6 +13,8 @@ struct ref_tracker_dir { spinlock_t lock; unsigned int quarantine_avail; refcount_t untracked; + refcount_t no_tracker; + bool dead; struct list_head list; /* List of active trackers */ struct list_head quarantine; /* List of dead trackers */ #endif @@ -26,7 +28,9 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, INIT_LIST_HEAD(&dir->quarantine); spin_lock_init(&dir->lock); dir->quarantine_avail = quarantine_count; + dir->dead = false; refcount_set(&dir->untracked, 1); + refcount_set(&dir->no_tracker, 1); stack_depot_init(); } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a27bcc4f7e9a..a5adbf6b51e8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -314,12 +314,38 @@ struct sk_buff; * used to translate the reason to string. 
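 *
 * A hedged usage sketch (editor's illustration): the enum values below are
 * meant to be passed to kfree_skb_reason(), e.g.
 *
 *	if (!sk)
 *		kfree_skb_reason(skb, SKB_DROP_REASON_NO_SOCKET);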
*/ enum skb_drop_reason { - SKB_DROP_REASON_NOT_SPECIFIED, - SKB_DROP_REASON_NO_SOCKET, - SKB_DROP_REASON_PKT_TOO_SMALL, - SKB_DROP_REASON_TCP_CSUM, - SKB_DROP_REASON_SOCKET_FILTER, - SKB_DROP_REASON_UDP_CSUM, + SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */ + SKB_DROP_REASON_NO_SOCKET, /* socket not found */ + SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */ + SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ + SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ + SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ + SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ + SKB_DROP_REASON_OTHERHOST, /* packet doesn't belong to the + * current host (interface is in + * promisc mode) + */ + SKB_DROP_REASON_IP_CSUM, /* IP checksum error */ + SKB_DROP_REASON_IP_INHDR, /* there is something wrong with + * the IP header (see + * IPSTATS_MIB_INHDRERRORS) + */ + SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validation failed; + * see the documentation for + * rp_filter in ip-sysctl.rst for + * more information + */ + SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2 + * is multicast, but L3 is + * unicast. + */ + SKB_DROP_REASON_XFRM_POLICY, /* xfrm policy check failed */ + SKB_DROP_REASON_IP_NOPROTO, /* no support for IP protocol */ + SKB_DROP_REASON_SOCKET_RCVBUFF, /* socket receive buffer is full */ + SKB_DROP_REASON_PROTO_MEM, /* proto memory limitation, such as + * udp packets dropped out of + * udp_memory_allocated. + */ SKB_DROP_REASON_MAX, }; diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 18a717fe62eb..fdb5375f0562 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -29,7 +29,7 @@ struct sk_msg_sg { u32 end; u32 size; u32 copybreak; - unsigned long copy; + DECLARE_BITMAP(copy, MAX_MSG_FRAGS + 2); /* The extra two elements: * 1) used for chaining the front and sections when the list becomes * partitioned (e.g. end < start). The crypto APIs require the */ struct scatterlist data[MAX_MSG_FRAGS + 2]; }; -static_assert(BITS_PER_LONG >= NR_MSG_FRAG_IDS); /* UAPI in filter.c depends on struct sk_msg_sg being first element. 
*/ struct sk_msg { @@ -171,11 +170,6 @@ static inline u32 sk_msg_iter_dist(u32 start, u32 end) #define sk_msg_iter_next(msg, which) \ sk_msg_iter_var_next(msg->sg.which) -static inline void sk_msg_clear_meta(struct sk_msg *msg) -{ - memset(&msg->sg, 0, offsetofend(struct sk_msg_sg, copy)); -} - static inline void sk_msg_init(struct sk_msg *msg) { BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != NR_MSG_FRAG_IDS); @@ -234,7 +228,7 @@ static inline void sk_msg_compute_data_pointers(struct sk_msg *msg) { struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start); - if (test_bit(msg->sg.start, &msg->sg.copy)) { + if (test_bit(msg->sg.start, msg->sg.copy)) { msg->data = NULL; msg->data_end = NULL; } else { @@ -253,7 +247,7 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page, sg_set_page(sge, page, len, offset); sg_unmark_end(sge); - __set_bit(msg->sg.end, &msg->sg.copy); + __set_bit(msg->sg.end, msg->sg.copy); msg->sg.size += len; sk_msg_iter_next(msg, end); } @@ -262,9 +256,9 @@ static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state) { do { if (copy_state) - __set_bit(i, &msg->sg.copy); + __set_bit(i, msg->sg.copy); else - __clear_bit(i, &msg->sg.copy); + __clear_bit(i, msg->sg.copy); sk_msg_iter_var_next(i); if (i == msg->sg.end) break; diff --git a/include/linux/uio.h b/include/linux/uio.h index 1198a2bfc9bf..739285fe5a2f 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -273,6 +273,23 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) i->count = count; } +static inline int +iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes) +{ + size_t shorted = 0; + int npages; + + if (iov_iter_count(i) > max_bytes) { + shorted = iov_iter_count(i) - max_bytes; + iov_iter_truncate(i, max_bytes); + } + npages = iov_iter_npages(i, INT_MAX); + if (shorted) + iov_iter_reexpand(i, iov_iter_count(i) + shorted); + + return npages; +} + struct csum_state { __wsum csum; size_t off; diff --git a/include/net/dsa.h b/include/net/dsa.h index ca8c14b547b4..fd1f62a6e0a8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1281,7 +1281,7 @@ module_exit(dsa_tag_driver_module_exit) /** * module_dsa_tag_drivers() - Helper macro for registering DSA tag * drivers - * @__ops_array: Array of tag driver strucutres + * @__ops_array: Array of tag driver structures * * Helper macro for DSA tag drivers which do not do anything special * in module init/exit. Each module may only use this macro once, and diff --git a/include/net/gro.h b/include/net/gro.h index 8f75802d50fd..a765fedda5c4 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -29,46 +29,50 @@ struct napi_gro_cb { /* Number of segments aggregated. */ u16 count; - /* Start offset for remote checksum offload */ - u16 gro_remcsum_start; + /* Used in ipv6_gro_receive() and foo-over-udp */ + u16 proto; /* jiffies when first packet was created/queued */ unsigned long age; - /* Used in ipv6_gro_receive() and foo-over-udp */ - u16 proto; + /* portion of the cb set to zero at every gro iteration */ + struct_group(zeroed, + + /* Start offset for remote checksum offload */ + u16 gro_remcsum_start; - /* This is non-zero if the packet may be of the same flow. */ - u8 same_flow:1; + /* This is non-zero if the packet may be of the same flow. 
*/ + u8 same_flow:1; - /* Used in tunnel GRO receive */ - u8 encap_mark:1; + /* Used in tunnel GRO receive */ + u8 encap_mark:1; - /* GRO checksum is valid */ - u8 csum_valid:1; + /* GRO checksum is valid */ + u8 csum_valid:1; - /* Number of checksums via CHECKSUM_UNNECESSARY */ - u8 csum_cnt:3; + /* Number of checksums via CHECKSUM_UNNECESSARY */ + u8 csum_cnt:3; - /* Free the skb? */ - u8 free:2; + /* Free the skb? */ + u8 free:2; #define NAPI_GRO_FREE 1 #define NAPI_GRO_FREE_STOLEN_HEAD 2 - /* Used in foo-over-udp, set in udp[46]_gro_receive */ - u8 is_ipv6:1; + /* Used in foo-over-udp, set in udp[46]_gro_receive */ + u8 is_ipv6:1; - /* Used in GRE, set in fou/gue_gro_receive */ - u8 is_fou:1; + /* Used in GRE, set in fou/gue_gro_receive */ + u8 is_fou:1; - /* Used to determine if flush_id can be ignored */ - u8 is_atomic:1; + /* Used to determine if flush_id can be ignored */ + u8 is_atomic:1; - /* Number of gro_receive callbacks this packet already went through */ - u8 recursion_counter:4; + /* Number of gro_receive callbacks this packet already went through */ + u8 recursion_counter:4; - /* GRO is done by frag_list pointer chaining. */ - u8 is_flist:1; + /* GRO is done by frag_list pointer chaining. */ + u8 is_flist:1; + ); /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; diff --git a/include/net/inet_dscp.h b/include/net/inet_dscp.h new file mode 100644 index 000000000000..72f250dffada --- /dev/null +++ b/include/net/inet_dscp.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * inet_dscp.h: helpers for handling differentiated services codepoints (DSCP) + * + * DSCP is defined in RFC 2474: + * + * 0 1 2 3 4 5 6 7 + * +---+---+---+---+---+---+---+---+ + * | DSCP | CU | + * +---+---+---+---+---+---+---+---+ + * + * DSCP: differentiated services codepoint + * CU: currently unused + * + * The whole DSCP + CU bits form the DS field. + * The DS field is also commonly called TOS or Traffic Class (for IPv6). + * + * Note: the CU bits are now used for Explicit Congestion Notification + * (RFC 3168). + */ + +#ifndef _INET_DSCP_H +#define _INET_DSCP_H + +#include <linux/types.h> + +/* Special type for storing DSCP values. + * + * A dscp_t variable stores a DS field with the CU (ECN) bits cleared. + * Using dscp_t makes it possible to strictly separate DSCP and ECN bits, + * thus avoiding bugs where ECN bits are erroneously taken into account + * during FIB lookups or policy routing. + * + * Note: to get the real DSCP value contained in a dscp_t variable one would + * have to do a bit shift after calling inet_dscp_to_dsfield(). We could have + * a helper for that, but there are currently no users. 
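+ *
+ * A short illustrative sketch (editor's addition, not part of the patch;
+ * assumes an IPv4 header already parsed into iph):
+ *
+ *	dscp_t dscp = inet_dsfield_to_dscp(ipv4_get_dsfield(iph));
+ *	__u8 dsfield = inet_dscp_to_dsfield(dscp);	// ECN bits are zero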
+ */ +typedef u8 __bitwise dscp_t; + +#define INET_DSCP_MASK 0xfc + +static inline dscp_t inet_dsfield_to_dscp(__u8 dsfield) +{ + return (__force dscp_t)(dsfield & INET_DSCP_MASK); +} + +static inline __u8 inet_dscp_to_dsfield(dscp_t dscp) +{ + return (__force __u8)dscp; +} + +static inline bool inet_validate_dscp(__u8 val) +{ + return !(val & ~INET_DSCP_MASK); +} + +#endif /* _INET_DSCP_H */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c4297704bbcb..6a82bcb8813b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -17,6 +17,7 @@ #include <linux/rcupdate.h> #include <net/fib_notifier.h> #include <net/fib_rules.h> +#include <net/inet_dscp.h> #include <net/inetpeer.h> #include <linux/percpu.h> #include <linux/notifier.h> @@ -24,7 +25,7 @@ struct fib_config { u8 fc_dst_len; - u8 fc_tos; + dscp_t fc_dscp; u8 fc_protocol; u8 fc_scope; u8 fc_type; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 082f30256f59..f693784e1419 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -15,9 +15,9 @@ #include <linux/refcount.h> #include <linux/jump_label_ratelimit.h> #include <net/if_inet6.h> -#include <net/ndisc.h> #include <net/flow.h> #include <net/flow_dissector.h> +#include <net/inet_dscp.h> #include <net/snmp.h> #include <net/netns/hash.h> @@ -975,6 +975,11 @@ static inline u8 ip6_tclass(__be32 flowinfo) return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT; } +static inline dscp_t ip6_dscp(__be32 flowinfo) +{ + return inet_dsfield_to_dscp(ip6_tclass(flowinfo)); +} + static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel) { return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel; diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 0a4779175a52..5052c66e22d2 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _IPV6_FRAG_H #define _IPV6_FRAG_H +#include <linux/icmpv6.h> #include <linux/kernel.h> #include <net/addrconf.h> #include <net/ipv6.h> diff --git a/include/net/mctp.h b/include/net/mctp.h index 7e35ec79b909..e80a4baf8379 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -45,6 +45,11 @@ static inline bool mctp_address_ok(mctp_eid_t eid) return eid >= 8 && eid < 255; } +static inline bool mctp_address_matches(mctp_eid_t match, mctp_eid_t eid) +{ + return match == eid || match == MCTP_ADDR_ANY; +} + static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb) { return (struct mctp_hdr *)skb_network_header(skb); } @@ -121,7 +126,7 @@ struct mctp_sock { */ struct mctp_sk_key { mctp_eid_t peer_addr; - mctp_eid_t local_addr; + mctp_eid_t local_addr; /* MCTP_ADDR_ANY for locally owned tags */ __u8 tag; /* incoming tag match; invert TO for local */ /* we hold a ref to sk when set */ @@ -158,6 +163,12 @@ struct mctp_sk_key { */ unsigned long dev_flow_state; struct mctp_dev *dev; + + /* a tag allocated with the SIOCMCTPALLOCTAG ioctl will not expire + * automatically on timeout or response; instead, SIOCMCTPDROPTAG + * is used. 
+ */ + bool manual_alloc; }; struct mctp_skb_cb { @@ -234,6 +245,9 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); void mctp_key_unref(struct mctp_sk_key *key); +struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, + mctp_eid_t daddr, mctp_eid_t saddr, + bool manual, u8 *tagp); /* routing <--> device interface */ unsigned int mctp_default_net(struct net *net); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 5b61c462e534..374cc7b260fc 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -513,4 +513,10 @@ static inline void fnhe_genid_bump(struct net *net) atomic_inc(&net->fnhe_genid); } +#ifdef CONFIG_NET +void net_ns_init(void); +#else +static inline void net_ns_init(void) {} +#endif + #endif /* __NET_NET_NAMESPACE_H */ diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 30cdfc4e1615..d145f1966682 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -92,6 +92,11 @@ struct netns_ipv6 { struct sock *tcp_sk; struct sock *igmp_sk; struct sock *mc_autojoin_sk; + + struct hlist_head *inet6_addr_lst; + spinlock_t addrconf_hash_lock; + struct delayed_work addr_chk_work; + #ifdef CONFIG_IPV6_MROUTE #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES struct mr_table *mrt6; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 676cb8ea9e15..a3b57a93228a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -1028,4 +1028,15 @@ struct tc_fifo_qopt_offload { }; }; +#ifdef CONFIG_NET_CLS_ACT +DECLARE_STATIC_KEY_FALSE(tc_skb_ext_tc); +void tc_skb_ext_tc_enable(void); +void tc_skb_ext_tc_disable(void); +#define tc_skb_ext_tc_enabled() static_branch_unlikely(&tc_skb_ext_tc) +#else /* CONFIG_NET_CLS_ACT */ +static inline void tc_skb_ext_tc_enable(void) { } +static inline void tc_skb_ext_tc_disable(void) { } +#define tc_skb_ext_tc_enabled() false +#endif + #endif diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 443d45951564..4aa031849668 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -13,7 +13,7 @@ void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries); bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc); -u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, u32 max); +u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max); void xsk_tx_release(struct xsk_buff_pool *pool); struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev, u16 queue_id); @@ -142,8 +142,7 @@ static inline bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, return false; } -static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, - u32 max) +static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max) { return 0; } diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index ddeefc4a1040..5554ee75e7da 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -60,6 +60,7 @@ struct xsk_buff_pool { */ dma_addr_t *dma_pages; struct xdp_buff_xsk *heads; + struct xdp_desc *tx_descs; u64 chunk_mask; u64 addrs_cnt; u32 free_list_cnt; diff --git a/include/trace/events/mctp.h b/include/trace/events/mctp.h index 175b057c507f..165cf25f77a7 100644 --- a/include/trace/events/mctp.h +++ b/include/trace/events/mctp.h @@ -15,6 +15,7 @@ enum { MCTP_TRACE_KEY_REPLIED, MCTP_TRACE_KEY_INVALIDATED, MCTP_TRACE_KEY_CLOSED, + 
MCTP_TRACE_KEY_DROPPED, }; #endif /* __TRACE_MCTP_ENUMS */ @@ -22,6 +23,7 @@ TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_TIMEOUT); TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_REPLIED); TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_INVALIDATED); TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_CLOSED); +TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_DROPPED); TRACE_EVENT(mctp_key_acquire, TP_PROTO(const struct mctp_sk_key *key), @@ -66,7 +68,8 @@ TRACE_EVENT(mctp_key_release, { MCTP_TRACE_KEY_TIMEOUT, "timeout" }, { MCTP_TRACE_KEY_REPLIED, "replied" }, { MCTP_TRACE_KEY_INVALIDATED, "invalidated" }, - { MCTP_TRACE_KEY_CLOSED, "closed" }) + { MCTP_TRACE_KEY_CLOSED, "closed" }, + { MCTP_TRACE_KEY_DROPPED, "dropped" }) ) ); diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index a8a64b97504d..cfcfd26399f7 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -16,6 +16,17 @@ EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \ EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ + EM(SKB_DROP_REASON_NETFILTER_DROP, NETFILTER_DROP) \ + EM(SKB_DROP_REASON_OTHERHOST, OTHERHOST) \ + EM(SKB_DROP_REASON_IP_CSUM, IP_CSUM) \ + EM(SKB_DROP_REASON_IP_INHDR, IP_INHDR) \ + EM(SKB_DROP_REASON_IP_RPFILTER, IP_RPFILTER) \ + EM(SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, \ UNICAST_IN_L2_MULTICAST) \ + EM(SKB_DROP_REASON_XFRM_POLICY, XFRM_POLICY) \ + EM(SKB_DROP_REASON_IP_NOPROTO, IP_NOPROTO) \ + EM(SKB_DROP_REASON_SOCKET_RCVBUFF, SOCKET_RCVBUFF) \ + EM(SKB_DROP_REASON_PROTO_MEM, PROTO_MEM) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 16a7574292a5..afe3d0d7f5f2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5076,6 +5076,16 @@ union bpf_attr { * associated to *xdp_md*, at *offset*. * Return * 0 on success, or a negative error in case of failure. + * + * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags) + * Description + * Read *size* bytes from user space address *user_ptr* in *tsk*'s + * address space, and store the data in *dst*. *flags* is not + * used yet and is provided for future extensibility. This helper + * can only be used by sleepable programs. + * Return + * 0 on success, or a negative error in case of failure. On error, + * the *dst* buffer is zeroed out. 
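+ *
+ *		A hedged example (editor's sketch; assumes a sleepable
+ *		program where *task* and *user_comm_ptr* are available):
+ *
+ *		::
+ *
+ *			char comm[16] = {};
+ *
+ *			bpf_copy_from_user_task(comm, sizeof(comm),
+ *						user_comm_ptr, task, 0);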
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5269,6 +5279,7 @@ union bpf_attr { FN(xdp_get_buff_len), \ FN(xdp_load_bytes), \ FN(xdp_store_bytes), \ + FN(copy_from_user_task), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5563,7 +5574,8 @@ struct bpf_sock { __u32 src_ip4; __u32 src_ip6[4]; __u32 src_port; /* host byte order */ - __u32 dst_port; /* network byte order */ + __be16 dst_port; /* network byte order */ + __u16 :16; /* zero padding */ __u32 dst_ip4; __u32 dst_ip6[4]; __u32 state; @@ -6441,7 +6453,8 @@ struct bpf_sk_lookup { __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ __u32 remote_ip4; /* Network byte order */ __u32 remote_ip6[4]; /* Network byte order */ - __u32 remote_port; /* Network byte order */ + __be16 remote_port; /* Network byte order */ + __u16 :16; /* Zero padding */ __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h index 829ffdfcacca..38f6a8fdfd34 100644 --- a/include/uapi/linux/ioam6_iptunnel.h +++ b/include/uapi/linux/ioam6_iptunnel.h @@ -41,6 +41,15 @@ enum { /* IOAM Trace Header */ IOAM6_IPTUNNEL_TRACE, /* struct ioam6_trace_hdr */ + /* Insertion frequency: + * "k over n" packets (0 < k <= n) + * [0.0001% ... 100%] + */ +#define IOAM6_IPTUNNEL_FREQ_MIN 1 +#define IOAM6_IPTUNNEL_FREQ_MAX 1000000 + IOAM6_IPTUNNEL_FREQ_K, /* u32 */ + IOAM6_IPTUNNEL_FREQ_N, /* u32 */ + __IOAM6_IPTUNNEL_MAX, }; diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 07b0318716fc..154ab56651f1 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -44,7 +44,25 @@ struct sockaddr_mctp_ext { #define MCTP_TAG_MASK 0x07 #define MCTP_TAG_OWNER 0x08 +#define MCTP_TAG_PREALLOC 0x10 #define MCTP_OPT_ADDR_EXT 1 +#define SIOCMCTPALLOCTAG (SIOCPROTOPRIVATE + 0) +#define SIOCMCTPDROPTAG (SIOCPROTOPRIVATE + 1) + +struct mctp_ioc_tag_ctl { + mctp_eid_t peer_addr; + + /* For SIOCMCTPALLOCTAG: must be passed as zero, kernel will + * populate with the allocated tag value. Returned tag value will + * always have TO and PREALLOC set. + * + * For SIOCMCTPDROPTAG: userspace provides tag value to drop, from + * a prior SIOCMCTPALLOCTAG call (and so must have TO and PREALLOC set). 
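+ *
+ * Editor's sketch of the expected userspace sequence (illustrative only;
+ * sd is an MCTP socket and peer a destination EID):
+ *
+ *	struct mctp_ioc_tag_ctl ctl = { .peer_addr = peer, .tag = 0 };
+ *
+ *	ioctl(sd, SIOCMCTPALLOCTAG, &ctl);	// ctl.tag now has TO and
+ *						// PREALLOC set
+ *	// ... use ctl.tag as smctp_tag for sendmsg()/recvmsg() on sd ...
+ *	ioctl(sd, SIOCMCTPDROPTAG, &ctl);	// release the tag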
+ */ + __u8 tag; + __u16 flags; +}; + #endif /* __UAPI_MCTP_H */ diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h index 66048cc5d7b3..1bbea8f0681e 100644 --- a/include/uapi/linux/net_dropmon.h +++ b/include/uapi/linux/net_dropmon.h @@ -93,6 +93,7 @@ enum net_dm_attr { NET_DM_ATTR_SW_DROPS, /* flag */ NET_DM_ATTR_HW_DROPS, /* flag */ NET_DM_ATTR_FLOW_ACTION_COOKIE, /* binary */ + NET_DM_ATTR_REASON, /* string */ __NET_DM_ATTR_MAX, NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1 diff --git a/init/Kconfig b/init/Kconfig index e9119bf54b1f..7328d4f25370 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -86,6 +86,10 @@ config CC_HAS_ASM_INLINE config CC_HAS_NO_PROFILE_FN_ATTR def_bool $(success,echo '__attribute__((no_profile_instrument_function)) int x();' | $(CC) -x c - -c -o /dev/null -Werror) +config PAHOLE_VERSION + int + default $(shell,$(srctree)/scripts/pahole-version.sh $(PAHOLE)) + config CONSTRUCTORS bool diff --git a/init/main.c b/init/main.c index 65fa2e41a9c0..ada50f5a15e4 100644 --- a/init/main.c +++ b/init/main.c @@ -99,6 +99,7 @@ #include <linux/kcsan.h> #include <linux/init_syscalls.h> #include <linux/stackdepot.h> +#include <net/net_namespace.h> #include <asm/io.h> #include <asm/bugs.h> @@ -1116,6 +1117,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) key_init(); security_init(); dbg_late_init(); + net_ns_init(); vfs_caches_init(); pagecache_init(); signals_init(); diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index b7aef5b3416d..110029ede71e 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -5,6 +5,7 @@ #include <linux/anon_inodes.h> #include <linux/filter.h> #include <linux/bpf.h> +#include <linux/rcupdate_trace.h> struct bpf_iter_target_info { struct list_head list; @@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) { int ret; - rcu_read_lock(); - migrate_disable(); - ret = bpf_prog_run(prog, ctx); - migrate_enable(); - rcu_read_unlock(); + if (prog->aux->sleepable) { + rcu_read_lock_trace(); + migrate_disable(); + might_fault(); + ret = bpf_prog_run(prog, ctx); + migrate_enable(); + rcu_read_unlock_trace(); + } else { + rcu_read_lock(); + migrate_disable(); + ret = bpf_prog_run(prog, ctx); + migrate_enable(); + rcu_read_unlock(); + } /* bpf program can only return 0 or 1: * 0 : okay diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index a1c44c17ea9c..11740b300de9 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -419,6 +419,9 @@ static struct btf_type btf_void; static int btf_resolve(struct btf_verifier_env *env, const struct btf_type *t, u32 type_id); +static int btf_func_check(struct btf_verifier_env *env, + const struct btf_type *t); + static bool btf_type_is_modifier(const struct btf_type *t) { /* Some of them are not strictly a C modifier @@ -595,6 +598,7 @@ static bool btf_type_needs_resolve(const struct btf_type *t) btf_type_is_struct(t) || btf_type_is_array(t) || btf_type_is_var(t) || + btf_type_is_func(t) || btf_type_is_decl_tag(t) || btf_type_is_datasec(t); } @@ -3571,9 +3575,24 @@ static s32 btf_func_check_meta(struct btf_verifier_env *env, return 0; } +static int btf_func_resolve(struct btf_verifier_env *env, + const struct resolve_vertex *v) +{ + const struct btf_type *t = v->t; + u32 next_type_id = t->type; + int err; + + err = btf_func_check(env, t); + if (err) + return err; + + env_stack_pop_resolved(env, next_type_id, 0); + return 0; +} + static struct btf_kind_operations func_ops = { .check_meta = btf_func_check_meta, .resolve = 
btf_df_resolve, + .resolve = btf_func_resolve, .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_ref_type_log, @@ -4194,7 +4213,7 @@ static bool btf_resolve_valid(struct btf_verifier_env *env, return !btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); - if (btf_type_is_decl_tag(t)) + if (btf_type_is_decl_tag(t) || btf_type_is_func(t)) return btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); @@ -4284,12 +4303,6 @@ static int btf_check_all_types(struct btf_verifier_env *env) if (err) return err; } - - if (btf_type_is_func(t)) { - err = btf_func_check(env, t); - if (err) - return err; - } } return 0; @@ -4886,6 +4899,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, const char *tname = prog->aux->attach_func_name; struct bpf_verifier_log *log = info->log; const struct btf_param *args; + const char *tag_value; u32 nr_args, arg; int i, ret; @@ -5038,6 +5052,13 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, info->btf = btf; info->btf_id = t->type; t = btf_type_by_id(btf, t->type); + + if (btf_type_is_type_tag(t)) { + tag_value = __btf_name_by_offset(btf, t->name_off); + if (strcmp(tag_value, "user") == 0) + info->reg_type |= MEM_USER; + } + /* skip modifiers */ while (btf_type_is_modifier(t)) { info->btf_id = t->type; @@ -5064,12 +5085,12 @@ enum bpf_struct_walk_result { static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, - u32 *next_btf_id) + u32 *next_btf_id, enum bpf_type_flag *flag) { u32 i, moff, mtrue_end, msize = 0, total_nelems = 0; const struct btf_type *mtype, *elem_type = NULL; const struct btf_member *member; - const char *tname, *mname; + const char *tname, *mname, *tag_value; u32 vlen, elem_id, mid; again: @@ -5253,7 +5274,8 @@ error: } if (btf_type_is_ptr(mtype)) { - const struct btf_type *stype; + const struct btf_type *stype, *t; + enum bpf_type_flag tmp_flag = 0; u32 id; if (msize != size || off != moff) { @@ -5262,9 +5284,19 @@ error: mname, moff, tname, off, size); return -EACCES; } + + /* check __user tag */ + t = btf_type_by_id(btf, mtype->type); + if (btf_type_is_type_tag(t)) { + tag_value = __btf_name_by_offset(btf, t->name_off); + if (strcmp(tag_value, "user") == 0) + tmp_flag = MEM_USER; + } + stype = btf_type_skip_modifiers(btf, mtype->type, &id); if (btf_type_is_struct(stype)) { *next_btf_id = id; + *flag = tmp_flag; return WALK_PTR; } } @@ -5291,13 +5323,14 @@ error: int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype __maybe_unused, - u32 *next_btf_id) + u32 *next_btf_id, enum bpf_type_flag *flag) { + enum bpf_type_flag tmp_flag = 0; int err; u32 id; do { - err = btf_struct_walk(log, btf, t, off, size, &id); + err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_flag); switch (err) { case WALK_PTR: @@ -5305,6 +5338,7 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, * we're done. */ *next_btf_id = id; + *flag = tmp_flag; return PTR_TO_BTF_ID; case WALK_SCALAR: return SCALAR_VALUE; @@ -5349,6 +5383,7 @@ bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *need_btf, u32 need_type_id) { const struct btf_type *type; + enum bpf_type_flag flag; int err; /* Are we already done? 
*/ @@ -5359,7 +5394,7 @@ again: type = btf_type_by_id(btf, id); if (!type) return false; - err = btf_struct_walk(log, btf, type, off, 1, &id); + err = btf_struct_walk(log, btf, type, off, 1, &id, &flag); if (err != WALK_STRUCT) return false; @@ -6740,8 +6775,19 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, int ret; btf = btf_get_module_btf(kset->owner); - if (IS_ERR_OR_NULL(btf)) - return btf ? PTR_ERR(btf) : -ENOENT; + if (!btf) { + if (!kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { + pr_err("missing vmlinux BTF, cannot register kfuncs\n"); + return -ENOENT; + } + if (kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) { + pr_err("missing module BTF, cannot register kfuncs\n"); + return -ENOENT; + } + return 0; + } + if (IS_ERR(btf)) + return PTR_ERR(btf); hook = bpf_prog_type_to_kfunc_hook(prog_type); ret = btf_populate_kfunc_set(btf, hook, kset); @@ -6752,10 +6798,113 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, } EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set); +#define MAX_TYPES_ARE_COMPAT_DEPTH 2 + +static +int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id, + int level) +{ + const struct btf_type *local_type, *targ_type; + int depth = 32; /* max recursion depth */ + + /* caller made sure that names match (ignoring flavor suffix) */ + local_type = btf_type_by_id(local_btf, local_id); + targ_type = btf_type_by_id(targ_btf, targ_id); + if (btf_kind(local_type) != btf_kind(targ_type)) + return 0; + +recur: + depth--; + if (depth < 0) + return -EINVAL; + + local_type = btf_type_skip_modifiers(local_btf, local_id, &local_id); + targ_type = btf_type_skip_modifiers(targ_btf, targ_id, &targ_id); + if (!local_type || !targ_type) + return -EINVAL; + + if (btf_kind(local_type) != btf_kind(targ_type)) + return 0; + + switch (btf_kind(local_type)) { + case BTF_KIND_UNKN: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + case BTF_KIND_FWD: + return 1; + case BTF_KIND_INT: + /* just reject deprecated bitfield-like integers; all other + * integers are by default compatible with each other + */ + return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0; + case BTF_KIND_PTR: + local_id = local_type->type; + targ_id = targ_type->type; + goto recur; + case BTF_KIND_ARRAY: + local_id = btf_array(local_type)->type; + targ_id = btf_array(targ_type)->type; + goto recur; + case BTF_KIND_FUNC_PROTO: { + struct btf_param *local_p = btf_params(local_type); + struct btf_param *targ_p = btf_params(targ_type); + __u16 local_vlen = btf_vlen(local_type); + __u16 targ_vlen = btf_vlen(targ_type); + int i, err; + + if (local_vlen != targ_vlen) + return 0; + + for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { + if (level <= 0) + return -EINVAL; + + btf_type_skip_modifiers(local_btf, local_p->type, &local_id); + btf_type_skip_modifiers(targ_btf, targ_p->type, &targ_id); + err = __bpf_core_types_are_compat(local_btf, local_id, + targ_btf, targ_id, + level - 1); + if (err <= 0) + return err; + } + + /* tail recurse for return type check */ + btf_type_skip_modifiers(local_btf, local_type->type, &local_id); + btf_type_skip_modifiers(targ_btf, targ_type->type, &targ_id); + goto recur; + } + default: + return 0; + } +} + +/* Check local and target types for compatibility. This check is used for + * type-based CO-RE relocations and follows slightly different rules than + * field-based relocations. 
This function assumes that root types were already + * checked for name match. Beyond that initial root-level name check, names + * are completely ignored. Compatibility rules are as follows: + * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but + * kind should match for local and target types (i.e., STRUCT is not + * compatible with UNION); + * - for ENUMs, the size is ignored; + * - for INT, size and signedness are ignored; + * - for ARRAY, dimensionality is ignored, element types are checked for + * compatibility recursively; + * - CONST/VOLATILE/RESTRICT modifiers are ignored; + * - TYPEDEFs/PTRs are compatible if the types they point to are compatible; + * - FUNC_PROTOs are compatible if they have compatible signatures: same + * number of input args and compatible return and argument types. + * These rules are not set in stone and probably will be adjusted as we get + * more experience with using BPF CO-RE relocations. + */ int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id) { - return -EOPNOTSUPP; + return __bpf_core_types_are_compat(local_btf, local_id, + targ_btf, targ_id, + MAX_TYPES_ARE_COMPAT_DEPTH); } static bool bpf_core_is_flavor_sep(const char *s) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 279ebbed75a5..098632fdbc45 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1384,20 +1384,6 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, } #ifdef CONFIG_NET -static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp, - enum cgroup_bpf_attach_type attach_type) -{ - struct bpf_prog_array *prog_array; - bool empty; - - rcu_read_lock(); - prog_array = rcu_dereference(cgrp->bpf.effective[attach_type]); - empty = bpf_prog_array_is_empty(prog_array); - rcu_read_unlock(); - - return empty; -} - static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen, struct bpf_sockopt_buf *buf) { @@ -1456,19 +1442,11 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, }; int ret, max_optlen; - /* Opportunistic check to see whether we have any BPF program - * attached to the hook so we don't waste time allocating - * memory and locking the socket. - */ - if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_SETSOCKOPT)) - return 0; - /* Allocate a bit more than the initial user buffer for * BPF program. The canonical use case is overriding * TCP_CONGESTION(nv) to TCP_CONGESTION(cubic). */ max_optlen = max_t(int, 16, *optlen); - max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf); if (max_optlen < 0) return max_optlen; @@ -1550,15 +1528,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, }; int ret; - /* Opportunistic check to see whether we have any BPF program - * attached to the hook so we don't waste time allocating - * memory and locking the socket. 
- */ - if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_GETSOCKOPT)) - return retval; - ctx.optlen = max_optlen; - max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf); if (max_optlen < 0) return max_optlen; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 0a1cfd8544b9..42d96549a804 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -537,13 +537,10 @@ long bpf_jit_limit_max __read_mostly; static void bpf_prog_ksym_set_addr(struct bpf_prog *prog) { - const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog); - unsigned long addr = (unsigned long)hdr; - WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog)); prog->aux->ksym.start = (unsigned long) prog->bpf_func; - prog->aux->ksym.end = addr + hdr->pages * PAGE_SIZE; + prog->aux->ksym.end = prog->aux->ksym.start + prog->jited_len; } static void @@ -808,6 +805,137 @@ int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, return slot; } +/* + * BPF program pack allocator. + * + * Most BPF programs are pretty small. Allocating a whole page for each + * program is sometimes a waste. Many small bpf programs also add pressure + * to the instruction TLB. To solve this issue, we introduce a BPF program pack + * allocator. The prog_pack allocator uses HPAGE_PMD_SIZE pages (2MB on x86) + * to host BPF programs. + */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define BPF_PROG_PACK_SIZE HPAGE_PMD_SIZE +#else +#define BPF_PROG_PACK_SIZE PAGE_SIZE +#endif +#define BPF_PROG_CHUNK_SHIFT 6 +#define BPF_PROG_CHUNK_SIZE (1 << BPF_PROG_CHUNK_SHIFT) +#define BPF_PROG_CHUNK_MASK (~(BPF_PROG_CHUNK_SIZE - 1)) +#define BPF_PROG_CHUNK_COUNT (BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE) + +struct bpf_prog_pack { + struct list_head list; + void *ptr; + unsigned long bitmap[BITS_TO_LONGS(BPF_PROG_CHUNK_COUNT)]; +}; + +#define BPF_PROG_MAX_PACK_PROG_SIZE BPF_PROG_PACK_SIZE +#define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE) + +static DEFINE_MUTEX(pack_mutex); +static LIST_HEAD(pack_list); + +static struct bpf_prog_pack *alloc_new_pack(void) +{ + struct bpf_prog_pack *pack; + + pack = kzalloc(sizeof(*pack), GFP_KERNEL); + if (!pack) + return NULL; + pack->ptr = module_alloc(BPF_PROG_PACK_SIZE); + if (!pack->ptr) { + kfree(pack); + return NULL; + } + bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE); + list_add_tail(&pack->list, &pack_list); + + set_vm_flush_reset_perms(pack->ptr); + set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); + set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); + return pack; +} + +static void *bpf_prog_pack_alloc(u32 size) +{ + unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size); + struct bpf_prog_pack *pack; + unsigned long pos; + void *ptr = NULL; + + if (size > BPF_PROG_MAX_PACK_PROG_SIZE) { + size = round_up(size, PAGE_SIZE); + ptr = module_alloc(size); + if (ptr) { + set_vm_flush_reset_perms(ptr); + set_memory_ro((unsigned long)ptr, size / PAGE_SIZE); + set_memory_x((unsigned long)ptr, size / PAGE_SIZE); + } + return ptr; + } + mutex_lock(&pack_mutex); + list_for_each_entry(pack, &pack_list, list) { + pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0, + nbits, 0); + if (pos < BPF_PROG_CHUNK_COUNT) + goto found_free_area; + } + + pack = alloc_new_pack(); + if (!pack) + goto out; + + pos = 0; + +found_free_area: + bitmap_set(pack->bitmap, pos, nbits); + ptr = (void *)(pack->ptr) + (pos << BPF_PROG_CHUNK_SHIFT); + +out: + mutex_unlock(&pack_mutex); + return ptr; +} + +static void bpf_prog_pack_free(struct bpf_binary_header *hdr) 
+
+static void bpf_prog_pack_free(struct bpf_binary_header *hdr)
+{
+	struct bpf_prog_pack *pack = NULL, *tmp;
+	unsigned int nbits;
+	unsigned long pos;
+	void *pack_ptr;
+
+	if (hdr->size > BPF_PROG_MAX_PACK_PROG_SIZE) {
+		module_memfree(hdr);
+		return;
+	}
+
+	pack_ptr = (void *)((unsigned long)hdr & ~(BPF_PROG_PACK_SIZE - 1));
+	mutex_lock(&pack_mutex);
+
+	list_for_each_entry(tmp, &pack_list, list) {
+		if (tmp->ptr == pack_ptr) {
+			pack = tmp;
+			break;
+		}
+	}
+
+	if (WARN_ONCE(!pack, "bpf_prog_pack bug\n"))
+		goto out;
+
+	nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size);
+	pos = ((unsigned long)hdr - (unsigned long)pack_ptr) >> BPF_PROG_CHUNK_SHIFT;
+
+	bitmap_clear(pack->bitmap, pos, nbits);
+	if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
+				       BPF_PROG_CHUNK_COUNT, 0) == 0) {
+		list_del(&pack->list);
+		module_memfree(pack->ptr);
+		kfree(pack);
+	}
+out:
+	mutex_unlock(&pack_mutex);
+}
+
 static atomic_long_t bpf_jit_current;
 
 /* Can be overridden by an arch's JIT compiler if it has a custom,
@@ -833,12 +961,11 @@ static int __init bpf_jit_charge_init(void)
 }
 pure_initcall(bpf_jit_charge_init);
 
-int bpf_jit_charge_modmem(u32 pages)
+int bpf_jit_charge_modmem(u32 size)
 {
-	if (atomic_long_add_return(pages, &bpf_jit_current) >
-	    (bpf_jit_limit >> PAGE_SHIFT)) {
+	if (atomic_long_add_return(size, &bpf_jit_current) > bpf_jit_limit) {
 		if (!bpf_capable()) {
-			atomic_long_sub(pages, &bpf_jit_current);
+			atomic_long_sub(size, &bpf_jit_current);
 			return -EPERM;
 		}
 	}
@@ -846,9 +973,9 @@ int bpf_jit_charge_modmem(u32 pages)
 	return 0;
 }
 
-void bpf_jit_uncharge_modmem(u32 pages)
+void bpf_jit_uncharge_modmem(u32 size)
 {
-	atomic_long_sub(pages, &bpf_jit_current);
+	atomic_long_sub(size, &bpf_jit_current);
 }
 
 void *__weak bpf_jit_alloc_exec(unsigned long size)
@@ -867,7 +994,7 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 		     bpf_jit_fill_hole_t bpf_fill_ill_insns)
 {
 	struct bpf_binary_header *hdr;
-	u32 size, hole, start, pages;
+	u32 size, hole, start;
 
 	WARN_ON_ONCE(!is_power_of_2(alignment) ||
 		     alignment > BPF_IMAGE_ALIGNMENT);
@@ -877,20 +1004,19 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 	 * random section of illegal instructions.
 	 */
 	size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
-	pages = size / PAGE_SIZE;
 
-	if (bpf_jit_charge_modmem(pages))
+	if (bpf_jit_charge_modmem(size))
 		return NULL;
 	hdr = bpf_jit_alloc_exec(size);
 	if (!hdr) {
-		bpf_jit_uncharge_modmem(pages);
+		bpf_jit_uncharge_modmem(size);
 		return NULL;
 	}
 
 	/* Fill space with illegal/arch-dep instructions. */
 	bpf_fill_ill_insns(hdr, size);
 
-	hdr->pages = pages;
+	hdr->size = size;
 	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
 		     PAGE_SIZE - sizeof(*hdr));
 	start = (get_random_int() % hole) & ~(alignment - 1);
@@ -903,10 +1029,113 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 
 void bpf_jit_binary_free(struct bpf_binary_header *hdr)
 {
-	u32 pages = hdr->pages;
+	u32 size = hdr->size;
 
 	bpf_jit_free_exec(hdr);
-	bpf_jit_uncharge_modmem(pages);
+	bpf_jit_uncharge_modmem(size);
+}
+
+/* Allocate jit binary from bpf_prog_pack allocator.
+ * Since the allocated memory is RO+X, the JIT engine cannot write directly
+ * to the memory. To solve this problem, a RW buffer is also allocated at
+ * the same time. The JIT engine should calculate offsets based on the
+ * RO memory address, but write the JITed program to the RW buffer. Once the
+ * JIT engine finishes, it calls bpf_jit_binary_pack_finalize, which copies
+ * the JITed program to the RO memory.
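+ *
+ * Sketch of the expected calling sequence, for illustration (using the
+ * functions defined below; fill_insns stands for the arch's
+ * bpf_jit_fill_hole_t callback):
+ *
+ *	ro_header = bpf_jit_binary_pack_alloc(proglen, &image, align,
+ *					      &rw_header, &rw_image,
+ *					      fill_insns);
+ *	// emit instructions into rw_image, offsets computed against image
+ *	err = bpf_jit_binary_pack_finalize(prog, ro_header, rw_header);
+ *	// on a JIT failure before finalize, call instead:
+ *	// bpf_jit_binary_pack_free(ro_header, rw_header);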
+ */
+struct bpf_binary_header *
+bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
+			  unsigned int alignment,
+			  struct bpf_binary_header **rw_header,
+			  u8 **rw_image,
+			  bpf_jit_fill_hole_t bpf_fill_ill_insns)
+{
+	struct bpf_binary_header *ro_header;
+	u32 size, hole, start;
+
+	WARN_ON_ONCE(!is_power_of_2(alignment) ||
+		     alignment > BPF_IMAGE_ALIGNMENT);
+
+	/* add 16 bytes for a random section of illegal instructions */
+	size = round_up(proglen + sizeof(*ro_header) + 16, BPF_PROG_CHUNK_SIZE);
+
+	if (bpf_jit_charge_modmem(size))
+		return NULL;
+	ro_header = bpf_prog_pack_alloc(size);
+	if (!ro_header) {
+		bpf_jit_uncharge_modmem(size);
+		return NULL;
+	}
+
+	*rw_header = kvmalloc(size, GFP_KERNEL);
+	if (!*rw_header) {
+		bpf_prog_pack_free(ro_header);
+		bpf_jit_uncharge_modmem(size);
+		return NULL;
+	}
+
+	/* Fill space with illegal/arch-dep instructions. */
+	bpf_fill_ill_insns(*rw_header, size);
+
+	(*rw_header)->size = size;
+	hole = min_t(unsigned int, size - (proglen + sizeof(*ro_header)),
+		     BPF_PROG_CHUNK_SIZE - sizeof(*ro_header));
+	start = (get_random_int() % hole) & ~(alignment - 1);
+
+	*image_ptr = &ro_header->image[start];
+	*rw_image = &(*rw_header)->image[start];
+
+	return ro_header;
+}
+
+/* Copy JITed text from rw_header to its final location, the ro_header. */
+int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
+				 struct bpf_binary_header *ro_header,
+				 struct bpf_binary_header *rw_header)
+{
+	void *ptr;
+
+	ptr = bpf_arch_text_copy(ro_header, rw_header, rw_header->size);
+
+	kvfree(rw_header);
+
+	if (IS_ERR(ptr)) {
+		bpf_prog_pack_free(ro_header);
+		return PTR_ERR(ptr);
+	}
+	prog->aux->use_bpf_prog_pack = true;
+	return 0;
+}
+
+/* bpf_jit_binary_pack_free is called in two different scenarios:
+ *   1) when the program is freed after bpf_jit_binary_pack_finalize
+ *      succeeded;
+ *   2) when the JIT engine fails (before bpf_jit_binary_pack_finalize).
+ * For case 2), we need to free both the RO memory and the RW buffer.
+ * Also, ro_header->size in 2) is not properly set yet, so rw_header->size
+ * is used for uncharge.
+ */
+void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
+			      struct bpf_binary_header *rw_header)
+{
+	u32 size = rw_header ?
rw_header->size : ro_header->size; + + bpf_prog_pack_free(ro_header); + kvfree(rw_header); + bpf_jit_uncharge_modmem(size); +} + +static inline struct bpf_binary_header * +bpf_jit_binary_hdr(const struct bpf_prog *fp) +{ + unsigned long real_start = (unsigned long)fp->bpf_func; + unsigned long addr; + + if (fp->aux->use_bpf_prog_pack) + addr = real_start & BPF_PROG_CHUNK_MASK; + else + addr = real_start & PAGE_MASK; + + return (void *)addr; } /* This symbol is only overridden by archs that have different @@ -918,7 +1147,10 @@ void __weak bpf_jit_free(struct bpf_prog *fp) if (fp->jited) { struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); - bpf_jit_binary_free(hdr); + if (fp->aux->use_bpf_prog_pack) + bpf_jit_binary_pack_free(hdr, NULL /* rw_buffer */); + else + bpf_jit_binary_free(hdr); WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp)); } @@ -1968,18 +2200,10 @@ static struct bpf_prog_dummy { }, }; -/* to avoid allocating empty bpf_prog_array for cgroups that - * don't have bpf program attached use one global 'empty_prog_array' - * It will not be modified the caller of bpf_prog_array_alloc() - * (since caller requested prog_cnt == 0) - * that pointer should be 'freed' by bpf_prog_array_free() - */ -static struct { - struct bpf_prog_array hdr; - struct bpf_prog *null_prog; -} empty_prog_array = { +struct bpf_empty_prog_array bpf_empty_prog_array = { .null_prog = NULL, }; +EXPORT_SYMBOL(bpf_empty_prog_array); struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) { @@ -1989,12 +2213,12 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) (prog_cnt + 1), flags); - return &empty_prog_array.hdr; + return &bpf_empty_prog_array.hdr; } void bpf_prog_array_free(struct bpf_prog_array *progs) { - if (!progs || progs == &empty_prog_array.hdr) + if (!progs || progs == &bpf_empty_prog_array.hdr) return; kfree_rcu(progs, rcu); } @@ -2453,6 +2677,11 @@ int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, return -ENOTSUPP; } +void * __weak bpf_arch_text_copy(void *dst, void *src, size_t len) +{ + return ERR_PTR(-ENOTSUPP); +} + DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key); EXPORT_SYMBOL(bpf_stats_enabled_key); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 01cfdf40c838..4e5969fde0b3 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -16,6 +16,7 @@ #include <linux/pid_namespace.h> #include <linux/proc_ns.h> #include <linux/security.h> +#include <linux/btf_ids.h> #include "../../lib/kstrtox.h" @@ -671,6 +672,39 @@ const struct bpf_func_proto bpf_copy_from_user_proto = { .arg3_type = ARG_ANYTHING, }; +BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size, + const void __user *, user_ptr, struct task_struct *, tsk, u64, flags) +{ + int ret; + + /* flags is not used yet */ + if (unlikely(flags)) + return -EINVAL; + + if (unlikely(!size)) + return 0; + + ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0); + if (ret == size) + return 0; + + memset(dst, 0, size); + /* Return -EFAULT for partial read */ + return ret < 0 ? 
ret : -EFAULT; +} + +const struct bpf_func_proto bpf_copy_from_user_task_proto = { + .func = bpf_copy_from_user_task, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_BTF_ID, + .arg4_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], + .arg5_type = ARG_ANYTHING +}; + BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) { if (cpu >= nr_cpu_ids) diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile index 1400ac58178e..baf47d9c7557 100644 --- a/kernel/bpf/preload/Makefile +++ b/kernel/bpf/preload/Makefile @@ -1,40 +1,16 @@ # SPDX-License-Identifier: GPL-2.0 LIBBPF_SRCS = $(srctree)/tools/lib/bpf/ -LIBBPF_OUT = $(abspath $(obj))/libbpf -LIBBPF_A = $(LIBBPF_OUT)/libbpf.a -LIBBPF_DESTDIR = $(LIBBPF_OUT) -LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include - -# Although not in use by libbpf's Makefile, set $(O) so that the "dummy" test -# in tools/scripts/Makefile.include always succeeds when building the kernel -# with $(O) pointing to a relative path, as in "make O=build bindeb-pkg". -$(LIBBPF_A): | $(LIBBPF_OUT) - $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ \ - DESTDIR=$(LIBBPF_DESTDIR) prefix= \ - $(LIBBPF_OUT)/libbpf.a install_headers - -libbpf_hdrs: $(LIBBPF_A) - -.PHONY: libbpf_hdrs - -$(LIBBPF_OUT): - $(call msg,MKDIR,$@) - $(Q)mkdir -p $@ +LIBBPF_INCLUDE = $(LIBBPF_SRCS)/.. userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \ -I $(LIBBPF_INCLUDE) -Wno-unused-result userprogs := bpf_preload_umd -clean-files := libbpf/ - -$(obj)/iterators/iterators.o: | libbpf_hdrs - bpf_preload_umd-objs := iterators/iterators.o -bpf_preload_umd-userldlibs := $(LIBBPF_A) -lelf -lz -$(obj)/bpf_preload_umd: $(LIBBPF_A) +$(obj)/bpf_preload_umd: $(obj)/bpf_preload_umd_blob.o: $(obj)/bpf_preload_umd diff --git a/kernel/bpf/preload/iterators/Makefile b/kernel/bpf/preload/iterators/Makefile index b8bd60511227..bfe24f8c5a20 100644 --- a/kernel/bpf/preload/iterators/Makefile +++ b/kernel/bpf/preload/iterators/Makefile @@ -35,15 +35,15 @@ endif .PHONY: all clean -all: iterators.skel.h +all: iterators.lskel.h clean: $(call msg,CLEAN) $(Q)rm -rf $(OUTPUT) iterators -iterators.skel.h: $(OUTPUT)/iterators.bpf.o | $(BPFTOOL) +iterators.lskel.h: $(OUTPUT)/iterators.bpf.o | $(BPFTOOL) $(call msg,GEN-SKEL,$@) - $(Q)$(BPFTOOL) gen skeleton $< > $@ + $(Q)$(BPFTOOL) gen skeleton -L $< > $@ $(OUTPUT)/iterators.bpf.o: iterators.bpf.c $(BPFOBJ) | $(OUTPUT) diff --git a/kernel/bpf/preload/iterators/iterators.c b/kernel/bpf/preload/iterators/iterators.c index 5d872a705470..4dafe0f4f2b2 100644 --- a/kernel/bpf/preload/iterators/iterators.c +++ b/kernel/bpf/preload/iterators/iterators.c @@ -10,20 +10,36 @@ #include <bpf/libbpf.h> #include <bpf/bpf.h> #include <sys/mount.h> -#include "iterators.skel.h" +#include "iterators.lskel.h" #include "bpf_preload_common.h" int to_kernel = -1; int from_kernel = 0; -static int send_link_to_kernel(struct bpf_link *link, const char *link_name) +static int __bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len) +{ + union bpf_attr attr; + int err; + + memset(&attr, 0, sizeof(attr)); + attr.info.bpf_fd = bpf_fd; + attr.info.info_len = *info_len; + attr.info.info = (long) info; + + err = skel_sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)); + if (!err) + *info_len = attr.info.info_len; + return err; +} + +static int send_link_to_kernel(int link_fd, const char *link_name) { struct bpf_preload_info 
obj = {}; struct bpf_link_info info = {}; __u32 info_len = sizeof(info); int err; - err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &info, &info_len); + err = __bpf_obj_get_info_by_fd(link_fd, &info, &info_len); if (err) return err; obj.link_id = info.id; @@ -37,7 +53,6 @@ static int send_link_to_kernel(struct bpf_link *link, const char *link_name) int main(int argc, char **argv) { - struct rlimit rlim = { RLIM_INFINITY, RLIM_INFINITY }; struct iterators_bpf *skel; int err, magic; int debug_fd; @@ -55,7 +70,6 @@ int main(int argc, char **argv) printf("bad start magic %d\n", magic); return 1; } - setrlimit(RLIMIT_MEMLOCK, &rlim); /* libbpf opens BPF object and loads it into the kernel */ skel = iterators_bpf__open_and_load(); if (!skel) { @@ -72,10 +86,10 @@ int main(int argc, char **argv) goto cleanup; /* send two bpf_link IDs with names to the kernel */ - err = send_link_to_kernel(skel->links.dump_bpf_map, "maps.debug"); + err = send_link_to_kernel(skel->links.dump_bpf_map_fd, "maps.debug"); if (err) goto cleanup; - err = send_link_to_kernel(skel->links.dump_bpf_prog, "progs.debug"); + err = send_link_to_kernel(skel->links.dump_bpf_prog_fd, "progs.debug"); if (err) goto cleanup; diff --git a/kernel/bpf/preload/iterators/iterators.lskel.h b/kernel/bpf/preload/iterators/iterators.lskel.h new file mode 100644 index 000000000000..d90562d672d2 --- /dev/null +++ b/kernel/bpf/preload/iterators/iterators.lskel.h @@ -0,0 +1,428 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* THIS FILE IS AUTOGENERATED! */ +#ifndef __ITERATORS_BPF_SKEL_H__ +#define __ITERATORS_BPF_SKEL_H__ + +#include <stdlib.h> +#include <bpf/bpf.h> +#include <bpf/skel_internal.h> + +struct iterators_bpf { + struct bpf_loader_ctx ctx; + struct { + struct bpf_map_desc rodata; + } maps; + struct { + struct bpf_prog_desc dump_bpf_map; + struct bpf_prog_desc dump_bpf_prog; + } progs; + struct { + int dump_bpf_map_fd; + int dump_bpf_prog_fd; + } links; + struct iterators_bpf__rodata { + } *rodata; +}; + +static inline int +iterators_bpf__dump_bpf_map__attach(struct iterators_bpf *skel) +{ + int prog_fd = skel->progs.dump_bpf_map.prog_fd; + int fd = skel_link_create(prog_fd, 0, BPF_TRACE_ITER); + + if (fd > 0) + skel->links.dump_bpf_map_fd = fd; + return fd; +} + +static inline int +iterators_bpf__dump_bpf_prog__attach(struct iterators_bpf *skel) +{ + int prog_fd = skel->progs.dump_bpf_prog.prog_fd; + int fd = skel_link_create(prog_fd, 0, BPF_TRACE_ITER); + + if (fd > 0) + skel->links.dump_bpf_prog_fd = fd; + return fd; +} + +static inline int +iterators_bpf__attach(struct iterators_bpf *skel) +{ + int ret = 0; + + ret = ret < 0 ? ret : iterators_bpf__dump_bpf_map__attach(skel); + ret = ret < 0 ? ret : iterators_bpf__dump_bpf_prog__attach(skel); + return ret < 0 ? 
ret : 0; +} + +static inline void +iterators_bpf__detach(struct iterators_bpf *skel) +{ + skel_closenz(skel->links.dump_bpf_map_fd); + skel_closenz(skel->links.dump_bpf_prog_fd); +} +static void +iterators_bpf__destroy(struct iterators_bpf *skel) +{ + if (!skel) + return; + iterators_bpf__detach(skel); + skel_closenz(skel->progs.dump_bpf_map.prog_fd); + skel_closenz(skel->progs.dump_bpf_prog.prog_fd); + munmap(skel->rodata, 4096); + skel_closenz(skel->maps.rodata.map_fd); + free(skel); +} +static inline struct iterators_bpf * +iterators_bpf__open(void) +{ + struct iterators_bpf *skel; + + skel = calloc(sizeof(*skel), 1); + if (!skel) + goto cleanup; + skel->ctx.sz = (void *)&skel->links - (void *)skel; + skel->rodata = + mmap(NULL, 4096, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (skel->rodata == (void *) -1) + goto cleanup; + memcpy(skel->rodata, (void *)"\ +\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ +\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x0a\0\x25\x34\x75\x20\ +\x25\x2d\x31\x36\x73\x25\x36\x64\x0a\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\ +\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\ +\x64\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x0a\0", 98); + skel->maps.rodata.initial_value = (__u64)(long)skel->rodata; + return skel; +cleanup: + iterators_bpf__destroy(skel); + return NULL; +} + +static inline int +iterators_bpf__load(struct iterators_bpf *skel) +{ + struct bpf_load_and_run_opts opts = {}; + int err; + + opts.ctx = (struct bpf_loader_ctx *)skel; + opts.data_sz = 6056; + opts.data = (void *)"\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ 
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x9f\xeb\x01\0\ +\x18\0\0\0\0\0\0\0\x1c\x04\0\0\x1c\x04\0\0\xf9\x04\0\0\0\0\0\0\0\0\0\x02\x02\0\ +\0\0\x01\0\0\0\x02\0\0\x04\x10\0\0\0\x13\0\0\0\x03\0\0\0\0\0\0\0\x18\0\0\0\x04\ +\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\x08\0\0\0\0\0\0\0\0\0\0\x02\x0d\0\0\0\0\0\0\ +\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x01\0\0\0\x20\0\0\0\0\0\0\x01\x04\0\0\0\x20\ +\0\0\x01\x24\0\0\0\x01\0\0\x0c\x05\0\0\0\xa3\0\0\0\x03\0\0\x04\x18\0\0\0\xb1\0\ +\0\0\x09\0\0\0\0\0\0\0\xb5\0\0\0\x0b\0\0\0\x40\0\0\0\xc0\0\0\0\x0b\0\0\0\x80\0\ +\0\0\0\0\0\0\0\0\0\x02\x0a\0\0\0\xc8\0\0\0\0\0\0\x07\0\0\0\0\xd1\0\0\0\0\0\0\ +\x08\x0c\0\0\0\xd7\0\0\0\0\0\0\x01\x08\0\0\0\x40\0\0\0\x94\x01\0\0\x03\0\0\x04\ +\x18\0\0\0\x9c\x01\0\0\x0e\0\0\0\0\0\0\0\x9f\x01\0\0\x11\0\0\0\x20\0\0\0\xa4\ +\x01\0\0\x0e\0\0\0\xa0\0\0\0\xb0\x01\0\0\0\0\0\x08\x0f\0\0\0\xb6\x01\0\0\0\0\0\ +\x01\x04\0\0\0\x20\0\0\0\xc3\x01\0\0\0\0\0\x01\x01\0\0\0\x08\0\0\x01\0\0\0\0\0\ +\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x10\0\0\0\xc8\x01\0\0\0\0\0\x01\x04\0\0\0\ +\x20\0\0\0\0\0\0\0\0\0\0\x02\x14\0\0\0\x2c\x02\0\0\x02\0\0\x04\x10\0\0\0\x13\0\ +\0\0\x03\0\0\0\0\0\0\0\x3f\x02\0\0\x15\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\x18\0\ +\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x13\0\0\0\x44\x02\0\0\x01\0\0\x0c\ +\x16\0\0\0\x90\x02\0\0\x01\0\0\x04\x08\0\0\0\x99\x02\0\0\x19\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\x02\x1a\0\0\0\xea\x02\0\0\x06\0\0\x04\x38\0\0\0\x9c\x01\0\0\x0e\0\0\ +\0\0\0\0\0\x9f\x01\0\0\x11\0\0\0\x20\0\0\0\xf7\x02\0\0\x1b\0\0\0\xc0\0\0\0\x08\ +\x03\0\0\x15\0\0\0\0\x01\0\0\x11\x03\0\0\x1d\0\0\0\x40\x01\0\0\x1b\x03\0\0\x1e\ +\0\0\0\x80\x01\0\0\0\0\0\0\0\0\0\x02\x1c\0\0\0\0\0\0\0\0\0\0\x0a\x10\0\0\0\0\0\ +\0\0\0\0\0\x02\x1f\0\0\0\0\0\0\0\0\0\0\x02\x20\0\0\0\x65\x03\0\0\x02\0\0\x04\ +\x08\0\0\0\x73\x03\0\0\x0e\0\0\0\0\0\0\0\x7c\x03\0\0\x0e\0\0\0\x20\0\0\0\x1b\ +\x03\0\0\x03\0\0\x04\x18\0\0\0\x86\x03\0\0\x1b\0\0\0\0\0\0\0\x8e\x03\0\0\x21\0\ +\0\0\x40\0\0\0\x94\x03\0\0\x23\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x22\0\0\0\0\0\ +\0\0\0\0\0\x02\x24\0\0\0\x98\x03\0\0\x01\0\0\x04\x04\0\0\0\xa3\x03\0\0\x0e\0\0\ +\0\0\0\0\0\x0c\x04\0\0\x01\0\0\x04\x04\0\0\0\x15\x04\0\0\x0e\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x23\0\0\0\x8b\x04\0\0\0\0\0\x0e\x25\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x0e\0\0\0\x9f\x04\ +\0\0\0\0\0\x0e\x27\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\ +\x20\0\0\0\xb5\x04\0\0\0\0\0\x0e\x29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\ +\x1c\0\0\0\x12\0\0\0\x11\0\0\0\xca\x04\0\0\0\0\0\x0e\x2b\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x04\0\0\0\xe1\x04\0\0\0\0\0\x0e\x2d\0\0\ +\0\x01\0\0\0\xe9\x04\0\0\x04\0\0\x0f\x62\0\0\0\x26\0\0\0\0\0\0\0\x23\0\0\0\x28\ +\0\0\0\x23\0\0\0\x0e\0\0\0\x2a\0\0\0\x31\0\0\0\x20\0\0\0\x2c\0\0\0\x51\0\0\0\ 
+\x11\0\0\0\xf1\x04\0\0\x01\0\0\x0f\x04\0\0\0\x2e\0\0\0\0\0\0\0\x04\0\0\0\0\x62\ +\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x6d\x65\x74\ +\x61\0\x6d\x61\x70\0\x63\x74\x78\0\x69\x6e\x74\0\x64\x75\x6d\x70\x5f\x62\x70\ +\x66\x5f\x6d\x61\x70\0\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\x30\ +\x3a\x30\0\x2f\x77\x2f\x6e\x65\x74\x2d\x6e\x65\x78\x74\x2f\x6b\x65\x72\x6e\x65\ +\x6c\x2f\x62\x70\x66\x2f\x70\x72\x65\x6c\x6f\x61\x64\x2f\x69\x74\x65\x72\x61\ +\x74\x6f\x72\x73\x2f\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\x2e\ +\x63\0\x09\x73\x74\x72\x75\x63\x74\x20\x73\x65\x71\x5f\x66\x69\x6c\x65\x20\x2a\ +\x73\x65\x71\x20\x3d\x20\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\ +\x71\x3b\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x6d\x65\x74\x61\0\x73\x65\x71\0\ +\x73\x65\x73\x73\x69\x6f\x6e\x5f\x69\x64\0\x73\x65\x71\x5f\x6e\x75\x6d\0\x73\ +\x65\x71\x5f\x66\x69\x6c\x65\0\x5f\x5f\x75\x36\x34\0\x75\x6e\x73\x69\x67\x6e\ +\x65\x64\x20\x6c\x6f\x6e\x67\x20\x6c\x6f\x6e\x67\0\x30\x3a\x31\0\x09\x73\x74\ +\x72\x75\x63\x74\x20\x62\x70\x66\x5f\x6d\x61\x70\x20\x2a\x6d\x61\x70\x20\x3d\ +\x20\x63\x74\x78\x2d\x3e\x6d\x61\x70\x3b\0\x09\x69\x66\x20\x28\x21\x6d\x61\x70\ +\x29\0\x09\x5f\x5f\x75\x36\x34\x20\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x20\x63\ +\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\x71\x5f\x6e\x75\x6d\x3b\0\x30\ +\x3a\x32\0\x09\x69\x66\x20\x28\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x3d\x20\x30\ +\x29\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\ +\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\ +\x20\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\ +\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x6d\x61\x70\0\x69\x64\0\x6e\x61\x6d\x65\ +\0\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\0\x5f\x5f\x75\x33\x32\0\x75\x6e\ +\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x63\x68\x61\x72\0\x5f\x5f\x41\x52\ +\x52\x41\x59\x5f\x53\x49\x5a\x45\x5f\x54\x59\x50\x45\x5f\x5f\0\x09\x42\x50\x46\ +\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x25\ +\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x5c\x6e\x22\x2c\x20\x6d\x61\x70\ +\x2d\x3e\x69\x64\x2c\x20\x6d\x61\x70\x2d\x3e\x6e\x61\x6d\x65\x2c\x20\x6d\x61\ +\x70\x2d\x3e\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x29\x3b\0\x7d\0\x62\ +\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x70\x72\ +\x6f\x67\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x69\x74\x65\ +\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x09\x73\x74\x72\x75\x63\x74\x20\x62\ +\x70\x66\x5f\x70\x72\x6f\x67\x20\x2a\x70\x72\x6f\x67\x20\x3d\x20\x63\x74\x78\ +\x2d\x3e\x70\x72\x6f\x67\x3b\0\x09\x69\x66\x20\x28\x21\x70\x72\x6f\x67\x29\0\ +\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x61\x75\x78\0\x09\x61\x75\x78\x20\x3d\x20\ +\x70\x72\x6f\x67\x2d\x3e\x61\x75\x78\x3b\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\ +\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\ +\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\ +\x74\x61\x63\x68\x65\x64\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x70\x72\x6f\x67\ +\x5f\x61\x75\x78\0\x61\x74\x74\x61\x63\x68\x5f\x66\x75\x6e\x63\x5f\x6e\x61\x6d\ +\x65\0\x64\x73\x74\x5f\x70\x72\x6f\x67\0\x66\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\ +\x62\x74\x66\0\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\ +\x73\x65\x71\x2c\x20\x22\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\ +\x25\x73\x5c\x6e\x22\x2c\x20\x61\x75\x78\x2d\x3e\x69\x64\x2c\0\x30\x3a\x34\0\ 
+\x30\x3a\x35\0\x09\x69\x66\x20\x28\x21\x62\x74\x66\x29\0\x62\x70\x66\x5f\x66\ +\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x69\x6e\x73\x6e\x5f\x6f\x66\x66\0\x74\x79\ +\x70\x65\x5f\x69\x64\0\x30\0\x73\x74\x72\x69\x6e\x67\x73\0\x74\x79\x70\x65\x73\ +\0\x68\x64\x72\0\x62\x74\x66\x5f\x68\x65\x61\x64\x65\x72\0\x73\x74\x72\x5f\x6c\ +\x65\x6e\0\x09\x74\x79\x70\x65\x73\x20\x3d\x20\x62\x74\x66\x2d\x3e\x74\x79\x70\ +\x65\x73\x3b\0\x09\x62\x70\x66\x5f\x70\x72\x6f\x62\x65\x5f\x72\x65\x61\x64\x5f\ +\x6b\x65\x72\x6e\x65\x6c\x28\x26\x74\x2c\x20\x73\x69\x7a\x65\x6f\x66\x28\x74\ +\x29\x2c\x20\x74\x79\x70\x65\x73\x20\x2b\x20\x62\x74\x66\x5f\x69\x64\x29\x3b\0\ +\x09\x73\x74\x72\x20\x3d\x20\x62\x74\x66\x2d\x3e\x73\x74\x72\x69\x6e\x67\x73\ +\x3b\0\x62\x74\x66\x5f\x74\x79\x70\x65\0\x6e\x61\x6d\x65\x5f\x6f\x66\x66\0\x09\ +\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\x3d\x20\x42\x50\x46\x5f\x43\x4f\x52\x45\ +\x5f\x52\x45\x41\x44\x28\x74\x2c\x20\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x29\x3b\0\ +\x30\x3a\x32\x3a\x30\0\x09\x69\x66\x20\x28\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\ +\x3e\x3d\x20\x62\x74\x66\x2d\x3e\x68\x64\x72\x2e\x73\x74\x72\x5f\x6c\x65\x6e\ +\x29\0\x09\x72\x65\x74\x75\x72\x6e\x20\x73\x74\x72\x20\x2b\x20\x6e\x61\x6d\x65\ +\x5f\x6f\x66\x66\x3b\0\x30\x3a\x33\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\ +\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\ +\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\x64\x75\x6d\x70\x5f\x62\x70\x66\ +\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\ +\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x4c\x49\x43\x45\ +\x4e\x53\x45\0\x2e\x72\x6f\x64\x61\x74\x61\0\x6c\x69\x63\x65\x6e\x73\x65\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x2d\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x02\0\0\ +\0\x04\0\0\0\x62\0\0\0\x01\0\0\0\x80\x04\0\0\0\0\0\0\0\0\0\0\x69\x74\x65\x72\ +\x61\x74\x6f\x72\x2e\x72\x6f\x64\x61\x74\x61\0\0\0\0\0\0\0\0\0\0\0\0\0\x2f\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\ +\x20\x20\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\ +\x73\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x0a\0\x20\x20\x69\ +\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ +\x61\x74\x74\x61\x63\x68\x65\x64\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\ +\x25\x73\x20\x25\x73\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x47\x50\x4c\0\0\0\0\0\ +\x79\x12\0\0\0\0\0\0\x79\x26\0\0\0\0\0\0\x79\x17\x08\0\0\0\0\0\x15\x07\x1b\0\0\ +\0\0\0\x79\x11\0\0\0\0\0\0\x79\x11\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\ +\0\0\0\0\0\0\x07\x04\0\0\xe8\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\xb7\x03\0\0\x23\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\ +\0\x61\x71\0\0\0\0\0\0\x7b\x1a\xe8\xff\0\0\0\0\xb7\x01\0\0\x04\0\0\0\xbf\x72\0\ +\0\0\0\0\0\x0f\x12\0\0\0\0\0\0\x7b\x2a\xf0\xff\0\0\0\0\x61\x71\x14\0\0\0\0\0\ +\x7b\x1a\xf8\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xe8\xff\xff\xff\xbf\ +\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x23\0\0\0\xb7\x03\0\0\x0e\0\0\0\ +\xb7\x05\0\0\x18\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\ +\0\0\0\0\x07\0\0\0\0\0\0\0\x42\0\0\0\x7b\0\0\0\x1e\x3c\x01\0\x01\0\0\0\x42\0\0\ +\0\x7b\0\0\0\x24\x3c\x01\0\x02\0\0\0\x42\0\0\0\xee\0\0\0\x1d\x44\x01\0\x03\0\0\ +\0\x42\0\0\0\x0f\x01\0\0\x06\x4c\x01\0\x04\0\0\0\x42\0\0\0\x1a\x01\0\0\x17\x40\ +\x01\0\x05\0\0\0\x42\0\0\0\x1a\x01\0\0\x1d\x40\x01\0\x06\0\0\0\x42\0\0\0\x43\ 
+\x01\0\0\x06\x58\x01\0\x08\0\0\0\x42\0\0\0\x56\x01\0\0\x03\x5c\x01\0\x0f\0\0\0\ +\x42\0\0\0\xdc\x01\0\0\x02\x64\x01\0\x1f\0\0\0\x42\0\0\0\x2a\x02\0\0\x01\x6c\ +\x01\0\0\0\0\0\x02\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\0\0\ +\0\x10\0\0\0\x02\0\0\0\xea\0\0\0\0\0\0\0\x20\0\0\0\x02\0\0\0\x3e\0\0\0\0\0\0\0\ +\x28\0\0\0\x08\0\0\0\x3f\x01\0\0\0\0\0\0\x78\0\0\0\x0d\0\0\0\x3e\0\0\0\0\0\0\0\ +\x88\0\0\0\x0d\0\0\0\xea\0\0\0\0\0\0\0\xa8\0\0\0\x0d\0\0\0\x3f\x01\0\0\0\0\0\0\ +\x1a\0\0\0\x21\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\0\0\0\ +\0\0\0\0\x1c\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x10\0\0\0\0\0\0\ +\0\0\0\0\0\x0a\0\0\0\x01\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\x10\0\0\0\0\0\0\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x62\x70\x66\x5f\x6d\ +\x61\x70\0\0\0\0\0\0\0\0\x47\x50\x4c\0\0\0\0\0\x79\x12\0\0\0\0\0\0\x79\x26\0\0\ +\0\0\0\0\x79\x12\x08\0\0\0\0\0\x15\x02\x3c\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x79\ +\x27\0\0\0\0\0\0\x79\x11\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\ +\0\x07\x04\0\0\xd0\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\ +\x31\0\0\0\xb7\x03\0\0\x20\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\0\x7b\ +\x6a\xc8\xff\0\0\0\0\x61\x71\0\0\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xb7\x03\0\0\ +\x04\0\0\0\xbf\x79\0\0\0\0\0\0\x0f\x39\0\0\0\0\0\0\x79\x71\x28\0\0\0\0\0\x79\ +\x78\x30\0\0\0\0\0\x15\x08\x18\0\0\0\0\0\xb7\x02\0\0\0\0\0\0\x0f\x21\0\0\0\0\0\ +\0\x61\x11\x04\0\0\0\0\0\x79\x83\x08\0\0\0\0\0\x67\x01\0\0\x03\0\0\0\x0f\x13\0\ +\0\0\0\0\0\x79\x86\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf8\xff\xff\xff\ +\xb7\x02\0\0\x08\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x01\0\0\0\0\0\0\x79\xa3\xf8\xff\ +\0\0\0\0\x0f\x13\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf4\xff\xff\xff\ +\xb7\x02\0\0\x04\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x03\0\0\x04\0\0\0\x61\xa1\xf4\ +\xff\0\0\0\0\x61\x82\x10\0\0\0\0\0\x3d\x21\x02\0\0\0\0\0\x0f\x16\0\0\0\0\0\0\ +\xbf\x69\0\0\0\0\0\0\x7b\x9a\xd8\xff\0\0\0\0\x79\x71\x18\0\0\0\0\0\x7b\x1a\xe0\ +\xff\0\0\0\0\x79\x71\x20\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x0f\x31\0\0\0\0\0\0\x7b\ +\x1a\xe8\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xd0\xff\xff\xff\x79\xa1\ +\xc8\xff\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x51\0\0\0\xb7\x03\0\0\x11\0\0\0\ +\xb7\x05\0\0\x20\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\ +\0\0\0\0\x17\0\0\0\0\0\0\0\x42\0\0\0\x7b\0\0\0\x1e\x80\x01\0\x01\0\0\0\x42\0\0\ +\0\x7b\0\0\0\x24\x80\x01\0\x02\0\0\0\x42\0\0\0\x60\x02\0\0\x1f\x88\x01\0\x03\0\ +\0\0\x42\0\0\0\x84\x02\0\0\x06\x94\x01\0\x04\0\0\0\x42\0\0\0\x1a\x01\0\0\x17\ +\x84\x01\0\x05\0\0\0\x42\0\0\0\x9d\x02\0\0\x0e\xa0\x01\0\x06\0\0\0\x42\0\0\0\ +\x1a\x01\0\0\x1d\x84\x01\0\x07\0\0\0\x42\0\0\0\x43\x01\0\0\x06\xa4\x01\0\x09\0\ +\0\0\x42\0\0\0\xaf\x02\0\0\x03\xa8\x01\0\x11\0\0\0\x42\0\0\0\x1f\x03\0\0\x02\ +\xb0\x01\0\x18\0\0\0\x42\0\0\0\x5a\x03\0\0\x06\x04\x01\0\x1b\0\0\0\x42\0\0\0\0\ +\0\0\0\0\0\0\0\x1c\0\0\0\x42\0\0\0\xab\x03\0\0\x0f\x10\x01\0\x1d\0\0\0\x42\0\0\ +\0\xc0\x03\0\0\x2d\x14\x01\0\x1f\0\0\0\x42\0\0\0\xf7\x03\0\0\x0d\x0c\x01\0\x21\ +\0\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x22\0\0\0\x42\0\0\0\xc0\x03\0\0\x02\x14\x01\0\ +\x25\0\0\0\x42\0\0\0\x1e\x04\0\0\x0d\x18\x01\0\x28\0\0\0\x42\0\0\0\0\0\0\0\0\0\ +\0\0\x29\0\0\0\x42\0\0\0\x1e\x04\0\0\x0d\x18\x01\0\x2c\0\0\0\x42\0\0\0\x1e\x04\ +\0\0\x0d\x18\x01\0\x2d\0\0\0\x42\0\0\0\x4c\x04\0\0\x1b\x1c\x01\0\x2e\0\0\0\x42\ +\0\0\0\x4c\x04\0\0\x06\x1c\x01\0\x2f\0\0\0\x42\0\0\0\x6f\x04\0\0\x0d\x24\x01\0\ 
+\x31\0\0\0\x42\0\0\0\x1f\x03\0\0\x02\xb0\x01\0\x40\0\0\0\x42\0\0\0\x2a\x02\0\0\ +\x01\xc0\x01\0\0\0\0\0\x14\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\ +\0\0\0\0\0\x10\0\0\0\x14\0\0\0\xea\0\0\0\0\0\0\0\x20\0\0\0\x14\0\0\0\x3e\0\0\0\ +\0\0\0\0\x28\0\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\x30\0\0\0\x08\0\0\0\x3f\x01\0\0\ +\0\0\0\0\x88\0\0\0\x1a\0\0\0\x3e\0\0\0\0\0\0\0\x98\0\0\0\x1a\0\0\0\xea\0\0\0\0\ +\0\0\0\xb0\0\0\0\x1a\0\0\0\x52\x03\0\0\0\0\0\0\xb8\0\0\0\x1a\0\0\0\x56\x03\0\0\ +\0\0\0\0\xc8\0\0\0\x1f\0\0\0\x84\x03\0\0\0\0\0\0\xe0\0\0\0\x20\0\0\0\xea\0\0\0\ +\0\0\0\0\xf8\0\0\0\x20\0\0\0\x3e\0\0\0\0\0\0\0\x20\x01\0\0\x24\0\0\0\x3e\0\0\0\ +\0\0\0\0\x58\x01\0\0\x1a\0\0\0\xea\0\0\0\0\0\0\0\x68\x01\0\0\x20\0\0\0\x46\x04\ +\0\0\0\0\0\0\x90\x01\0\0\x1a\0\0\0\x3f\x01\0\0\0\0\0\0\xa0\x01\0\0\x1a\0\0\0\ +\x87\x04\0\0\0\0\0\0\xa8\x01\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\x1a\0\0\0\x42\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\0\0\0\0\0\0\x1c\0\0\ +\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x10\0\0\0\0\0\0\0\0\0\0\0\x1a\0\ +\0\0\x01\0\0\0\0\0\0\0\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x10\0\0\0\0\0\ +\0\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\0\0\ +\0\0\0\0"; + opts.insns_sz = 2184; + opts.insns = (void *)"\ +\xbf\x16\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x78\xff\xff\xff\xb7\x02\0\ +\0\x88\0\0\0\xb7\x03\0\0\0\0\0\0\x85\0\0\0\x71\0\0\0\x05\0\x14\0\0\0\0\0\x61\ +\xa1\x78\xff\0\0\0\0\xd5\x01\x01\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa1\x7c\xff\ +\0\0\0\0\xd5\x01\x01\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa1\x80\xff\0\0\0\0\xd5\ +\x01\x01\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa1\x84\xff\0\0\0\0\xd5\x01\x01\0\0\ +\0\0\0\x85\0\0\0\xa8\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x61\x01\0\0\0\0\ +\0\0\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xbf\x70\0\0\ +\0\0\0\0\x95\0\0\0\0\0\0\0\x61\x60\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\ +\x48\x0e\0\0\x63\x01\0\0\0\0\0\0\x61\x60\x0c\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\ +\0\0\x44\x0e\0\0\x63\x01\0\0\0\0\0\0\x79\x60\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\ +\0\0\0\0\x38\x0e\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x05\0\0\ +\x18\x61\0\0\0\0\0\0\0\0\0\0\x30\x0e\0\0\x7b\x01\0\0\0\0\0\0\xb7\x01\0\0\x12\0\ +\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x30\x0e\0\0\xb7\x03\0\0\x1c\0\0\0\x85\0\0\0\ +\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\xd4\xff\0\0\0\0\x63\x7a\x78\xff\0\0\0\0\ +\x61\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x80\x0e\0\0\x63\x01\0\0\0\ +\0\0\0\x61\x60\x20\0\0\0\0\0\x15\0\x03\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\ +\x5c\x0e\0\0\x63\x01\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\ +\0\x50\x0e\0\0\xb7\x03\0\0\x48\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\ +\xc5\x07\xc3\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x63\x71\0\0\0\0\0\ +\0\x79\x63\x18\0\0\0\0\0\x15\x03\x04\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x98\ +\x0e\0\0\xb7\x02\0\0\x62\0\0\0\x85\0\0\0\x94\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\x61\x20\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x08\x0f\0\0\x63\x01\0\ +\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x0f\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\ +\x10\x0f\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x98\x0e\0\0\x18\ +\x61\0\0\0\0\0\0\0\0\0\0\x18\x0f\0\0\x7b\x01\0\0\0\0\0\0\xb7\x01\0\0\x02\0\0\0\ +\x18\x62\0\0\0\0\0\0\0\0\0\0\x08\x0f\0\0\xb7\x03\0\0\x20\0\0\0\x85\0\0\0\xa6\0\ +\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\xa3\xff\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\0\ 
+\0\0\0\x61\x20\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x28\x0f\0\0\x63\x01\0\0\ +\0\0\0\0\xb7\x01\0\0\x16\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x28\x0f\0\0\xb7\x03\ +\0\0\x04\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\x96\xff\0\0\0\0\ +\x18\x60\0\0\0\0\0\0\0\0\0\0\x30\x0f\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x78\x11\0\ +\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x38\x0f\0\0\x18\x61\0\0\0\0\ +\0\0\0\0\0\0\x70\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x40\ +\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb8\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\ +\0\0\0\0\0\0\0\0\0\x48\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xc8\x11\0\0\x7b\x01\ +\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xe8\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\ +\0\xe8\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x18\x61\ +\0\0\0\0\0\0\0\0\0\0\xe0\x11\0\0\x7b\x01\0\0\0\0\0\0\x61\x60\x08\0\0\0\0\0\x18\ +\x61\0\0\0\0\0\0\0\0\0\0\x80\x11\0\0\x63\x01\0\0\0\0\0\0\x61\x60\x0c\0\0\0\0\0\ +\x18\x61\0\0\0\0\0\0\0\0\0\0\x84\x11\0\0\x63\x01\0\0\0\0\0\0\x79\x60\x10\0\0\0\ +\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x88\x11\0\0\x7b\x01\0\0\0\0\0\0\x61\xa0\x78\ +\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb0\x11\0\0\x63\x01\0\0\0\0\0\0\x18\ +\x61\0\0\0\0\0\0\0\0\0\0\xf8\x11\0\0\xb7\x02\0\0\x11\0\0\0\xb7\x03\0\0\x0c\0\0\ +\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\x60\xff\ +\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x68\x11\0\0\x63\x70\x6c\0\0\0\0\0\x77\x07\ +\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\xb7\x01\0\0\x05\0\0\0\x18\x62\0\0\0\0\0\0\ +\0\0\0\0\x68\x11\0\0\xb7\x03\0\0\x8c\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\ +\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xd8\x11\0\0\x61\x01\0\0\0\0\0\0\xd5\x01\x02\0\ +\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xc5\x07\x4e\xff\0\0\0\0\x63\ +\x7a\x80\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x10\x12\0\0\x18\x61\0\0\0\0\0\ +\0\0\0\0\0\x10\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x18\x12\ +\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x08\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\ +\0\0\0\0\0\0\0\x28\x14\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x50\x17\0\0\x7b\x01\0\0\ +\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x30\x14\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\ +\x60\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xd0\x15\0\0\x18\ +\x61\0\0\0\0\0\0\0\0\0\0\x80\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x78\x17\0\0\x7b\x01\0\0\0\0\0\0\x61\ +\x60\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x18\x17\0\0\x63\x01\0\0\0\0\0\0\ +\x61\x60\x0c\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x1c\x17\0\0\x63\x01\0\0\0\0\ +\0\0\x79\x60\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x20\x17\0\0\x7b\x01\0\0\ +\0\0\0\0\x61\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x48\x17\0\0\x63\ +\x01\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x90\x17\0\0\xb7\x02\0\0\x12\0\0\0\ +\xb7\x03\0\0\x0c\0\0\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\x07\0\0\0\0\ +\0\0\xc5\x07\x17\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x17\0\0\x63\x70\x6c\ +\0\0\0\0\0\x77\x07\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\xb7\x01\0\0\x05\0\0\0\ +\x18\x62\0\0\0\0\0\0\0\0\0\0\0\x17\0\0\xb7\x03\0\0\x8c\0\0\0\x85\0\0\0\xa6\0\0\ +\0\xbf\x07\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x70\x17\0\0\x61\x01\0\0\0\0\ +\0\0\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xc5\x07\x05\ +\xff\0\0\0\0\x63\x7a\x84\xff\0\0\0\0\x61\xa1\x78\xff\0\0\0\0\xd5\x01\x02\0\0\0\ +\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa0\x80\xff\0\0\0\0\x63\x06\ +\x28\0\0\0\0\0\x61\xa0\x84\xff\0\0\0\0\x63\x06\x2c\0\0\0\0\0\x18\x61\0\0\0\0\0\ 
+\0\0\0\0\0\0\0\0\0\x61\x10\0\0\0\0\0\0\x63\x06\x18\0\0\0\0\0\xb7\0\0\0\0\0\0\0\ +\x95\0\0\0\0\0\0\0"; + err = bpf_load_and_run(&opts); + if (err < 0) + return err; + skel->rodata = + mmap(skel->rodata, 4096, PROT_READ, MAP_SHARED | MAP_FIXED, + skel->maps.rodata.map_fd, 0); + return 0; +} + +static inline struct iterators_bpf * +iterators_bpf__open_and_load(void) +{ + struct iterators_bpf *skel; + + skel = iterators_bpf__open(); + if (!skel) + return NULL; + if (iterators_bpf__load(skel)) { + iterators_bpf__destroy(skel); + return NULL; + } + return skel; +} + +#endif /* __ITERATORS_BPF_SKEL_H__ */ diff --git a/kernel/bpf/preload/iterators/iterators.skel.h b/kernel/bpf/preload/iterators/iterators.skel.h deleted file mode 100644 index cf9a6a94b3a4..000000000000 --- a/kernel/bpf/preload/iterators/iterators.skel.h +++ /dev/null @@ -1,412 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -/* THIS FILE IS AUTOGENERATED! */ -#ifndef __ITERATORS_BPF_SKEL_H__ -#define __ITERATORS_BPF_SKEL_H__ - -#include <stdlib.h> -#include <bpf/libbpf.h> - -struct iterators_bpf { - struct bpf_object_skeleton *skeleton; - struct bpf_object *obj; - struct { - struct bpf_map *rodata; - } maps; - struct { - struct bpf_program *dump_bpf_map; - struct bpf_program *dump_bpf_prog; - } progs; - struct { - struct bpf_link *dump_bpf_map; - struct bpf_link *dump_bpf_prog; - } links; - struct iterators_bpf__rodata { - char dump_bpf_map____fmt[35]; - char dump_bpf_map____fmt_1[14]; - char dump_bpf_prog____fmt[32]; - char dump_bpf_prog____fmt_2[17]; - } *rodata; -}; - -static void -iterators_bpf__destroy(struct iterators_bpf *obj) -{ - if (!obj) - return; - if (obj->skeleton) - bpf_object__destroy_skeleton(obj->skeleton); - free(obj); -} - -static inline int -iterators_bpf__create_skeleton(struct iterators_bpf *obj); - -static inline struct iterators_bpf * -iterators_bpf__open_opts(const struct bpf_object_open_opts *opts) -{ - struct iterators_bpf *obj; - - obj = (struct iterators_bpf *)calloc(1, sizeof(*obj)); - if (!obj) - return NULL; - if (iterators_bpf__create_skeleton(obj)) - goto err; - if (bpf_object__open_skeleton(obj->skeleton, opts)) - goto err; - - return obj; -err: - iterators_bpf__destroy(obj); - return NULL; -} - -static inline struct iterators_bpf * -iterators_bpf__open(void) -{ - return iterators_bpf__open_opts(NULL); -} - -static inline int -iterators_bpf__load(struct iterators_bpf *obj) -{ - return bpf_object__load_skeleton(obj->skeleton); -} - -static inline struct iterators_bpf * -iterators_bpf__open_and_load(void) -{ - struct iterators_bpf *obj; - - obj = iterators_bpf__open(); - if (!obj) - return NULL; - if (iterators_bpf__load(obj)) { - iterators_bpf__destroy(obj); - return NULL; - } - return obj; -} - -static inline int -iterators_bpf__attach(struct iterators_bpf *obj) -{ - return bpf_object__attach_skeleton(obj->skeleton); -} - -static inline void -iterators_bpf__detach(struct iterators_bpf *obj) -{ - return bpf_object__detach_skeleton(obj->skeleton); -} - -static inline int -iterators_bpf__create_skeleton(struct iterators_bpf *obj) -{ - struct bpf_object_skeleton *s; - - s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s)); - if (!s) - return -1; - obj->skeleton = s; - - s->sz = sizeof(*s); - s->name = "iterators_bpf"; - s->obj = &obj->obj; - - /* maps */ - s->map_cnt = 1; - s->map_skel_sz = sizeof(*s->maps); - s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz); - if (!s->maps) - goto err; - - s->maps[0].name = "iterator.rodata"; - s->maps[0].map = 
&obj->maps.rodata; - s->maps[0].mmaped = (void **)&obj->rodata; - - /* programs */ - s->prog_cnt = 2; - s->prog_skel_sz = sizeof(*s->progs); - s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz); - if (!s->progs) - goto err; - - s->progs[0].name = "dump_bpf_map"; - s->progs[0].prog = &obj->progs.dump_bpf_map; - s->progs[0].link = &obj->links.dump_bpf_map; - - s->progs[1].name = "dump_bpf_prog"; - s->progs[1].prog = &obj->progs.dump_bpf_prog; - s->progs[1].link = &obj->links.dump_bpf_prog; - - s->data_sz = 7176; - s->data = (void *)"\ -\x7f\x45\x4c\x46\x02\x01\x01\0\0\0\0\0\0\0\0\0\x01\0\xf7\0\x01\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\x48\x18\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0f\0\ -\x0e\0\x79\x12\0\0\0\0\0\0\x79\x26\0\0\0\0\0\0\x79\x17\x08\0\0\0\0\0\x15\x07\ -\x1a\0\0\0\0\0\x79\x21\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\0\ -\x07\x04\0\0\xe8\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x02\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\xb7\x03\0\0\x23\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\0\x61\x71\0\ -\0\0\0\0\0\x7b\x1a\xe8\xff\0\0\0\0\xb7\x01\0\0\x04\0\0\0\xbf\x72\0\0\0\0\0\0\ -\x0f\x12\0\0\0\0\0\0\x7b\x2a\xf0\xff\0\0\0\0\x61\x71\x14\0\0\0\0\0\x7b\x1a\xf8\ -\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xe8\xff\xff\xff\xbf\x61\0\0\0\0\0\ -\0\x18\x02\0\0\x23\0\0\0\0\0\0\0\0\0\0\0\xb7\x03\0\0\x0e\0\0\0\xb7\x05\0\0\x18\ -\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\x79\x12\0\0\0\0\ -\0\0\x79\x26\0\0\0\0\0\0\x79\x11\x08\0\0\0\0\0\x15\x01\x3b\0\0\0\0\0\x79\x17\0\ -\0\0\0\0\0\x79\x21\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\ -\x04\0\0\xd0\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x02\0\0\x31\0\0\0\0\0\0\0\0\0\ -\0\0\xb7\x03\0\0\x20\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\0\x7b\x6a\xc8\ -\xff\0\0\0\0\x61\x71\0\0\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xb7\x03\0\0\x04\0\0\0\ -\xbf\x79\0\0\0\0\0\0\x0f\x39\0\0\0\0\0\0\x79\x71\x28\0\0\0\0\0\x79\x78\x30\0\0\ -\0\0\0\x15\x08\x18\0\0\0\0\0\xb7\x02\0\0\0\0\0\0\x0f\x21\0\0\0\0\0\0\x61\x11\ -\x04\0\0\0\0\0\x79\x83\x08\0\0\0\0\0\x67\x01\0\0\x03\0\0\0\x0f\x13\0\0\0\0\0\0\ -\x79\x86\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf8\xff\xff\xff\xb7\x02\0\ -\0\x08\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x01\0\0\0\0\0\0\x79\xa3\xf8\xff\0\0\0\0\ -\x0f\x13\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf4\xff\xff\xff\xb7\x02\0\ -\0\x04\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x03\0\0\x04\0\0\0\x61\xa1\xf4\xff\0\0\0\0\ -\x61\x82\x10\0\0\0\0\0\x3d\x21\x02\0\0\0\0\0\x0f\x16\0\0\0\0\0\0\xbf\x69\0\0\0\ -\0\0\0\x7b\x9a\xd8\xff\0\0\0\0\x79\x71\x18\0\0\0\0\0\x7b\x1a\xe0\xff\0\0\0\0\ -\x79\x71\x20\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x0f\x31\0\0\0\0\0\0\x7b\x1a\xe8\xff\ -\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xd0\xff\xff\xff\x79\xa1\xc8\xff\0\0\0\ -\0\x18\x02\0\0\x51\0\0\0\0\0\0\0\0\0\0\0\xb7\x03\0\0\x11\0\0\0\xb7\x05\0\0\x20\ -\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\x20\x20\x69\x64\ -\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x6d\ -\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\ -\x73\x25\x36\x64\x0a\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\ -\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\x64\x0a\0\x25\x34\ -\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x0a\0\x47\x50\x4c\0\x9f\ -\xeb\x01\0\x18\0\0\0\0\0\0\0\x1c\x04\0\0\x1c\x04\0\0\x09\x05\0\0\0\0\0\0\0\0\0\ -\x02\x02\0\0\0\x01\0\0\0\x02\0\0\x04\x10\0\0\0\x13\0\0\0\x03\0\0\0\0\0\0\0\x18\ -\0\0\0\x04\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\x08\0\0\0\0\0\0\0\0\0\0\x02\x0d\0\ 
-\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x01\0\0\0\x20\0\0\0\0\0\0\x01\x04\ -\0\0\0\x20\0\0\x01\x24\0\0\0\x01\0\0\x0c\x05\0\0\0\xaf\0\0\0\x03\0\0\x04\x18\0\ -\0\0\xbd\0\0\0\x09\0\0\0\0\0\0\0\xc1\0\0\0\x0b\0\0\0\x40\0\0\0\xcc\0\0\0\x0b\0\ -\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x0a\0\0\0\xd4\0\0\0\0\0\0\x07\0\0\0\0\xdd\0\0\ -\0\0\0\0\x08\x0c\0\0\0\xe3\0\0\0\0\0\0\x01\x08\0\0\0\x40\0\0\0\xa4\x01\0\0\x03\ -\0\0\x04\x18\0\0\0\xac\x01\0\0\x0e\0\0\0\0\0\0\0\xaf\x01\0\0\x11\0\0\0\x20\0\0\ -\0\xb4\x01\0\0\x0e\0\0\0\xa0\0\0\0\xc0\x01\0\0\0\0\0\x08\x0f\0\0\0\xc6\x01\0\0\ -\0\0\0\x01\x04\0\0\0\x20\0\0\0\xd3\x01\0\0\0\0\0\x01\x01\0\0\0\x08\0\0\x01\0\0\ -\0\0\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x10\0\0\0\xd8\x01\0\0\0\0\0\x01\x04\ -\0\0\0\x20\0\0\0\0\0\0\0\0\0\0\x02\x14\0\0\0\x3c\x02\0\0\x02\0\0\x04\x10\0\0\0\ -\x13\0\0\0\x03\0\0\0\0\0\0\0\x4f\x02\0\0\x15\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\ -\x18\0\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x13\0\0\0\x54\x02\0\0\x01\0\ -\0\x0c\x16\0\0\0\xa0\x02\0\0\x01\0\0\x04\x08\0\0\0\xa9\x02\0\0\x19\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\x02\x1a\0\0\0\xfa\x02\0\0\x06\0\0\x04\x38\0\0\0\xac\x01\0\0\ -\x0e\0\0\0\0\0\0\0\xaf\x01\0\0\x11\0\0\0\x20\0\0\0\x07\x03\0\0\x1b\0\0\0\xc0\0\ -\0\0\x18\x03\0\0\x15\0\0\0\0\x01\0\0\x21\x03\0\0\x1d\0\0\0\x40\x01\0\0\x2b\x03\ -\0\0\x1e\0\0\0\x80\x01\0\0\0\0\0\0\0\0\0\x02\x1c\0\0\0\0\0\0\0\0\0\0\x0a\x10\0\ -\0\0\0\0\0\0\0\0\0\x02\x1f\0\0\0\0\0\0\0\0\0\0\x02\x20\0\0\0\x75\x03\0\0\x02\0\ -\0\x04\x08\0\0\0\x83\x03\0\0\x0e\0\0\0\0\0\0\0\x8c\x03\0\0\x0e\0\0\0\x20\0\0\0\ -\x2b\x03\0\0\x03\0\0\x04\x18\0\0\0\x96\x03\0\0\x1b\0\0\0\0\0\0\0\x9e\x03\0\0\ -\x21\0\0\0\x40\0\0\0\xa4\x03\0\0\x23\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x22\0\0\ -\0\0\0\0\0\0\0\0\x02\x24\0\0\0\xa8\x03\0\0\x01\0\0\x04\x04\0\0\0\xb3\x03\0\0\ -\x0e\0\0\0\0\0\0\0\x1c\x04\0\0\x01\0\0\x04\x04\0\0\0\x25\x04\0\0\x0e\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x23\0\0\0\x9b\x04\0\0\0\0\0\ -\x0e\x25\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x0e\0\0\0\ -\xaf\x04\0\0\0\0\0\x0e\x27\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\ -\x12\0\0\0\x20\0\0\0\xc5\x04\0\0\0\0\0\x0e\x29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\ -\0\0\0\0\x1c\0\0\0\x12\0\0\0\x11\0\0\0\xda\x04\0\0\0\0\0\x0e\x2b\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x04\0\0\0\xf1\x04\0\0\0\0\0\x0e\ -\x2d\0\0\0\x01\0\0\0\xf9\x04\0\0\x04\0\0\x0f\0\0\0\0\x26\0\0\0\0\0\0\0\x23\0\0\ -\0\x28\0\0\0\x23\0\0\0\x0e\0\0\0\x2a\0\0\0\x31\0\0\0\x20\0\0\0\x2c\0\0\0\x51\0\ -\0\0\x11\0\0\0\x01\x05\0\0\x01\0\0\x0f\0\0\0\0\x2e\0\0\0\0\0\0\0\x04\0\0\0\0\ -\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x6d\x65\ -\x74\x61\0\x6d\x61\x70\0\x63\x74\x78\0\x69\x6e\x74\0\x64\x75\x6d\x70\x5f\x62\ -\x70\x66\x5f\x6d\x61\x70\0\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\ -\x30\x3a\x30\0\x2f\x68\x6f\x6d\x65\x2f\x61\x6c\x72\x75\x61\x2f\x62\x75\x69\x6c\ -\x64\x2f\x6c\x69\x6e\x75\x78\x2f\x6b\x65\x72\x6e\x65\x6c\x2f\x62\x70\x66\x2f\ -\x70\x72\x65\x6c\x6f\x61\x64\x2f\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2f\x69\ -\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\x2e\x63\0\x09\x73\x74\x72\x75\ -\x63\x74\x20\x73\x65\x71\x5f\x66\x69\x6c\x65\x20\x2a\x73\x65\x71\x20\x3d\x20\ -\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\x71\x3b\0\x62\x70\x66\x5f\ -\x69\x74\x65\x72\x5f\x6d\x65\x74\x61\0\x73\x65\x71\0\x73\x65\x73\x73\x69\x6f\ -\x6e\x5f\x69\x64\0\x73\x65\x71\x5f\x6e\x75\x6d\0\x73\x65\x71\x5f\x66\x69\x6c\ -\x65\0\x5f\x5f\x75\x36\x34\0\x6c\x6f\x6e\x67\x20\x6c\x6f\x6e\x67\x20\x75\x6e\ 
-\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x30\x3a\x31\0\x09\x73\x74\x72\x75\ -\x63\x74\x20\x62\x70\x66\x5f\x6d\x61\x70\x20\x2a\x6d\x61\x70\x20\x3d\x20\x63\ -\x74\x78\x2d\x3e\x6d\x61\x70\x3b\0\x09\x69\x66\x20\x28\x21\x6d\x61\x70\x29\0\ -\x30\x3a\x32\0\x09\x5f\x5f\x75\x36\x34\x20\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\ -\x20\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\x71\x5f\x6e\x75\x6d\ -\x3b\0\x09\x69\x66\x20\x28\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x3d\x20\x30\x29\ -\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\ -\x71\x2c\x20\x22\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\ -\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x5c\ -\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x6d\x61\x70\0\x69\x64\0\x6e\x61\x6d\x65\0\ -\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\0\x5f\x5f\x75\x33\x32\0\x75\x6e\ -\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x63\x68\x61\x72\0\x5f\x5f\x41\x52\ -\x52\x41\x59\x5f\x53\x49\x5a\x45\x5f\x54\x59\x50\x45\x5f\x5f\0\x09\x42\x50\x46\ -\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x25\ -\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x5c\x6e\x22\x2c\x20\x6d\x61\x70\ -\x2d\x3e\x69\x64\x2c\x20\x6d\x61\x70\x2d\x3e\x6e\x61\x6d\x65\x2c\x20\x6d\x61\ -\x70\x2d\x3e\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x29\x3b\0\x7d\0\x62\ -\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x70\x72\ -\x6f\x67\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x69\x74\x65\ -\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x09\x73\x74\x72\x75\x63\x74\x20\x62\ -\x70\x66\x5f\x70\x72\x6f\x67\x20\x2a\x70\x72\x6f\x67\x20\x3d\x20\x63\x74\x78\ -\x2d\x3e\x70\x72\x6f\x67\x3b\0\x09\x69\x66\x20\x28\x21\x70\x72\x6f\x67\x29\0\ -\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x61\x75\x78\0\x09\x61\x75\x78\x20\x3d\x20\ -\x70\x72\x6f\x67\x2d\x3e\x61\x75\x78\x3b\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\ -\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\ -\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\ -\x74\x61\x63\x68\x65\x64\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x70\x72\x6f\x67\ -\x5f\x61\x75\x78\0\x61\x74\x74\x61\x63\x68\x5f\x66\x75\x6e\x63\x5f\x6e\x61\x6d\ -\x65\0\x64\x73\x74\x5f\x70\x72\x6f\x67\0\x66\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\ -\x62\x74\x66\0\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\ -\x73\x65\x71\x2c\x20\x22\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\ -\x25\x73\x5c\x6e\x22\x2c\x20\x61\x75\x78\x2d\x3e\x69\x64\x2c\0\x30\x3a\x34\0\ -\x30\x3a\x35\0\x09\x69\x66\x20\x28\x21\x62\x74\x66\x29\0\x62\x70\x66\x5f\x66\ -\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x69\x6e\x73\x6e\x5f\x6f\x66\x66\0\x74\x79\ -\x70\x65\x5f\x69\x64\0\x30\0\x73\x74\x72\x69\x6e\x67\x73\0\x74\x79\x70\x65\x73\ -\0\x68\x64\x72\0\x62\x74\x66\x5f\x68\x65\x61\x64\x65\x72\0\x73\x74\x72\x5f\x6c\ -\x65\x6e\0\x09\x74\x79\x70\x65\x73\x20\x3d\x20\x62\x74\x66\x2d\x3e\x74\x79\x70\ -\x65\x73\x3b\0\x09\x62\x70\x66\x5f\x70\x72\x6f\x62\x65\x5f\x72\x65\x61\x64\x5f\ -\x6b\x65\x72\x6e\x65\x6c\x28\x26\x74\x2c\x20\x73\x69\x7a\x65\x6f\x66\x28\x74\ -\x29\x2c\x20\x74\x79\x70\x65\x73\x20\x2b\x20\x62\x74\x66\x5f\x69\x64\x29\x3b\0\ -\x09\x73\x74\x72\x20\x3d\x20\x62\x74\x66\x2d\x3e\x73\x74\x72\x69\x6e\x67\x73\ -\x3b\0\x62\x74\x66\x5f\x74\x79\x70\x65\0\x6e\x61\x6d\x65\x5f\x6f\x66\x66\0\x09\ -\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\x3d\x20\x42\x50\x46\x5f\x43\x4f\x52\x45\ -\x5f\x52\x45\x41\x44\x28\x74\x2c\x20\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x29\x3b\0\ 
-\x30\x3a\x32\x3a\x30\0\x09\x69\x66\x20\x28\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\ -\x3e\x3d\x20\x62\x74\x66\x2d\x3e\x68\x64\x72\x2e\x73\x74\x72\x5f\x6c\x65\x6e\ -\x29\0\x09\x72\x65\x74\x75\x72\x6e\x20\x73\x74\x72\x20\x2b\x20\x6e\x61\x6d\x65\ -\x5f\x6f\x66\x66\x3b\0\x30\x3a\x33\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\ -\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\ -\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\x64\x75\x6d\x70\x5f\x62\x70\x66\ -\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\ -\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x4c\x49\x43\x45\ -\x4e\x53\x45\0\x2e\x72\x6f\x64\x61\x74\x61\0\x6c\x69\x63\x65\x6e\x73\x65\0\x9f\ -\xeb\x01\0\x20\0\0\0\0\0\0\0\x24\0\0\0\x24\0\0\0\x44\x02\0\0\x68\x02\0\0\xa4\ -\x01\0\0\x08\0\0\0\x31\0\0\0\x01\0\0\0\0\0\0\0\x07\0\0\0\x62\x02\0\0\x01\0\0\0\ -\0\0\0\0\x17\0\0\0\x10\0\0\0\x31\0\0\0\x09\0\0\0\0\0\0\0\x42\0\0\0\x87\0\0\0\ -\x1e\x40\x01\0\x08\0\0\0\x42\0\0\0\x87\0\0\0\x24\x40\x01\0\x10\0\0\0\x42\0\0\0\ -\xfe\0\0\0\x1d\x48\x01\0\x18\0\0\0\x42\0\0\0\x1f\x01\0\0\x06\x50\x01\0\x20\0\0\ -\0\x42\0\0\0\x2e\x01\0\0\x1d\x44\x01\0\x28\0\0\0\x42\0\0\0\x53\x01\0\0\x06\x5c\ -\x01\0\x38\0\0\0\x42\0\0\0\x66\x01\0\0\x03\x60\x01\0\x70\0\0\0\x42\0\0\0\xec\ -\x01\0\0\x02\x68\x01\0\xf0\0\0\0\x42\0\0\0\x3a\x02\0\0\x01\x70\x01\0\x62\x02\0\ -\0\x1a\0\0\0\0\0\0\0\x42\0\0\0\x87\0\0\0\x1e\x84\x01\0\x08\0\0\0\x42\0\0\0\x87\ -\0\0\0\x24\x84\x01\0\x10\0\0\0\x42\0\0\0\x70\x02\0\0\x1f\x8c\x01\0\x18\0\0\0\ -\x42\0\0\0\x94\x02\0\0\x06\x98\x01\0\x20\0\0\0\x42\0\0\0\xad\x02\0\0\x0e\xa4\ -\x01\0\x28\0\0\0\x42\0\0\0\x2e\x01\0\0\x1d\x88\x01\0\x30\0\0\0\x42\0\0\0\x53\ -\x01\0\0\x06\xa8\x01\0\x40\0\0\0\x42\0\0\0\xbf\x02\0\0\x03\xac\x01\0\x80\0\0\0\ -\x42\0\0\0\x2f\x03\0\0\x02\xb4\x01\0\xb8\0\0\0\x42\0\0\0\x6a\x03\0\0\x06\x08\ -\x01\0\xd0\0\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\xd8\0\0\0\x42\0\0\0\xbb\x03\0\0\x0f\ -\x14\x01\0\xe0\0\0\0\x42\0\0\0\xd0\x03\0\0\x2d\x18\x01\0\xf0\0\0\0\x42\0\0\0\ -\x07\x04\0\0\x0d\x10\x01\0\0\x01\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x08\x01\0\0\x42\ -\0\0\0\xd0\x03\0\0\x02\x18\x01\0\x20\x01\0\0\x42\0\0\0\x2e\x04\0\0\x0d\x1c\x01\ -\0\x38\x01\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x40\x01\0\0\x42\0\0\0\x2e\x04\0\0\x0d\ -\x1c\x01\0\x58\x01\0\0\x42\0\0\0\x2e\x04\0\0\x0d\x1c\x01\0\x60\x01\0\0\x42\0\0\ -\0\x5c\x04\0\0\x1b\x20\x01\0\x68\x01\0\0\x42\0\0\0\x5c\x04\0\0\x06\x20\x01\0\ -\x70\x01\0\0\x42\0\0\0\x7f\x04\0\0\x0d\x28\x01\0\x78\x01\0\0\x42\0\0\0\0\0\0\0\ -\0\0\0\0\x80\x01\0\0\x42\0\0\0\x2f\x03\0\0\x02\xb4\x01\0\xf8\x01\0\0\x42\0\0\0\ -\x3a\x02\0\0\x01\xc4\x01\0\x10\0\0\0\x31\0\0\0\x07\0\0\0\0\0\0\0\x02\0\0\0\x3e\ -\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\0\0\0\x10\0\0\0\x02\0\0\0\xfa\0\ -\0\0\0\0\0\0\x20\0\0\0\x08\0\0\0\x2a\x01\0\0\0\0\0\0\x70\0\0\0\x0d\0\0\0\x3e\0\ -\0\0\0\0\0\0\x80\0\0\0\x0d\0\0\0\xfa\0\0\0\0\0\0\0\xa0\0\0\0\x0d\0\0\0\x2a\x01\ -\0\0\0\0\0\0\x62\x02\0\0\x12\0\0\0\0\0\0\0\x14\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\ -\0\x08\0\0\0\x3e\0\0\0\0\0\0\0\x10\0\0\0\x14\0\0\0\xfa\0\0\0\0\0\0\0\x20\0\0\0\ -\x18\0\0\0\x3e\0\0\0\0\0\0\0\x28\0\0\0\x08\0\0\0\x2a\x01\0\0\0\0\0\0\x80\0\0\0\ -\x1a\0\0\0\x3e\0\0\0\0\0\0\0\x90\0\0\0\x1a\0\0\0\xfa\0\0\0\0\0\0\0\xa8\0\0\0\ -\x1a\0\0\0\x62\x03\0\0\0\0\0\0\xb0\0\0\0\x1a\0\0\0\x66\x03\0\0\0\0\0\0\xc0\0\0\ -\0\x1f\0\0\0\x94\x03\0\0\0\0\0\0\xd8\0\0\0\x20\0\0\0\xfa\0\0\0\0\0\0\0\xf0\0\0\ -\0\x20\0\0\0\x3e\0\0\0\0\0\0\0\x18\x01\0\0\x24\0\0\0\x3e\0\0\0\0\0\0\0\x50\x01\ -\0\0\x1a\0\0\0\xfa\0\0\0\0\0\0\0\x60\x01\0\0\x20\0\0\0\x56\x04\0\0\0\0\0\0\x88\ 
-\x01\0\0\x1a\0\0\0\x2a\x01\0\0\0\0\0\0\x98\x01\0\0\x1a\0\0\0\x97\x04\0\0\0\0\0\ -\0\xa0\x01\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\x91\0\0\0\x04\0\xf1\xff\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xe6\0\0\ -\0\0\0\x02\0\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd8\0\0\0\0\0\x02\0\xf0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\xdf\0\0\0\0\0\x03\0\x78\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\xd1\0\0\0\0\0\x03\0\x80\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xca\0\0\0\0\0\x03\0\ -\xf8\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x14\0\0\0\x01\0\x04\0\0\0\0\0\0\0\0\0\x23\ -\0\0\0\0\0\0\0\x04\x01\0\0\x01\0\x04\0\x23\0\0\0\0\0\0\0\x0e\0\0\0\0\0\0\0\x28\ -\0\0\0\x01\0\x04\0\x31\0\0\0\0\0\0\0\x20\0\0\0\0\0\0\0\xed\0\0\0\x01\0\x04\0\ -\x51\0\0\0\0\0\0\0\x11\0\0\0\0\0\0\0\0\0\0\0\x03\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\x03\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\ -\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xc2\0\0\0\x11\0\x05\0\0\0\0\0\0\0\0\0\ -\x04\0\0\0\0\0\0\0\x3d\0\0\0\x12\0\x02\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\x5b\ -\0\0\0\x12\0\x03\0\0\0\0\0\0\0\0\0\x08\x02\0\0\0\0\0\0\x48\0\0\0\0\0\0\0\x01\0\ -\0\0\x0d\0\0\0\xc8\0\0\0\0\0\0\0\x01\0\0\0\x0d\0\0\0\x50\0\0\0\0\0\0\0\x01\0\0\ -\0\x0d\0\0\0\xd0\x01\0\0\0\0\0\0\x01\0\0\0\x0d\0\0\0\xf0\x03\0\0\0\0\0\0\x0a\0\ -\0\0\x0d\0\0\0\xfc\x03\0\0\0\0\0\0\x0a\0\0\0\x0d\0\0\0\x08\x04\0\0\0\0\0\0\x0a\ -\0\0\0\x0d\0\0\0\x14\x04\0\0\0\0\0\0\x0a\0\0\0\x0d\0\0\0\x2c\x04\0\0\0\0\0\0\0\ -\0\0\0\x0e\0\0\0\x2c\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x3c\0\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x50\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x60\0\0\0\0\0\0\0\0\0\0\0\x0b\0\ -\0\0\x70\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\ -\x90\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xa0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xb0\0\ -\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xc0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xd0\0\0\0\0\ -\0\0\0\0\0\0\0\x0b\0\0\0\xe8\0\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xf8\0\0\0\0\0\0\0\ -\0\0\0\0\x0c\0\0\0\x08\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x18\x01\0\0\0\0\0\0\0\ -\0\0\0\x0c\0\0\0\x28\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x38\x01\0\0\0\0\0\0\0\0\ -\0\0\x0c\0\0\0\x48\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x58\x01\0\0\0\0\0\0\0\0\0\ -\0\x0c\0\0\0\x68\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x78\x01\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x88\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x98\x01\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\xa8\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xb8\x01\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\xc8\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xd8\x01\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\xe8\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xf8\x01\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x08\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x18\x02\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x28\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x38\x02\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x48\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x58\x02\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x68\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x78\x02\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x94\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xa4\x02\0\0\0\0\0\0\0\0\0\0\ -\x0b\0\0\0\xb4\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xc4\x02\0\0\0\0\0\0\0\0\0\0\ -\x0b\0\0\0\xd4\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xe4\x02\0\0\0\0\0\0\0\0\0\0\ -\x0b\0\0\0\xf4\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x0c\x03\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x1c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x2c\x03\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x3c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x4c\x03\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x5c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x6c\x03\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x7c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x8c\x03\0\0\0\0\0\0\0\0\0\0\ 
-\x0c\0\0\0\x9c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xac\x03\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\xbc\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xcc\x03\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\xdc\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xec\x03\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\xfc\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x0c\x04\0\0\0\0\0\0\0\0\0\0\ -\x0c\0\0\0\x1c\x04\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x4d\x4e\x40\x41\x42\x43\x4c\0\ -\x2e\x74\x65\x78\x74\0\x2e\x72\x65\x6c\x2e\x42\x54\x46\x2e\x65\x78\x74\0\x64\ -\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\ -\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\ -\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x2e\x72\x65\x6c\x69\x74\x65\ -\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\ -\x72\x6f\x67\0\x2e\x72\x65\x6c\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\ -\x67\0\x2e\x6c\x6c\x76\x6d\x5f\x61\x64\x64\x72\x73\x69\x67\0\x6c\x69\x63\x65\ -\x6e\x73\x65\0\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\x2e\x63\0\ -\x2e\x73\x74\x72\x74\x61\x62\0\x2e\x73\x79\x6d\x74\x61\x62\0\x2e\x72\x6f\x64\ -\x61\x74\x61\0\x2e\x72\x65\x6c\x2e\x42\x54\x46\0\x4c\x49\x43\x45\x4e\x53\x45\0\ -\x4c\x42\x42\x31\x5f\x37\0\x4c\x42\x42\x31\x5f\x36\0\x4c\x42\x42\x30\x5f\x34\0\ -\x4c\x42\x42\x31\x5f\x33\0\x4c\x42\x42\x30\x5f\x33\0\x64\x75\x6d\x70\x5f\x62\ -\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x64\x75\x6d\ -\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x01\0\0\ -\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x4e\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\x6d\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\x40\x01\0\0\0\0\0\0\x08\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\xb1\0\0\0\x01\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x48\x03\0\ -\0\0\0\0\0\x62\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\x89\0\0\0\x01\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xaa\x03\0\0\0\0\0\0\x04\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xbd\0\0\0\x01\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xae\x03\0\0\0\0\0\0\x3d\x09\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x01\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\xeb\x0c\0\0\0\0\0\0\x2c\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa9\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\x18\x11\0\0\0\0\0\0\x98\x01\0\0\0\0\0\0\x0e\0\0\0\x0e\0\0\0\x08\0\0\ -\0\0\0\0\0\x18\0\0\0\0\0\0\0\x4a\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\xb0\x12\0\0\0\0\0\0\x20\0\0\0\0\0\0\0\x08\0\0\0\x02\0\0\0\x08\0\0\0\0\0\0\0\ -\x10\0\0\0\0\0\0\0\x69\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd0\x12\ -\0\0\0\0\0\0\x20\0\0\0\0\0\0\0\x08\0\0\0\x03\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\ -\0\0\0\0\xb9\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xf0\x12\0\0\0\0\0\ -\0\x50\0\0\0\0\0\0\0\x08\0\0\0\x06\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\ -\x07\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\x13\0\0\0\0\0\0\xe0\ -\x03\0\0\0\0\0\0\x08\0\0\0\x07\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x7b\0\ -\0\0\x03\x4c\xff\x6f\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\0\0\x20\x17\0\0\0\0\0\0\x07\ 
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa1\0\0\0\x03\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x27\x17\0\0\0\0\0\0\x1a\x01\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; - - return 0; -err: - bpf_object__destroy_skeleton(s); - return -1; -} - -#endif /* __ITERATORS_BPF_SKEL_H__ */ diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 5e7edf913060..7224691df2ec 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -213,7 +213,7 @@ static void __bpf_tramp_image_put_deferred(struct work_struct *work) im = container_of(work, struct bpf_tramp_image, work); bpf_image_ksym_del(&im->ksym); bpf_jit_free_exec(im->image); - bpf_jit_uncharge_modmem(1); + bpf_jit_uncharge_modmem(PAGE_SIZE); percpu_ref_exit(&im->pcref); kfree_rcu(im, rcu); } @@ -310,7 +310,7 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx) if (!im) goto out; - err = bpf_jit_charge_modmem(1); + err = bpf_jit_charge_modmem(PAGE_SIZE); if (err) goto out_free_im; @@ -332,7 +332,7 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx) out_free_image: bpf_jit_free_exec(im->image); out_uncharge: - bpf_jit_uncharge_modmem(1); + bpf_jit_uncharge_modmem(PAGE_SIZE); out_free_im: kfree(im); out: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dcf065ec2774..bbef86cb4e72 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -536,7 +536,7 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn) static const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type) { - char postfix[16] = {0}, prefix[16] = {0}; + char postfix[16] = {0}, prefix[32] = {0}; static const char * const str[] = { [NOT_INIT] = "?", [SCALAR_VALUE] = "inv", @@ -570,9 +570,11 @@ static const char *reg_type_str(struct bpf_verifier_env *env, } if (type & MEM_RDONLY) - strncpy(prefix, "rdonly_", 16); + strncpy(prefix, "rdonly_", 32); if (type & MEM_ALLOC) - strncpy(prefix, "alloc_", 16); + strncpy(prefix, "alloc_", 32); + if (type & MEM_USER) + strncpy(prefix, "user_", 32); snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", prefix, str[base_type(type)], postfix); @@ -1547,14 +1549,15 @@ static void mark_reg_not_init(struct bpf_verifier_env *env, static void mark_btf_ld_reg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno, enum bpf_reg_type reg_type, - struct btf *btf, u32 btf_id) + struct btf *btf, u32 btf_id, + enum bpf_type_flag flag) { if (reg_type == SCALAR_VALUE) { mark_reg_unknown(env, regs, regno); return; } mark_reg_known_zero(env, regs, regno); - regs[regno].type = PTR_TO_BTF_ID; + regs[regno].type = PTR_TO_BTF_ID | flag; regs[regno].btf = btf; regs[regno].btf_id = btf_id; } @@ -4152,6 +4155,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg = regs + regno; const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id); const char *tname = btf_name_by_offset(reg->btf, t->name_off); + enum bpf_type_flag flag = 0; u32 btf_id; int ret; @@ -4171,9 +4175,16 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, return -EACCES; } + if (reg->type & MEM_USER) { + verbose(env, + "R%d is ptr_%s access user memory: off=%d\n", + regno, tname, off); + return -EACCES; + } + if (env->ops->btf_struct_access) { ret = env->ops->btf_struct_access(&env->log, reg->btf, t, - off, size, atype, &btf_id); + off, size, atype, &btf_id, &flag); } else { if (atype != BPF_READ) { verbose(env, "only read is supported\n"); @@ -4181,14 +4192,14 @@ static int 
check_ptr_to_btf_access(struct bpf_verifier_env *env, } ret = btf_struct_access(&env->log, reg->btf, t, off, size, - atype, &btf_id); + atype, &btf_id, &flag); } if (ret < 0) return ret; if (atype == BPF_READ && value_regno >= 0) - mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id); + mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag); return 0; } @@ -4201,6 +4212,7 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, { struct bpf_reg_state *reg = regs + regno; struct bpf_map *map = reg->map_ptr; + enum bpf_type_flag flag = 0; const struct btf_type *t; const char *tname; u32 btf_id; @@ -4238,12 +4250,12 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, return -EACCES; } - ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id); + ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id, &flag); if (ret < 0) return ret; if (value_regno >= 0) - mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id); + mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag); return 0; } @@ -4444,7 +4456,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (err < 0) return err; - err = check_ctx_access(env, insn_idx, off, size, t, ®_type, &btf, &btf_id); + err = check_ctx_access(env, insn_idx, off, size, t, ®_type, &btf, + &btf_id); if (err) verbose_linfo(env, insn_idx, "; "); if (!err && t == BPF_READ && value_regno >= 0) { @@ -13054,6 +13067,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) prog->jited = 1; prog->bpf_func = func[0]->bpf_func; + prog->jited_len = func[0]->jited_len; prog->aux->func = func; prog->aux->func_cnt = env->subprog_cnt; bpf_prog_jit_attempt_done(prog); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 06a9e220069e..a2024ba32a20 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1235,6 +1235,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_task_stack_proto; case BPF_FUNC_copy_from_user: return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL; + case BPF_FUNC_copy_from_user_task: + return prog->aux->sleepable ? &bpf_copy_from_user_task_proto : NULL; case BPF_FUNC_snprintf_btf: return &bpf_snprintf_btf_proto; case BPF_FUNC_per_cpu_ptr: diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 14b89aa37c5c..1555da672275 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -296,7 +296,7 @@ config DEBUG_INFO_DWARF4 config DEBUG_INFO_DWARF5 bool "Generate DWARF Version 5 debuginfo" depends on !CC_IS_CLANG || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502))) - depends on !DEBUG_INFO_BTF + depends on !DEBUG_INFO_BTF || PAHOLE_VERSION >= 121 help Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc 5.0+ accepts the -gdwarf-5 flag but only had partial support for some @@ -323,7 +323,15 @@ config DEBUG_INFO_BTF DWARF type info into equivalent deduplicated BTF type info. config PAHOLE_HAS_SPLIT_BTF - def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119") + def_bool PAHOLE_VERSION >= 119 + +config PAHOLE_HAS_BTF_TAG + def_bool PAHOLE_VERSION >= 123 + depends on CC_IS_CLANG + help + Decide whether pahole emits btf_tag attributes (btf_type_tag and + btf_decl_tag) or not. Currently only clang compiler implements + these attributes, so make the config depend on CC_IS_CLANG. 
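Taken together, the verifier, helper and Kconfig changes above form the user-pointer tagging support: when the kernel is built with clang and pahole >= 1.23, __user pointers carry a BTF type tag, btf_struct_access() folds that tag into the register type as the new MEM_USER flag, and direct loads through such pointers are rejected in check_ptr_to_btf_access(). Programs have to go through bpf_probe_read_user() instead, or through the new bpf_copy_from_user_task() helper in sleepable programs. A minimal sketch of the annotation side, assuming the BTF_TYPE_TAG() macro shape used by compiler_types.h:

	#if defined(CONFIG_DEBUG_INFO_BTF) && defined(CONFIG_PAHOLE_HAS_BTF_TAG)
	/* clang records the attribute in DWARF; pahole >= 1.23 turns it into a
	 * BTF_KIND_TYPE_TAG entry chained in front of the pointee type.
	 */
	# define BTF_TYPE_TAG(value) __attribute__((btf_type_tag(#value)))
	#else
	# define BTF_TYPE_TAG(value)
	#endif

	/* The tag qualifies the pointee, so it sits before the '*': */
	struct example {
		void BTF_TYPE_TAG(user) *user_buf;	/* hypothetical field */
	};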
config DEBUG_INFO_BTF_MODULES def_bool y diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c index a6789c0c626b..dc7b14aa3431 100644 --- a/lib/ref_tracker.c +++ b/lib/ref_tracker.c @@ -20,6 +20,7 @@ void ref_tracker_dir_exit(struct ref_tracker_dir *dir) unsigned long flags; bool leak = false; + dir->dead = true; spin_lock_irqsave(&dir->lock, flags); list_for_each_entry_safe(tracker, n, &dir->quarantine, head) { list_del(&tracker->head); @@ -37,6 +38,7 @@ void ref_tracker_dir_exit(struct ref_tracker_dir *dir) spin_unlock_irqrestore(&dir->lock, flags); WARN_ON_ONCE(leak); WARN_ON_ONCE(refcount_read(&dir->untracked) != 1); + WARN_ON_ONCE(refcount_read(&dir->no_tracker) != 1); } EXPORT_SYMBOL(ref_tracker_dir_exit); @@ -72,6 +74,12 @@ int ref_tracker_alloc(struct ref_tracker_dir *dir, gfp_t gfp_mask = gfp; unsigned long flags; + WARN_ON_ONCE(dir->dead); + + if (!trackerp) { + refcount_inc(&dir->no_tracker); + return 0; + } if (gfp & __GFP_DIRECT_RECLAIM) gfp_mask |= __GFP_NOFAIL; *trackerp = tracker = kzalloc(sizeof(*tracker), gfp_mask); @@ -81,7 +89,6 @@ int ref_tracker_alloc(struct ref_tracker_dir *dir, return -ENOMEM; } nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1); - nr_entries = filter_irq_stacks(entries, nr_entries); tracker->alloc_stack_handle = stack_depot_save(entries, nr_entries, gfp); spin_lock_irqsave(&dir->lock, flags); @@ -95,17 +102,23 @@ int ref_tracker_free(struct ref_tracker_dir *dir, struct ref_tracker **trackerp) { unsigned long entries[REF_TRACKER_STACK_ENTRIES]; - struct ref_tracker *tracker = *trackerp; depot_stack_handle_t stack_handle; + struct ref_tracker *tracker; unsigned int nr_entries; unsigned long flags; + WARN_ON_ONCE(dir->dead); + + if (!trackerp) { + refcount_dec(&dir->no_tracker); + return 0; + } + tracker = *trackerp; if (!tracker) { refcount_dec(&dir->untracked); return -EEXIST; } nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1); - nr_entries = filter_irq_stacks(entries, nr_entries); stack_handle = stack_depot_save(entries, nr_entries, GFP_ATOMIC); spin_lock_irqsave(&dir->lock, flags); diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c index a068757eabaf..7b3341cef926 100644 --- a/net/6lowpan/core.c +++ b/net/6lowpan/core.c @@ -5,6 +5,7 @@ * (C) 2015 Pengutronix, Alexander Aring <aar@pengutronix.de> */ +#include <linux/if_arp.h> #include <linux/module.h> #include <net/6lowpan.h> diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index f4004cf0ff6f..9f311fddfaf9 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -134,7 +134,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) { struct inet6_dev *in6_dev = __in6_dev_get(dev); - if (in6_dev && in6_dev->cnf.mc_forwarding) + if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding)) return BATADV_NO_FLAGS; else return BATADV_MCAST_WANT_NO_RTR6; diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index fbc896323bec..d0e54e30658a 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -145,7 +145,8 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id) + u32 *next_btf_id, + enum bpf_type_flag *flag) { const struct btf_type *state; s32 type_id; @@ -162,7 +163,8 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log, return -EACCES; } - err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id); + err 
= btf_struct_access(log, btf, t, off, size, atype, next_btf_id, + flag); if (err < 0) return err; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 65b52b4bd6e1..f08034500813 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -154,7 +154,8 @@ static int bpf_test_finish(const union bpf_attr *kattr, goto out; if (sinfo) { - int i, offset = len, data_len; + int i, offset = len; + u32 data_len; for (i = 0; i < sinfo->nr_frags; i++) { skb_frag_t *frag = &sinfo->frags[i]; @@ -164,7 +165,7 @@ static int bpf_test_finish(const union bpf_attr *kattr, break; } - data_len = min_t(int, copy_size - offset, + data_len = min_t(u32, copy_size - offset, skb_frag_size(frag)); if (copy_to_user(data_out + offset, @@ -960,7 +961,12 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, while (size < kattr->test.data_size_in) { struct page *page; skb_frag_t *frag; - int data_len; + u32 data_len; + + if (sinfo->nr_frags == MAX_SKB_FRAGS) { + ret = -ENOMEM; + goto out; + } page = alloc_page(GFP_KERNEL); if (!page) { @@ -971,7 +977,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, frag = &sinfo->frags[sinfo->nr_frags++]; __skb_frag_set_page(frag, page); - data_len = min_t(int, kattr->test.data_size_in - size, + data_len = min_t(u32, kattr->test.data_size_in - size, PAGE_SIZE); skb_frag_size_set(frag, data_len); @@ -1141,7 +1147,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx))) goto out; - if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) { + if (user_ctx->local_port > U16_MAX) { ret = -ERANGE; goto out; } @@ -1149,7 +1155,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat ctx.family = (u16)user_ctx->family; ctx.protocol = (u16)user_ctx->protocol; ctx.dport = (u16)user_ctx->local_port; - ctx.sport = (__force __be16)user_ctx->remote_port; + ctx.sport = user_ctx->remote_port; switch (ctx.family) { case AF_INET: diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 440139706130..52dd0b6835bc 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -268,7 +268,7 @@ static int receive(struct sk_buff *skb, struct net_device *dev, err = caifd->layer.up->receive(caifd->layer.up, pkt); - /* For -EILSEQ the packet is not freed so so it now */ + /* For -EILSEQ the packet is not freed so free it now */ if (err == -EILSEQ) cfpkt_destroy(pkt); diff --git a/net/can/gw.c b/net/can/gw.c index d8861e862f15..24221352e059 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -1239,16 +1239,19 @@ static int __net_init cangw_pernet_init(struct net *net) return 0; } -static void __net_exit cangw_pernet_exit(struct net *net) +static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list) { + struct net *net; + rtnl_lock(); - cgw_remove_all_jobs(net); + list_for_each_entry(net, net_list, exit_list) + cgw_remove_all_jobs(net); rtnl_unlock(); } static struct pernet_operations cangw_pernet_ops = { .init = cangw_pernet_init, - .exit = cangw_pernet_exit, + .exit_batch = cangw_pernet_exit_batch, }; static __init int cgw_module_init(void) diff --git a/net/core/dev.c b/net/core/dev.c index f79744d99413..f5ef51601081 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9683,8 +9683,10 @@ int register_netdevice(struct net_device *dev) linkwatch_init_dev(dev); dev_init_scheduler(dev); - dev_hold(dev); + + dev_hold_track(dev, &dev->dev_registered_tracker, GFP_KERNEL); 
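/* dev_hold_track() above takes the usual device reference and, with
 * refcount tracking enabled (CONFIG_NET_DEV_REFCNT_TRACKER), records a
 * stack trace in the device's ref_tracker directory; the matching
 * dev_put_track() in unregister_netdevice_many() releases it, so refcount
 * leaks can be attributed to a call site. A minimal usage sketch for a
 * caller (hypothetical variable, same pattern as dev_registered_tracker):
 *
 *	netdevice_tracker tracker;
 *
 *	dev_hold_track(dev, &tracker, GFP_KERNEL);
 *	...
 *	dev_put_track(dev, &tracker);
 */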
list_netdevice(dev); + add_device_randomness(dev->dev_addr, dev->addr_len); /* If the device has permanent device address, driver should @@ -10172,7 +10174,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) goto free_dev; - dev_hold(dev); + __dev_hold(dev); #else refcount_set(&dev->dev_refcnt, 1); #endif @@ -10449,7 +10451,7 @@ void unregister_netdevice_many(struct list_head *head) synchronize_net(); list_for_each_entry(dev, head, unreg_list) { - dev_put(dev); + dev_put_track(dev, &dev->dev_registered_tracker); net_set_todo(dev); } @@ -10732,8 +10734,7 @@ static int __net_init netdev_init(struct net *net) BUILD_BUG_ON(GRO_HASH_BUCKETS > 8 * sizeof_field(struct napi_struct, gro_bitmask)); - if (net != &init_net) - INIT_LIST_HEAD(&net->dev_base_head); + INIT_LIST_HEAD(&net->dev_base_head); net->dev_name_head = netdev_create_hash(); if (net->dev_name_head == NULL) @@ -10849,14 +10850,14 @@ static struct pernet_operations __net_initdata netdev_net_ops = { .exit = netdev_exit, }; -static void __net_exit default_device_exit(struct net *net) +static void __net_exit default_device_exit_net(struct net *net) { struct net_device *dev, *aux; /* * Push all migratable network devices back to the * initial network namespace */ - rtnl_lock(); + ASSERT_RTNL(); for_each_netdev_safe(net, dev, aux) { int err; char fb_name[IFNAMSIZ]; @@ -10880,22 +10881,22 @@ static void __net_exit default_device_exit(struct net *net) BUG(); } } - rtnl_unlock(); } static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) { - /* Return with the rtnl_lock held when there are no network + /* Return (with the rtnl_lock held) when there are no network * devices unregistering in any network namespace in net_list. */ - struct net *net; - bool unregistering; DEFINE_WAIT_FUNC(wait, woken_wake_function); + bool unregistering; + struct net *net; + ASSERT_RTNL(); add_wait_queue(&netdev_unregistering_wq, &wait); for (;;) { unregistering = false; - rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) { if (net->dev_unreg_count > 0) { unregistering = true; @@ -10907,6 +10908,7 @@ static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) __rtnl_unlock(); wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); + rtnl_lock(); } remove_wait_queue(&netdev_unregistering_wq, &wait); } @@ -10922,6 +10924,11 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) struct net *net; LIST_HEAD(dev_kill_list); + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) { + default_device_exit_net(net); + cond_resched(); + } /* To prevent network device cleanup code from dereferencing * loopback devices or network devices that have been freed * wait here for all pending unregistrations to complete, @@ -10934,6 +10941,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) * default_device_exit_batch. 
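 * The default_device_exit_net() calls above already ran under this same
 * rtnl_lock, replacing the old per-netns .exit hook that took and dropped
 * the lock once per namespace.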
*/ rtnl_lock_unregistering(net_list); + list_for_each_entry(net, net_list, exit_list) { for_each_netdev_reverse(net, dev) { if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) @@ -10947,7 +10955,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) } static struct pernet_operations __net_initdata default_device_ops = { - .exit = default_device_exit, .exit_batch = default_device_exit_batch, }; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 7b288a121a41..4641126b8a20 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -48,6 +48,19 @@ static int trace_state = TRACE_OFF; static bool monitor_hw; +#undef EM +#undef EMe + +#define EM(a, b) [a] = #b, +#define EMe(a, b) [a] = #b + +/* drop_reasons is used to translate 'enum skb_drop_reason' to string, + * which is reported to user space. + */ +static const char * const drop_reasons[] = { + TRACE_SKB_DROP_REASON +}; + /* net_dm_mutex * * An overall lock guarding every operation coming from userspace. @@ -126,6 +139,7 @@ struct net_dm_skb_cb { struct devlink_trap_metadata *hw_metadata; void *pc; }; + enum skb_drop_reason reason; }; #define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0])) @@ -498,6 +512,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore, { ktime_t tstamp = ktime_get_real(); struct per_cpu_dm_data *data; + struct net_dm_skb_cb *cb; struct sk_buff *nskb; unsigned long flags; @@ -508,7 +523,11 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore, if (!nskb) return; - NET_DM_SKB_CB(nskb)->pc = location; + if ((unsigned int)reason >= SKB_DROP_REASON_MAX) + reason = SKB_DROP_REASON_NOT_SPECIFIED; + cb = NET_DM_SKB_CB(nskb); + cb->reason = reason; + cb->pc = location; /* Override the timestamp because we care about the time when the * packet was dropped. 
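 * The skb_drop_reason stashed in cb->reason just above travels with the
 * cloned skb and is later reported to user space as the new
 * NET_DM_ATTR_REASON string attribute, translated via the drop_reasons[]
 * table.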
*/ @@ -553,7 +572,8 @@ static size_t net_dm_in_port_size(void) #define NET_DM_MAX_SYMBOL_LEN 40 -static size_t net_dm_packet_report_size(size_t payload_len) +static size_t net_dm_packet_report_size(size_t payload_len, + enum skb_drop_reason reason) { size_t size; @@ -574,6 +594,8 @@ static size_t net_dm_packet_report_size(size_t payload_len) nla_total_size(sizeof(u32)) + /* NET_DM_ATTR_PROTO */ nla_total_size(sizeof(u16)) + + /* NET_DM_ATTR_REASON */ + nla_total_size(strlen(drop_reasons[reason]) + 1) + /* NET_DM_ATTR_PAYLOAD */ nla_total_size(payload_len); } @@ -606,7 +628,7 @@ nla_put_failure: static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb, size_t payload_len) { - u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc; + struct net_dm_skb_cb *cb = NET_DM_SKB_CB(skb); char buf[NET_DM_MAX_SYMBOL_LEN]; struct nlattr *attr; void *hdr; @@ -620,10 +642,15 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb, if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW)) goto nla_put_failure; - if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD)) + if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, (u64)(uintptr_t)cb->pc, + NET_DM_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_string(msg, NET_DM_ATTR_REASON, + drop_reasons[cb->reason])) goto nla_put_failure; - snprintf(buf, sizeof(buf), "%pS", NET_DM_SKB_CB(skb)->pc); + snprintf(buf, sizeof(buf), "%pS", cb->pc); if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf)) goto nla_put_failure; @@ -679,7 +706,9 @@ static void net_dm_packet_report(struct sk_buff *skb) if (net_dm_trunc_len) payload_len = min_t(size_t, net_dm_trunc_len, payload_len); - msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL); + msg = nlmsg_new(net_dm_packet_report_size(payload_len, + NET_DM_SKB_CB(skb)->reason), + GFP_KERNEL); if (!msg) goto out; diff --git a/net/core/filter.c b/net/core/filter.c index 9615ae1ab530..818244068c2d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2603,7 +2603,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, * account for the headroom. 
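 * (sg.copy is now a DECLARE_BITMAP() array rather than a single unsigned
 * long, which is why test_bit()/__clear_bit() below take the array itself
 * and not its address.)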
*/ bytes_sg_total = start - offset + bytes; - if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len) + if (!test_bit(i, msg->sg.copy) && bytes_sg_total <= len) goto out; /* At this point we need to linearize multiple scatterlist @@ -2809,7 +2809,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, /* Place newly allocated data buffer */ sk_mem_charge(msg->sk, len); msg->sg.size += len; - __clear_bit(new, &msg->sg.copy); + __clear_bit(new, msg->sg.copy); sg_set_page(&msg->sg.data[new], page, len + copy, 0); if (rsge.length) { get_page(sg_page(&rsge)); @@ -8275,6 +8275,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { const int size_default = sizeof(__u32); + int field_size; if (off < 0 || off >= sizeof(struct bpf_sock)) return false; @@ -8286,7 +8287,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, case offsetof(struct bpf_sock, family): case offsetof(struct bpf_sock, type): case offsetof(struct bpf_sock, protocol): - case offsetof(struct bpf_sock, dst_port): case offsetof(struct bpf_sock, src_port): case offsetof(struct bpf_sock, rx_queue_mapping): case bpf_ctx_range(struct bpf_sock, src_ip4): @@ -8295,6 +8295,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); + case bpf_ctx_range(struct bpf_sock, dst_port): + field_size = size == size_default ? + size_default : sizeof_field(struct bpf_sock, dst_port); + bpf_ctx_record_field_size(info, field_size); + return bpf_ctx_narrow_access_ok(off, size, field_size); + case offsetofend(struct bpf_sock, dst_port) ... + offsetof(struct bpf_sock, dst_ip4) - 1: + return false; } return size == size_default; @@ -10845,7 +10853,8 @@ static bool sk_lookup_is_valid_access(int off, int size, case bpf_ctx_range(struct bpf_sk_lookup, local_ip4): case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]): case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): - case bpf_ctx_range(struct bpf_sk_lookup, remote_port): + case offsetof(struct bpf_sk_lookup, remote_port) ... 
+ offsetof(struct bpf_sk_lookup, local_ip4) - 1: case bpf_ctx_range(struct bpf_sk_lookup, local_port): case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex): bpf_ctx_record_field_size(info, sizeof(__u32)); diff --git a/net/core/gro.c b/net/core/gro.c index a11b286d1495..ee5e7e889d8b 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -459,29 +459,22 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff skb_set_network_header(skb, skb_gro_offset(skb)); skb_reset_mac_len(skb); - NAPI_GRO_CB(skb)->same_flow = 0; + BUILD_BUG_ON(sizeof_field(struct napi_gro_cb, zeroed) != sizeof(u32)); + BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed), + sizeof(u32))); /* Avoid slow unaligned acc */ + *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0; NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb); - NAPI_GRO_CB(skb)->free = 0; - NAPI_GRO_CB(skb)->encap_mark = 0; - NAPI_GRO_CB(skb)->recursion_counter = 0; - NAPI_GRO_CB(skb)->is_fou = 0; NAPI_GRO_CB(skb)->is_atomic = 1; - NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ switch (skb->ip_summed) { case CHECKSUM_COMPLETE: NAPI_GRO_CB(skb)->csum = skb->csum; NAPI_GRO_CB(skb)->csum_valid = 1; - NAPI_GRO_CB(skb)->csum_cnt = 0; break; case CHECKSUM_UNNECESSARY: NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1; - NAPI_GRO_CB(skb)->csum_valid = 0; break; - default: - NAPI_GRO_CB(skb)->csum_cnt = 0; - NAPI_GRO_CB(skb)->csum_valid = 0; } pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive, @@ -634,7 +627,6 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->encapsulation = 0; skb_shinfo(skb)->gso_type = 0; - skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); if (unlikely(skb->slow_gro)) { skb_orphan(skb); skb_ext_reset(skb); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index b0f5344d1185..95098d1a49bd 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -166,10 +166,10 @@ static void linkwatch_do_dev(struct net_device *dev) netdev_state_change(dev); } - /* Note: our callers are responsible for - * calling netdev_tracker_free(). + /* Note: our callers are responsible for calling netdev_tracker_free(). + * This is the reason we use __dev_put() instead of dev_put(). 
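 * __dev_put() drops only the reference count; plain dev_put() would also
 * release a tracker entry, which here still belongs to the caller.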
*/ - dev_put(dev); + __dev_put(dev); } static void __linkwatch_run_queue(int urgent_only) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 8711350085d6..0ec2f5906a27 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -44,13 +44,7 @@ EXPORT_SYMBOL_GPL(net_rwsem); static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) }; #endif -struct net init_net = { - .ns.count = REFCOUNT_INIT(1), - .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), -#ifdef CONFIG_KEYS - .key_domain = &init_net_key_domain, -#endif -}; +struct net init_net; EXPORT_SYMBOL(init_net); static bool init_net_initialized; @@ -1087,7 +1081,7 @@ out: rtnl_set_sk_err(net, RTNLGRP_NSID, err); } -static int __init net_ns_init(void) +void __init net_ns_init(void) { struct net_generic *ng; @@ -1108,6 +1102,9 @@ static int __init net_ns_init(void) rcu_assign_pointer(init_net.gen, ng); +#ifdef CONFIG_KEYS + init_net.key_domain = &init_net_key_domain; +#endif down_write(&pernet_ops_rwsem); if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); @@ -1122,12 +1119,8 @@ static int __init net_ns_init(void) RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, RTNL_FLAG_DOIT_UNLOCKED); - - return 0; } -pure_initcall(net_ns_init); - static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list) { ops_pre_exit_list(ops, net_exit_list); diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c index 99f3af1a9d4d..fe6094e9a2db 100644 --- a/net/hsr/hsr_debugfs.c +++ b/net/hsr/hsr_debugfs.c @@ -17,6 +17,7 @@ #include <linux/module.h> #include <linux/errno.h> #include <linux/debugfs.h> +#include <linux/jhash.h> #include "hsr_main.h" #include "hsr_framereg.h" @@ -28,6 +29,7 @@ hsr_node_table_show(struct seq_file *sfp, void *data) { struct hsr_priv *priv = (struct hsr_priv *)sfp->private; struct hsr_node *node; + int i; seq_printf(sfp, "Node Table entries for (%s) device\n", (priv->prot_version == PRP_V1 ? 
"PRP" : "HSR")); @@ -39,22 +41,28 @@ hsr_node_table_show(struct seq_file *sfp, void *data) seq_puts(sfp, "DAN-H\n"); rcu_read_lock(); - list_for_each_entry_rcu(node, &priv->node_db, mac_list) { - /* skip self node */ - if (hsr_addr_is_self(priv, node->macaddress_A)) - continue; - seq_printf(sfp, "%pM ", &node->macaddress_A[0]); - seq_printf(sfp, "%pM ", &node->macaddress_B[0]); - seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_A]); - seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_B]); - seq_printf(sfp, "%14x, ", node->addr_B_port); - - if (priv->prot_version == PRP_V1) - seq_printf(sfp, "%5x, %5x, %5x\n", - node->san_a, node->san_b, - (node->san_a == 0 && node->san_b == 0)); - else - seq_printf(sfp, "%5x\n", 1); + + for (i = 0 ; i < priv->hash_buckets; i++) { + hlist_for_each_entry_rcu(node, &priv->node_db[i], mac_list) { + /* skip self node */ + if (hsr_addr_is_self(priv, node->macaddress_A)) + continue; + seq_printf(sfp, "%pM ", &node->macaddress_A[0]); + seq_printf(sfp, "%pM ", &node->macaddress_B[0]); + seq_printf(sfp, "%10lx, ", + node->time_in[HSR_PT_SLAVE_A]); + seq_printf(sfp, "%10lx, ", + node->time_in[HSR_PT_SLAVE_B]); + seq_printf(sfp, "%14x, ", node->addr_B_port); + + if (priv->prot_version == PRP_V1) + seq_printf(sfp, "%5x, %5x, %5x\n", + node->san_a, node->san_b, + (node->san_a == 0 && + node->san_b == 0)); + else + seq_printf(sfp, "%5x\n", 1); + } } rcu_read_unlock(); return 0; diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index e57fdad9ef94..7f250216433d 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -485,12 +485,16 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], { bool unregister = false; struct hsr_priv *hsr; - int res; + int res, i; hsr = netdev_priv(hsr_dev); INIT_LIST_HEAD(&hsr->ports); - INIT_LIST_HEAD(&hsr->node_db); - INIT_LIST_HEAD(&hsr->self_node_db); + INIT_HLIST_HEAD(&hsr->self_node_db); + hsr->hash_buckets = HSR_HSIZE; + get_random_bytes(&hsr->hash_seed, sizeof(hsr->hash_seed)); + for (i = 0; i < hsr->hash_buckets; i++) + INIT_HLIST_HEAD(&hsr->node_db[i]); + spin_lock_init(&hsr->list_lock); eth_hw_addr_set(hsr_dev, slave[0]->dev_addr); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index e59cbb4f0cd1..5bf357734b11 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -570,20 +570,23 @@ static int fill_frame_info(struct hsr_frame_info *frame, struct ethhdr *ethhdr; __be16 proto; int ret; + u32 hash; /* Check if skb contains ethhdr */ if (skb->mac_len < sizeof(struct ethhdr)) return -EINVAL; memset(frame, 0, sizeof(*frame)); + + ethhdr = (struct ethhdr *)skb_mac_header(skb); + hash = hsr_mac_hash(port->hsr, ethhdr->h_source); frame->is_supervision = is_supervision_frame(port->hsr, skb); - frame->node_src = hsr_get_node(port, &hsr->node_db, skb, + frame->node_src = hsr_get_node(port, &hsr->node_db[hash], skb, frame->is_supervision, port->type); if (!frame->node_src) return -1; /* Unknown node and !is_supervision, or no mem */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); frame->is_vlan = false; proto = ethhdr->h_proto; diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 0775f0f95dbf..b3c6ffa1894d 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -15,11 +15,28 @@ #include <linux/etherdevice.h> #include <linux/slab.h> #include <linux/rculist.h> +#include <linux/jhash.h> #include "hsr_main.h" #include "hsr_framereg.h" #include "hsr_netlink.h" -/* TODO: use hash lists for mac addresses (linux/jhash.h)? 
*/ +u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr) +{ + u32 hash = jhash(addr, ETH_ALEN, hsr->hash_seed); + + return reciprocal_scale(hash, hsr->hash_buckets); +} + +struct hsr_node *hsr_node_get_first(struct hlist_head *head) +{ + struct hlist_node *first; + + first = rcu_dereference(hlist_first_rcu(head)); + if (first) + return hlist_entry(first, struct hsr_node, mac_list); + + return NULL; +} /* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b, * false otherwise. @@ -42,8 +59,7 @@ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr) { struct hsr_node *node; - node = list_first_or_null_rcu(&hsr->self_node_db, struct hsr_node, - mac_list); + node = hsr_node_get_first(&hsr->self_node_db); if (!node) { WARN_ONCE(1, "HSR: No self node\n"); return false; @@ -59,12 +75,12 @@ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr) /* Search for mac entry. Caller must hold rcu read lock. */ -static struct hsr_node *find_node_by_addr_A(struct list_head *node_db, +static struct hsr_node *find_node_by_addr_A(struct hlist_head *node_db, const unsigned char addr[ETH_ALEN]) { struct hsr_node *node; - list_for_each_entry_rcu(node, node_db, mac_list) { + hlist_for_each_entry_rcu(node, node_db, mac_list) { if (ether_addr_equal(node->macaddress_A, addr)) return node; } @@ -79,7 +95,7 @@ int hsr_create_self_node(struct hsr_priv *hsr, const unsigned char addr_a[ETH_ALEN], const unsigned char addr_b[ETH_ALEN]) { - struct list_head *self_node_db = &hsr->self_node_db; + struct hlist_head *self_node_db = &hsr->self_node_db; struct hsr_node *node, *oldnode; node = kmalloc(sizeof(*node), GFP_KERNEL); @@ -90,14 +106,13 @@ int hsr_create_self_node(struct hsr_priv *hsr, ether_addr_copy(node->macaddress_B, addr_b); spin_lock_bh(&hsr->list_lock); - oldnode = list_first_or_null_rcu(self_node_db, - struct hsr_node, mac_list); + oldnode = hsr_node_get_first(self_node_db); if (oldnode) { - list_replace_rcu(&oldnode->mac_list, &node->mac_list); + hlist_replace_rcu(&oldnode->mac_list, &node->mac_list); spin_unlock_bh(&hsr->list_lock); kfree_rcu(oldnode, rcu_head); } else { - list_add_tail_rcu(&node->mac_list, self_node_db); + hlist_add_tail_rcu(&node->mac_list, self_node_db); spin_unlock_bh(&hsr->list_lock); } @@ -106,25 +121,25 @@ int hsr_create_self_node(struct hsr_priv *hsr, void hsr_del_self_node(struct hsr_priv *hsr) { - struct list_head *self_node_db = &hsr->self_node_db; + struct hlist_head *self_node_db = &hsr->self_node_db; struct hsr_node *node; spin_lock_bh(&hsr->list_lock); - node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list); + node = hsr_node_get_first(self_node_db); if (node) { - list_del_rcu(&node->mac_list); + hlist_del_rcu(&node->mac_list); kfree_rcu(node, rcu_head); } spin_unlock_bh(&hsr->list_lock); } -void hsr_del_nodes(struct list_head *node_db) +void hsr_del_nodes(struct hlist_head *node_db) { struct hsr_node *node; - struct hsr_node *tmp; + struct hlist_node *tmp; - list_for_each_entry_safe(node, tmp, node_db, mac_list) - kfree(node); + hlist_for_each_entry_safe(node, tmp, node_db, mac_list) + kfree_rcu(node, rcu_head); } void prp_handle_san_frame(bool san, enum hsr_port_type port, @@ -145,7 +160,7 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port, * originating from the newly added node. 
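 * Note that node_db now refers to a single jhash-selected bucket of the
 * node table (see hsr_mac_hash() above), not to one global list.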
*/ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, - struct list_head *node_db, + struct hlist_head *node_db, unsigned char addr[], u16 seq_out, bool san, enum hsr_port_type rx_port) @@ -175,14 +190,14 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, hsr->proto_ops->handle_san_frame(san, rx_port, new_node); spin_lock_bh(&hsr->list_lock); - list_for_each_entry_rcu(node, node_db, mac_list, - lockdep_is_held(&hsr->list_lock)) { + hlist_for_each_entry_rcu(node, node_db, mac_list, + lockdep_is_held(&hsr->list_lock)) { if (ether_addr_equal(node->macaddress_A, addr)) goto out; if (ether_addr_equal(node->macaddress_B, addr)) goto out; } - list_add_tail_rcu(&new_node->mac_list, node_db); + hlist_add_tail_rcu(&new_node->mac_list, node_db); spin_unlock_bh(&hsr->list_lock); return new_node; out: @@ -202,7 +217,7 @@ void prp_update_san_info(struct hsr_node *node, bool is_sup) /* Get the hsr_node from which 'skb' was sent. */ -struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, +struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db, struct sk_buff *skb, bool is_sup, enum hsr_port_type rx_port) { @@ -218,7 +233,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, ethhdr = (struct ethhdr *)skb_mac_header(skb); - list_for_each_entry_rcu(node, node_db, mac_list) { + hlist_for_each_entry_rcu(node, node_db, mac_list) { if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) { if (hsr->proto_ops->update_san_info) hsr->proto_ops->update_san_info(node, is_sup); @@ -268,11 +283,12 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) struct hsr_sup_tlv *hsr_sup_tlv; struct hsr_node *node_real; struct sk_buff *skb = NULL; - struct list_head *node_db; + struct hlist_head *node_db; struct ethhdr *ethhdr; int i; unsigned int pull_size = 0; unsigned int total_pull_size = 0; + u32 hash; /* Here either frame->skb_hsr or frame->skb_prp should be * valid as supervision frame always will have protocol @@ -310,11 +326,13 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) hsr_sp = (struct hsr_sup_payload *)skb->data; /* Merge node_curr (registered on macaddress_B) into node_real */ - node_db = &port_rcv->hsr->node_db; - node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A); + node_db = port_rcv->hsr->node_db; + hash = hsr_mac_hash(hsr, hsr_sp->macaddress_A); + node_real = find_node_by_addr_A(&node_db[hash], hsr_sp->macaddress_A); if (!node_real) /* No frame received from AddrA of this node yet */ - node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A, + node_real = hsr_add_node(hsr, &node_db[hash], + hsr_sp->macaddress_A, HSR_SEQNR_START - 1, true, port_rcv->type); if (!node_real) @@ -348,7 +366,8 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) hsr_sp = (struct hsr_sup_payload *)skb->data; /* Check if redbox mac and node mac are equal. */ - if (!ether_addr_equal(node_real->macaddress_A, hsr_sp->macaddress_A)) { + if (!ether_addr_equal(node_real->macaddress_A, + hsr_sp->macaddress_A)) { /* This is a redbox supervision frame for a VDAN! 
*/ goto done; } @@ -368,7 +387,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) node_real->addr_B_port = port_rcv->type; spin_lock_bh(&hsr->list_lock); - list_del_rcu(&node_curr->mac_list); + hlist_del_rcu(&node_curr->mac_list); spin_unlock_bh(&hsr->list_lock); kfree_rcu(node_curr, rcu_head); @@ -406,6 +425,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, struct hsr_port *port) { struct hsr_node *node_dst; + u32 hash; if (!skb_mac_header_was_set(skb)) { WARN_ONCE(1, "%s: Mac header not set\n", __func__); @@ -415,7 +435,8 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, if (!is_unicast_ether_addr(eth_hdr(skb)->h_dest)) return; - node_dst = find_node_by_addr_A(&port->hsr->node_db, + hash = hsr_mac_hash(port->hsr, eth_hdr(skb)->h_dest); + node_dst = find_node_by_addr_A(&port->hsr->node_db[hash], eth_hdr(skb)->h_dest); if (!node_dst) { if (net_ratelimit()) @@ -491,59 +512,73 @@ static struct hsr_port *get_late_port(struct hsr_priv *hsr, void hsr_prune_nodes(struct timer_list *t) { struct hsr_priv *hsr = from_timer(hsr, t, prune_timer); + struct hlist_node *tmp; struct hsr_node *node; - struct hsr_node *tmp; struct hsr_port *port; unsigned long timestamp; unsigned long time_a, time_b; + int i; spin_lock_bh(&hsr->list_lock); - list_for_each_entry_safe(node, tmp, &hsr->node_db, mac_list) { - /* Don't prune own node. Neither time_in[HSR_PT_SLAVE_A] - * nor time_in[HSR_PT_SLAVE_B], will ever be updated for - * the master port. Thus the master node will be repeatedly - * pruned leading to packet loss. - */ - if (hsr_addr_is_self(hsr, node->macaddress_A)) - continue; - - /* Shorthand */ - time_a = node->time_in[HSR_PT_SLAVE_A]; - time_b = node->time_in[HSR_PT_SLAVE_B]; - - /* Check for timestamps old enough to risk wrap-around */ - if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2)) - node->time_in_stale[HSR_PT_SLAVE_A] = true; - if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2)) - node->time_in_stale[HSR_PT_SLAVE_B] = true; - - /* Get age of newest frame from node. - * At least one time_in is OK here; nodes get pruned long - * before both time_ins can get stale - */ - timestamp = time_a; - if (node->time_in_stale[HSR_PT_SLAVE_A] || - (!node->time_in_stale[HSR_PT_SLAVE_B] && - time_after(time_b, time_a))) - timestamp = time_b; - - /* Warn of ring error only as long as we get frames at all */ - if (time_is_after_jiffies(timestamp + - msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) { - rcu_read_lock(); - port = get_late_port(hsr, node); - if (port) - hsr_nl_ringerror(hsr, node->macaddress_A, port); - rcu_read_unlock(); - } - /* Prune old entries */ - if (time_is_before_jiffies(timestamp + - msecs_to_jiffies(HSR_NODE_FORGET_TIME))) { - hsr_nl_nodedown(hsr, node->macaddress_A); - list_del_rcu(&node->mac_list); - /* Note that we need to free this entry later: */ - kfree_rcu(node, rcu_head); + for (i = 0; i < hsr->hash_buckets; i++) { + hlist_for_each_entry_safe(node, tmp, &hsr->node_db[i], + mac_list) { + /* Don't prune own node. + * Neither time_in[HSR_PT_SLAVE_A] + * nor time_in[HSR_PT_SLAVE_B], will ever be updated + * for the master port. Thus the master node will be + * repeatedly pruned leading to packet loss. 
+ */ + if (hsr_addr_is_self(hsr, node->macaddress_A)) + continue; + + /* Shorthand */ + time_a = node->time_in[HSR_PT_SLAVE_A]; + time_b = node->time_in[HSR_PT_SLAVE_B]; + + /* Check for timestamps old enough to + * risk wrap-around + */ + if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2)) + node->time_in_stale[HSR_PT_SLAVE_A] = true; + if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2)) + node->time_in_stale[HSR_PT_SLAVE_B] = true; + + /* Get age of newest frame from node. + * At least one time_in is OK here; nodes get pruned + * long before both time_ins can get stale + */ + timestamp = time_a; + if (node->time_in_stale[HSR_PT_SLAVE_A] || + (!node->time_in_stale[HSR_PT_SLAVE_B] && + time_after(time_b, time_a))) + timestamp = time_b; + + /* Warn of ring error only as long as we get + * frames at all + */ + if (time_is_after_jiffies(timestamp + + msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) { + rcu_read_lock(); + port = get_late_port(hsr, node); + if (port) + hsr_nl_ringerror(hsr, + node->macaddress_A, + port); + rcu_read_unlock(); + } + + /* Prune old entries */ + if (time_is_before_jiffies(timestamp + + msecs_to_jiffies(HSR_NODE_FORGET_TIME))) { + hsr_nl_nodedown(hsr, node->macaddress_A); + hlist_del_rcu(&node->mac_list); + /* Note that we need to free this + * entry later: + */ + kfree_rcu(node, rcu_head); + } } } spin_unlock_bh(&hsr->list_lock); @@ -557,17 +592,19 @@ void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos, unsigned char addr[ETH_ALEN]) { struct hsr_node *node; + u32 hash; + + hash = hsr_mac_hash(hsr, addr); if (!_pos) { - node = list_first_or_null_rcu(&hsr->node_db, - struct hsr_node, mac_list); + node = hsr_node_get_first(&hsr->node_db[hash]); if (node) ether_addr_copy(addr, node->macaddress_A); return node; } node = _pos; - list_for_each_entry_continue_rcu(node, &hsr->node_db, mac_list) { + hlist_for_each_entry_continue_rcu(node, mac_list) { ether_addr_copy(addr, node->macaddress_A); return node; } @@ -587,8 +624,11 @@ int hsr_get_node_data(struct hsr_priv *hsr, struct hsr_node *node; struct hsr_port *port; unsigned long tdiff; + u32 hash; + + hash = hsr_mac_hash(hsr, addr); - node = find_node_by_addr_A(&hsr->node_db, addr); + node = find_node_by_addr_A(&hsr->node_db[hash], addr); if (!node) return -ENOENT; diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index bdbb8c822ba1..d7cce6b161e3 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -28,9 +28,11 @@ struct hsr_frame_info { bool is_from_san; }; +u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr); +struct hsr_node *hsr_node_get_first(struct hlist_head *head); void hsr_del_self_node(struct hsr_priv *hsr); -void hsr_del_nodes(struct list_head *node_db); -struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, +void hsr_del_nodes(struct hlist_head *node_db); +struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db, struct sk_buff *skb, bool is_sup, enum hsr_port_type rx_port); void hsr_handle_sup_frame(struct hsr_frame_info *frame); @@ -68,7 +70,7 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port, void prp_update_san_info(struct hsr_node *node, bool is_sup); struct hsr_node { - struct list_head mac_list; + struct hlist_node mac_list; unsigned char macaddress_A[ETH_ALEN]; unsigned char macaddress_B[ETH_ALEN]; /* Local slave through which AddrB frames are received from this node */ diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index ff9ec7634218..ca556bda3467 100644 --- a/net/hsr/hsr_main.h +++ 
b/net/hsr/hsr_main.h @@ -63,6 +63,9 @@ struct hsr_tag { #define HSR_V1_SUP_LSDUSIZE 52 +#define HSR_HSIZE_SHIFT 8 +#define HSR_HSIZE BIT(HSR_HSIZE_SHIFT) + /* The helper functions below assumes that 'path' occupies the 4 most * significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or * equivalently, the 4 most significant bits of HSR tag byte 14). @@ -201,8 +204,8 @@ struct hsr_proto_ops { struct hsr_priv { struct rcu_head rcu_head; struct list_head ports; - struct list_head node_db; /* Known HSR nodes */ - struct list_head self_node_db; /* MACs of slaves */ + struct hlist_head node_db[HSR_HSIZE]; /* Known HSR nodes */ + struct hlist_head self_node_db; /* MACs of slaves */ struct timer_list announce_timer; /* Supervision frame dispatch */ struct timer_list prune_timer; int announce_count; @@ -212,6 +215,8 @@ struct hsr_priv { spinlock_t seqnr_lock; /* locking for sequence_nr */ spinlock_t list_lock; /* locking for node list */ struct hsr_proto_ops *proto_ops; + u32 hash_buckets; + u32 hash_seed; #define PRP_LAN_ID 0x5 /* 0x1010 for A and 0x1011 for B. Bit 0 is set * based on SLAVE_A or SLAVE_B */ diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index f3c8f91dbe2c..1405c037cf7a 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -105,6 +105,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, static void hsr_dellink(struct net_device *dev, struct list_head *head) { struct hsr_priv *hsr = netdev_priv(dev); + int i; del_timer_sync(&hsr->prune_timer); del_timer_sync(&hsr->announce_timer); @@ -113,7 +114,8 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head) hsr_del_ports(hsr); hsr_del_self_node(hsr); - hsr_del_nodes(&hsr->node_db); + for (i = 0; i < hsr->hash_buckets; i++) + hsr_del_nodes(&hsr->node_db[i]); unregister_netdevice_queue(dev, head); } diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 2cf62718a282..2c087b7f17c5 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -47,6 +47,7 @@ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/ieee802154.h> +#include <linux/if_arp.h> #include <net/ipv6.h> diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index b60c9fd7147e..f79ab942f03b 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -96,12 +96,14 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id) + u32 *next_btf_id, + enum bpf_type_flag *flag) { size_t end; if (atype == BPF_READ) - return btf_struct_access(log, btf, t, off, size, atype, next_btf_id); + return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, + flag); if (t != tcp_sock_type) { bpf_log(log, "only read is supported\n"); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4d61ddd8a0ec..54811728d906 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -32,6 +32,7 @@ #include <linux/list.h> #include <linux/slab.h> +#include <net/inet_dscp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> @@ -735,8 +736,16 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, memset(cfg, 0, sizeof(*cfg)); rtm = nlmsg_data(nlh); + + if (!inet_validate_dscp(rtm->rtm_tos)) { + NL_SET_ERR_MSG(extack, + "Invalid dsfield (tos): ECN bits must be 0"); + err = -EINVAL; + goto errout; + } + cfg->fc_dscp = inet_dsfield_to_dscp(rtm->rtm_tos); + cfg->fc_dst_len 
= rtm->rtm_dst_len; - cfg->fc_tos = rtm->rtm_tos; cfg->fc_table = rtm->rtm_table; cfg->fc_protocol = rtm->rtm_protocol; cfg->fc_scope = rtm->rtm_scope; @@ -1547,7 +1556,7 @@ static void ip_fib_net_exit(struct net *net) { int i; - rtnl_lock(); + ASSERT_RTNL(); #ifdef CONFIG_IP_MULTIPLE_TABLES RCU_INIT_POINTER(net->ipv4.fib_main, NULL); RCU_INIT_POINTER(net->ipv4.fib_default, NULL); @@ -1572,7 +1581,7 @@ static void ip_fib_net_exit(struct net *net) #ifdef CONFIG_IP_MULTIPLE_TABLES fib4_rules_exit(net); #endif - rtnl_unlock(); + kfree(net->ipv4.fib_table_hash); fib4_notifier_exit(net); } @@ -1599,7 +1608,9 @@ out: out_proc: nl_fib_lookup_exit(net); out_nlfl: + rtnl_lock(); ip_fib_net_exit(net); + rtnl_unlock(); goto out; } @@ -1607,12 +1618,23 @@ static void __net_exit fib_net_exit(struct net *net) { fib_proc_exit(net); nl_fib_lookup_exit(net); - ip_fib_net_exit(net); +} + +static void __net_exit fib_net_exit_batch(struct list_head *net_list) +{ + struct net *net; + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) + ip_fib_net_exit(net); + + rtnl_unlock(); } static struct pernet_operations fib_net_ops = { .init = fib_net_init, .exit = fib_net_exit, + .exit_batch = fib_net_exit_batch, }; void __init ip_fib_init(void) diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index e184bcb19943..a63014b54809 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -4,13 +4,14 @@ #include <linux/types.h> #include <linux/list.h> +#include <net/inet_dscp.h> #include <net/ip_fib.h> #include <net/nexthop.h> struct fib_alias { struct hlist_node fa_list; struct fib_info *fa_info; - u8 fa_tos; + dscp_t fa_dscp; u8 fa_type; u8 fa_state; u8 fa_slen; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index e0b6c8b6de57..117c48571cf0 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -23,6 +23,7 @@ #include <linux/list.h> #include <linux/rcupdate.h> #include <linux/export.h> +#include <net/inet_dscp.h> #include <net/ip.h> #include <net/route.h> #include <net/tcp.h> @@ -35,7 +36,7 @@ struct fib4_rule { struct fib_rule common; u8 dst_len; u8 src_len; - u8 tos; + dscp_t dscp; __be32 src; __be32 srcmask; __be32 dst; @@ -49,7 +50,7 @@ static bool fib4_rule_matchall(const struct fib_rule *rule) { struct fib4_rule *r = container_of(rule, struct fib4_rule, common); - if (r->dst_len || r->src_len || r->tos) + if (r->dst_len || r->src_len || r->dscp) return false; return fib_rule_matchall(rule); } @@ -185,7 +186,7 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule, ((daddr ^ r->dst) & r->dstmask)) return 0; - if (r->tos && (r->tos != fl4->flowi4_tos)) + if (r->dscp && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos)) return 0; if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto)) @@ -225,10 +226,12 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, int err = -EINVAL; struct fib4_rule *rule4 = (struct fib4_rule *) rule; - if (frh->tos & ~IPTOS_TOS_MASK) { - NL_SET_ERR_MSG(extack, "Invalid tos"); + if (!inet_validate_dscp(frh->tos)) { + NL_SET_ERR_MSG(extack, + "Invalid dsfield (tos): ECN bits must be 0"); goto errout; } + rule4->dscp = inet_dsfield_to_dscp(frh->tos); /* split local/main if they are not already split */ err = fib_unmerge(net); @@ -270,7 +273,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->srcmask = inet_make_mask(rule4->src_len); rule4->dst_len = frh->dst_len; rule4->dstmask = inet_make_mask(rule4->dst_len); - rule4->tos = frh->tos; net->ipv4.fib_has_custom_rules = true; 
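The fib_rules, fib_frontend and fib_trie hunks in this series all follow the same dscp_t pattern: a user-supplied dsfield is validated once (the two ECN bits must be zero), stored as the Sparse-checked dscp_t type, and converted back to a plain dsfield only at comparison and netlink-fill points. A condensed sketch of the helpers from include/net/inet_dscp.h, reproduced from memory, so treat the exact definitions as approximate:

	typedef u8 __bitwise dscp_t;

	#define INET_DSCP_MASK 0xfc	/* six DSCP bits, ECN bits clear */

	/* A dsfield encodes a pure DSCP value iff its ECN bits are zero. */
	static inline bool inet_validate_dscp(__u8 dsfield)
	{
		return !(dsfield & ~INET_DSCP_MASK);
	}

	static inline dscp_t inet_dsfield_to_dscp(__u8 dsfield)
	{
		return (__force dscp_t)(dsfield & INET_DSCP_MASK);
	}

	static inline __u8 inet_dscp_to_dsfield(dscp_t dscp)
	{
		return (__force __u8)dscp;
	}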
@@ -313,7 +315,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, if (frh->dst_len && (rule4->dst_len != frh->dst_len)) return 0; - if (frh->tos && (rule4->tos != frh->tos)) + if (frh->tos && inet_dscp_to_dsfield(rule4->dscp) != frh->tos) return 0; #ifdef CONFIG_IP_ROUTE_CLASSID @@ -337,7 +339,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->dst_len = rule4->dst_len; frh->src_len = rule4->src_len; - frh->tos = rule4->tos; + frh->tos = inet_dscp_to_dsfield(rule4->dscp); if ((rule4->dst_len && nla_put_in_addr(skb, FRA_DST, rule4->dst)) || diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 4c5399450682..c9c4f2f66b38 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -32,6 +32,7 @@ #include <linux/hash.h> #include <net/arp.h> +#include <net/inet_dscp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> @@ -523,7 +524,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, fri.tb_id = tb_id; fri.dst = key; fri.dst_len = dst_len; - fri.tos = fa->fa_tos; + fri.tos = inet_dscp_to_dsfield(fa->fa_dscp); fri.type = fa->fa_type; fri.offload = fa->offload; fri.trap = fa->trap; @@ -2039,7 +2040,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) int order = -1, last_idx = -1; struct fib_alias *fa, *fa1 = NULL; u32 last_prio = res->fi->fib_priority; - u8 last_tos = 0; + dscp_t last_dscp = 0; hlist_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; @@ -2047,19 +2048,20 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) if (fa->fa_slen != slen) continue; - if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) + if (fa->fa_dscp && + fa->fa_dscp != inet_dsfield_to_dscp(flp->flowi4_tos)) continue; if (fa->tb_id != tb->tb_id) continue; if (next_fi->fib_priority > last_prio && - fa->fa_tos == last_tos) { - if (last_tos) + fa->fa_dscp == last_dscp) { + if (last_dscp) continue; break; } if (next_fi->fib_flags & RTNH_F_DEAD) continue; - last_tos = fa->fa_tos; + last_dscp = fa->fa_dscp; last_prio = next_fi->fib_priority; if (next_fi->fib_scope != res->scope || diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 8060524f4256..c05cd105e95e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -61,6 +61,7 @@ #include <linux/vmalloc.h> #include <linux/notifier.h> #include <net/net_namespace.h> +#include <net/inet_dscp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> @@ -81,7 +82,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, .dst = dst, .dst_len = dst_len, .fi = fa->fa_info, - .tos = fa->fa_tos, + .tos = inet_dscp_to_dsfield(fa->fa_dscp), .type = fa->fa_type, .tb_id = fa->tb_id, }; @@ -98,7 +99,7 @@ static int call_fib_entry_notifiers(struct net *net, .dst = dst, .dst_len = dst_len, .fi = fa->fa_info, - .tos = fa->fa_tos, + .tos = inet_dscp_to_dsfield(fa->fa_dscp), .type = fa->fa_type, .tb_id = fa->tb_id, }; @@ -973,13 +974,13 @@ static struct key_vector *fib_find_node(struct trie *t, return n; } -/* Return the first fib alias matching TOS with +/* Return the first fib alias matching DSCP with * priority less than or equal to PRIO. * If 'find_first' is set, return the first matching - * fib alias, regardless of TOS and priority. + * fib alias, regardless of DSCP and priority. 
*/ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen, - u8 tos, u32 prio, u32 tb_id, + dscp_t dscp, u32 prio, u32 tb_id, bool find_first) { struct fib_alias *fa; @@ -988,6 +989,10 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen, return NULL; hlist_for_each_entry(fa, fah, fa_list) { + /* Avoid Sparse warning when using dscp_t in inequalities */ + u8 __fa_dscp = inet_dscp_to_dsfield(fa->fa_dscp); + u8 __dscp = inet_dscp_to_dsfield(dscp); + if (fa->fa_slen < slen) continue; if (fa->fa_slen != slen) @@ -998,9 +1003,9 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen, break; if (find_first) return fa; - if (fa->fa_tos > tos) + if (__fa_dscp > __dscp) continue; - if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) + if (fa->fa_info->fib_priority >= prio || __fa_dscp < __dscp) return fa; } @@ -1027,8 +1032,8 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri) hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { if (fa->fa_slen == slen && fa->tb_id == fri->tb_id && - fa->fa_tos == fri->tos && fa->fa_info == fri->fi && - fa->fa_type == fri->type) + fa->fa_dscp == inet_dsfield_to_dscp(fri->tos) && + fa->fa_info == fri->fi && fa->fa_type == fri->type) return fa; } @@ -1210,7 +1215,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, struct fib_info *fi; u8 plen = cfg->fc_dst_len; u8 slen = KEYLENGTH - plen; - u8 tos = cfg->fc_tos; + dscp_t dscp; u32 key; int err; @@ -1227,12 +1232,13 @@ int fib_table_insert(struct net *net, struct fib_table *tb, goto err; } + dscp = cfg->fc_dscp; l = fib_find_node(t, &tp, key); - fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority, + fa = l ? fib_find_alias(&l->leaf, slen, dscp, fi->fib_priority, tb->tb_id, false) : NULL; /* Now fa, if non-NULL, points to the first fib alias - * with the same keys [prefix,tos,priority], if such key already + * with the same keys [prefix,dscp,priority], if such key already * exists or to the node before which we will insert new one. * * If fa is NULL, we will need to allocate a new one and @@ -1240,7 +1246,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, * of the new alias. 
*/ - if (fa && fa->fa_tos == tos && + if (fa && fa->fa_dscp == dscp && fa->fa_info->fib_priority == fi->fib_priority) { struct fib_alias *fa_first, *fa_match; @@ -1260,7 +1266,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, hlist_for_each_entry_from(fa, fa_list) { if ((fa->fa_slen != slen) || (fa->tb_id != tb->tb_id) || - (fa->fa_tos != tos)) + (fa->fa_dscp != dscp)) break; if (fa->fa_info->fib_priority != fi->fib_priority) break; @@ -1288,7 +1294,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, goto out; fi_drop = fa->fa_info; - new_fa->fa_tos = fa->fa_tos; + new_fa->fa_dscp = fa->fa_dscp; new_fa->fa_info = fi; new_fa->fa_type = cfg->fc_type; state = fa->fa_state; @@ -1351,7 +1357,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, goto out; new_fa->fa_info = fi; - new_fa->fa_tos = tos; + new_fa->fa_dscp = dscp; new_fa->fa_type = cfg->fc_type; new_fa->fa_state = 0; new_fa->fa_slen = slen; @@ -1567,7 +1573,8 @@ found: if (index >= (1ul << fa->fa_slen)) continue; } - if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) + if (fa->fa_dscp && + inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos) continue; if (fi->fib_dead) continue; @@ -1703,7 +1710,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb, struct key_vector *l, *tp; u8 plen = cfg->fc_dst_len; u8 slen = KEYLENGTH - plen; - u8 tos = cfg->fc_tos; + dscp_t dscp; u32 key; key = ntohl(cfg->fc_dst); @@ -1715,11 +1722,13 @@ int fib_table_delete(struct net *net, struct fib_table *tb, if (!l) return -ESRCH; - fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id, false); + dscp = cfg->fc_dscp; + fa = fib_find_alias(&l->leaf, slen, dscp, 0, tb->tb_id, false); if (!fa) return -ESRCH; - pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); + pr_debug("Deleting %08x/%d dsfield=0x%02x t=%p\n", key, plen, + inet_dscp_to_dsfield(dscp), t); fa_to_delete = NULL; hlist_for_each_entry_from(fa, fa_list) { @@ -1727,7 +1736,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb, if ((fa->fa_slen != slen) || (fa->tb_id != tb->tb_id) || - (fa->fa_tos != tos)) + (fa->fa_dscp != dscp)) break; if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && @@ -2295,7 +2304,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, fri.tb_id = tb->tb_id; fri.dst = xkey; fri.dst_len = KEYLENGTH - fa->fa_slen; - fri.tos = fa->fa_tos; + fri.tos = inet_dscp_to_dsfield(fa->fa_dscp); fri.type = fa->fa_type; fri.offload = fa->offload; fri.trap = fa->trap; @@ -2807,8 +2816,9 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) fa->fa_info->fib_scope), rtn_type(buf2, sizeof(buf2), fa->fa_type)); - if (fa->fa_tos) - seq_printf(seq, " tos=%d", fa->fa_tos); + if (fa->fa_dscp) + seq_printf(seq, " tos=%d", + inet_dscp_to_dsfield(fa->fa_dscp)); seq_putc(seq, '\n'); } } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 30ab717ff1b8..17440840a791 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -637,7 +637,9 @@ int __inet_hash(struct sock *sk, struct sock *osk) int err = 0; if (sk->sk_state != TCP_LISTEN) { + local_bh_disable(); inet_ehash_nolisten(sk, osk, NULL); + local_bh_enable(); return 0; } WARN_ON(!sk_unhashed(sk)); @@ -669,45 +671,54 @@ int inet_hash(struct sock *sk) { int err = 0; - if (sk->sk_state != TCP_CLOSE) { - local_bh_disable(); + if (sk->sk_state != TCP_CLOSE) err = __inet_hash(sk, NULL); - local_bh_enable(); - } return err; } EXPORT_SYMBOL_GPL(inet_hash); -void inet_unhash(struct sock *sk) +static void 
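/* Note on the fib_find_alias() change above: dscp_t is a __bitwise type,
 * so Sparse accepts equality tests between dscp_t values but warns when
 * they appear in relational comparisons. The ordering checks therefore go
 * through plain u8 temporaries while exact matches stay typed:
 *
 *	u8 __fa_dscp = inet_dscp_to_dsfield(fa->fa_dscp);
 *	u8 __dscp = inet_dscp_to_dsfield(dscp);
 *
 *	if (__fa_dscp > __dscp)		// ordering needs raw u8 values
 *		continue;
 *	...
 *	if (fa->fa_dscp == dscp)	// equality is fine on dscp_t
 *		...
 */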
__inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct inet_listen_hashbucket *ilb = NULL; - spinlock_t *lock; - if (sk_unhashed(sk)) return; - if (sk->sk_state == TCP_LISTEN) { - ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &ilb->lock; - } else { - lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - } - spin_lock_bh(lock); - if (sk_unhashed(sk)) - goto unlock; - if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_stop_listen_sock(sk); if (ilb) { + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + inet_unhash2(hashinfo, sk); ilb->count--; } __sk_nulls_del_node_init_rcu(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); -unlock: - spin_unlock_bh(lock); +} + +void inet_unhash(struct sock *sk) +{ + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + + if (sk_unhashed(sk)) + return; + + if (sk->sk_state == TCP_LISTEN) { + struct inet_listen_hashbucket *ilb; + + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + /* Don't disable bottom halves while acquiring the lock to + * avoid circular locking dependency on PREEMPT_RT. + */ + spin_lock(&ilb->lock); + __inet_unhash(sk, ilb); + spin_unlock(&ilb->lock); + } else { + spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + + spin_lock_bh(lock); + __inet_unhash(sk, NULL); + spin_unlock_bh(lock); + } } EXPORT_SYMBOL_GPL(inet_unhash); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 3a025c011971..d94f9f7e60c3 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -196,7 +196,8 @@ resubmit: if (ipprot) { if (!ipprot->no_policy) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - kfree_skb(skb); + kfree_skb_reason(skb, + SKB_DROP_REASON_XFRM_POLICY); return; } nf_reset_ct(skb); @@ -215,7 +216,7 @@ resubmit: icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); } - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IP_NOPROTO); } else { __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); consume_skb(skb); @@ -318,8 +319,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, { const struct iphdr *iph = ip_hdr(skb); int (*edemux)(struct sk_buff *skb); + int err, drop_reason; struct rtable *rt; - int err; + + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (ip_can_use_hint(skb, iph, hint)) { err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos, @@ -396,19 +399,23 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, * so-called "hole-196" attack) so do it for both. */ if (in_dev && - IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) + IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) { + drop_reason = SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST; goto drop; + } } return NET_RX_SUCCESS; drop: - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return NET_RX_DROP; drop_error: - if (err == -EXDEV) + if (err == -EXDEV) { + drop_reason = SKB_DROP_REASON_IP_RPFILTER; __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); + } goto drop; } @@ -436,13 +443,16 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) { const struct iphdr *iph; + int drop_reason; u32 len; /* When the interface is in promisc. mode, drop all the crap * that it receives, do not try to analyse it. 
*/ - if (skb->pkt_type == PACKET_OTHERHOST) + if (skb->pkt_type == PACKET_OTHERHOST) { + drop_reason = SKB_DROP_REASON_OTHERHOST; goto drop; + } __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len); @@ -452,6 +462,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) goto out; } + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto inhdr_error; @@ -488,6 +499,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) len = ntohs(iph->tot_len); if (skb->len < len) { + drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) @@ -516,11 +528,14 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) return skb; csum_error: + drop_reason = SKB_DROP_REASON_IP_CSUM; __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); inhdr_error: + if (drop_reason == SKB_DROP_REASON_NOT_SPECIFIED) + drop_reason = SKB_DROP_REASON_IP_INHDR; __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); drop: - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); out: return NULL; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 07274619b9ea..4a55a620e526 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -266,13 +266,12 @@ static void __net_exit ipmr_rules_exit(struct net *net) { struct mr_table *mrt, *next; - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { list_del(&mrt->list); ipmr_free_table(mrt); } fib_rules_unregister(net->ipv4.mr_rules_ops); - rtnl_unlock(); } static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, @@ -328,10 +327,9 @@ static int __net_init ipmr_rules_init(struct net *net) static void __net_exit ipmr_rules_exit(struct net *net) { - rtnl_lock(); + ASSERT_RTNL(); ipmr_free_table(net->ipv4.mrt); net->ipv4.mrt = NULL; - rtnl_unlock(); } static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, @@ -3075,7 +3073,9 @@ static int __net_init ipmr_net_init(struct net *net) proc_cache_fail: remove_proc_entry("ip_mr_vif", net->proc_net); proc_vif_fail: + rtnl_lock(); ipmr_rules_exit(net); + rtnl_unlock(); #endif ipmr_rules_fail: ipmr_notifier_exit(net); @@ -3090,12 +3090,22 @@ static void __net_exit ipmr_net_exit(struct net *net) remove_proc_entry("ip_mr_vif", net->proc_net); #endif ipmr_notifier_exit(net); - ipmr_rules_exit(net); +} + +static void __net_exit ipmr_net_exit_batch(struct list_head *net_list) +{ + struct net *net; + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) + ipmr_rules_exit(net); + rtnl_unlock(); } static struct pernet_operations ipmr_net_ops = { .init = ipmr_net_init, .exit = ipmr_net_exit, + .exit_batch = ipmr_net_exit_batch, }; int __init ip_mr_init(void) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index eeafeccebb8d..e459a391e607 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3733,12 +3733,16 @@ out: } EXPORT_SYMBOL(nexthop_res_grp_activity_update); -static void __net_exit nexthop_net_exit(struct net *net) +static void __net_exit nexthop_net_exit_batch(struct list_head *net_list) { + struct net *net; + rtnl_lock(); - flush_all_nexthops(net); + list_for_each_entry(net, net_list, exit_list) { + flush_all_nexthops(net); + kfree(net->nexthop.devhash); + } rtnl_unlock(); - kfree(net->nexthop.devhash); } static int __net_init nexthop_net_init(struct net *net) @@ -3756,7 +3760,7 @@ static int __net_init nexthop_net_init(struct net *net) static struct pernet_operations nexthop_net_ops = { .init = 
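/* The ipmr and nexthop changes above share one pattern: per-netns exit
 * hooks that used to take rtnl_lock() themselves become ->exit_batch()
 * hooks, so a single RTNL acquisition covers every namespace being torn
 * down, and the inner helpers merely ASSERT_RTNL(). A sketch of the
 * pattern, with a hypothetical foo_rules_exit() as the per-netns helper:
 *
 *	static void __net_exit foo_net_exit_batch(struct list_head *net_list)
 *	{
 *		struct net *net;
 *
 *		rtnl_lock();
 *		list_for_each_entry(net, net_list, exit_list)
 *			foo_rules_exit(net);	// ASSERT_RTNL() inside
 *		rtnl_unlock();
 *	}
 *
 * The error paths in the ->init() hooks keep calling the helper directly
 * and so must wrap it in rtnl_lock()/rtnl_unlock() themselves.
 */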
nexthop_net_init, - .exit = nexthop_net_exit, + .exit_batch = nexthop_net_exit_batch, }; static int __init nexthop_init(void) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8b35075088e1..634766e6c7cc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -84,6 +84,7 @@ #include <linux/jhash.h> #include <net/dst.h> #include <net/dst_metadata.h> +#include <net/inet_dscp.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/route.h> @@ -3391,7 +3392,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (fa->fa_slen == slen && fa->tb_id == fri.tb_id && - fa->fa_tos == fri.tos && + fa->fa_dscp == inet_dsfield_to_dscp(fri.tos) && fa->fa_info == res.fi && fa->fa_type == fri.type) { fri.offload = fa->offload; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 090360939401..6b4d8361560f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2093,16 +2093,20 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); + int drop_reason; /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) + if (rc == -ENOMEM) { UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); - else + drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF; + } else { UDP_INC_STATS(sock_net(sk), UDP_MIB_MEMERRORS, is_udplite); + drop_reason = SKB_DROP_REASON_PROTO_MEM; + } UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); trace_udp_fail_queue_rcv_skb(rc, sk); return -1; } @@ -2120,14 +2124,17 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) */ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { + int drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); /* * Charge it to the socket, dropping if the queue is full. 
*/ - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { + drop_reason = SKB_DROP_REASON_XFRM_POLICY; goto drop; + } nf_reset_ct(skb); if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) { @@ -2204,8 +2211,10 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) + if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) { + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto drop; + } udp_csum_pull_header(skb); @@ -2213,11 +2222,12 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) return __udp_queue_rcv_skb(sk, skb); csum_error: + drop_reason = SKB_DROP_REASON_UDP_CSUM; __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return -1; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f927c199a93c..4f402bc38f05 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -146,18 +146,11 @@ static int ipv6_generate_stable_address(struct in6_addr *addr, #define IN6_ADDR_HSIZE_SHIFT 8 #define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT) -/* - * Configured unicast address hash table - */ -static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE]; -static DEFINE_SPINLOCK(addrconf_hash_lock); -static void addrconf_verify(void); -static void addrconf_verify_rtnl(void); -static void addrconf_verify_work(struct work_struct *); +static void addrconf_verify(struct net *net); +static void addrconf_verify_rtnl(struct net *net); static struct workqueue_struct *addrconf_wq; -static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work); static void addrconf_join_anycast(struct inet6_ifaddr *ifp); static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); @@ -554,7 +547,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, #ifdef CONFIG_IPV6_MROUTE if ((all || type == NETCONFA_MC_FORWARDING) && nla_put_s32(skb, NETCONFA_MC_FORWARDING, - devconf->mc_forwarding) < 0) + atomic_read(&devconf->mc_forwarding)) < 0) goto nla_put_failure; #endif if ((all || type == NETCONFA_PROXY_NEIGH) && @@ -1011,9 +1004,7 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, { struct inet6_ifaddr *ifp; - hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; + hlist_for_each_entry(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr)) { if (!dev || ifp->idev->dev == dev) return true; @@ -1024,20 +1015,21 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa) { - unsigned int hash = inet6_addr_hash(dev_net(dev), &ifa->addr); + struct net *net = dev_net(dev); + unsigned int hash = inet6_addr_hash(net, &ifa->addr); int err = 0; - spin_lock(&addrconf_hash_lock); + spin_lock(&net->ipv6.addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ - if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) { + if (ipv6_chk_same_addr(net, &ifa->addr, dev, hash)) { netdev_dbg(dev, "ipv6_add_addr: already assigned\n"); err = -EEXIST; } else { - hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); + hlist_add_head_rcu(&ifa->addr_lst, &net->ipv6.inet6_addr_lst[hash]); } - 
spin_unlock(&addrconf_hash_lock); + spin_unlock(&net->ipv6.addrconf_hash_lock); return err; } @@ -1261,9 +1253,10 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, static void ipv6_del_addr(struct inet6_ifaddr *ifp) { - int state; enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP; + struct net *net = dev_net(ifp->idev->dev); unsigned long expires; + int state; ASSERT_RTNL(); @@ -1275,9 +1268,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if (state == INET6_IFADDR_STATE_DEAD) goto out; - spin_lock_bh(&addrconf_hash_lock); + spin_lock_bh(&net->ipv6.addrconf_hash_lock); hlist_del_init_rcu(&ifp->addr_lst); - spin_unlock_bh(&addrconf_hash_lock); + spin_unlock_bh(&net->ipv6.addrconf_hash_lock); write_lock_bh(&ifp->idev->lock); @@ -1920,10 +1913,8 @@ __ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, if (skip_dev_check) dev = NULL; - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { ndev = ifp->idev->dev; - if (!net_eq(dev_net(ndev), net)) - continue; if (l3mdev_master_dev_rcu(ndev) != l3mdev) continue; @@ -2027,9 +2018,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add struct inet6_ifaddr *ifp, *result = NULL; rcu_read_lock(); - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; + hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr)) { if (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { @@ -2096,7 +2085,7 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp) void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; - struct net *net = dev_net(ifp->idev->dev); + struct net *net = dev_net(idev->dev); if (addrconf_dad_end(ifp)) { in6_ifa_put(ifp); @@ -2675,7 +2664,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, create, now); in6_ifa_put(ifp); - addrconf_verify(); + addrconf_verify(net); } return 0; @@ -2987,7 +2976,7 @@ static int inet6_addr_add(struct net *net, int ifindex, manage_tempaddrs(idev, ifp, cfg->valid_lft, cfg->preferred_lft, true, jiffies); in6_ifa_put(ifp); - addrconf_verify_rtnl(); + addrconf_verify_rtnl(net); return 0; } else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) { ipv6_mc_config(net->ipv6.mc_autojoin_sk, false, @@ -3027,7 +3016,7 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, manage_tempaddrs(idev, ifp, 0, 0, false, jiffies); ipv6_del_addr(ifp); - addrconf_verify_rtnl(); + addrconf_verify_rtnl(net); if (ipv6_addr_is_multicast(pfx)) { ipv6_mc_config(net->ipv6.mc_autojoin_sk, false, pfx, dev->ifindex); @@ -3772,9 +3761,9 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { - struct hlist_head *h = &inet6_addr_lst[i]; + struct hlist_head *h = &net->ipv6.inet6_addr_lst[i]; - spin_lock_bh(&addrconf_hash_lock); + spin_lock_bh(&net->ipv6.addrconf_hash_lock); restart: hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { @@ -3790,7 +3779,7 @@ restart: } } } - spin_unlock_bh(&addrconf_hash_lock); + spin_unlock_bh(&net->ipv6.addrconf_hash_lock); } write_lock_bh(&idev->lock); @@ -4246,7 +4235,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, * before this temporary address becomes deprecated. 
*/ if (ifp->flags & IFA_F_TEMPORARY) - addrconf_verify_rtnl(); + addrconf_verify_rtnl(dev_net(dev)); } static void addrconf_dad_run(struct inet6_dev *idev, bool restart) @@ -4288,10 +4277,8 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) } for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { - hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket], + hlist_for_each_entry_rcu(ifa, &net->ipv6.inet6_addr_lst[state->bucket], addr_lst) { - if (!net_eq(dev_net(ifa->idev->dev), net)) - continue; /* sync with offset */ if (p < state->offset) { p++; @@ -4314,8 +4301,6 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct net *net = seq_file_net(seq); hlist_for_each_entry_continue_rcu(ifa, addr_lst) { - if (!net_eq(dev_net(ifa->idev->dev), net)) - continue; state->offset++; return ifa; } @@ -4323,9 +4308,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, state->offset = 0; while (++state->bucket < IN6_ADDR_HSIZE) { hlist_for_each_entry_rcu(ifa, - &inet6_addr_lst[state->bucket], addr_lst) { - if (!net_eq(dev_net(ifa->idev->dev), net)) - continue; + &net->ipv6.inet6_addr_lst[state->bucket], addr_lst) { return ifa; } } @@ -4413,9 +4396,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) int ret = 0; rcu_read_lock(); - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; + hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr) && (ifp->flags & IFA_F_HOMEADDRESS)) { ret = 1; @@ -4453,9 +4434,7 @@ int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs, hash = inet6_addr_hash(net, addr); hash_found = false; - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; + hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr)) { hash_found = true; @@ -4484,7 +4463,7 @@ int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs, * Periodic address status verification */ -static void addrconf_verify_rtnl(void) +static void addrconf_verify_rtnl(struct net *net) { unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; @@ -4496,11 +4475,11 @@ static void addrconf_verify_rtnl(void) now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); - cancel_delayed_work(&addr_chk_work); + cancel_delayed_work(&net->ipv6.addr_chk_work); for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &net->ipv6.inet6_addr_lst[i], addr_lst) { unsigned long age; /* When setting preferred_lft to a value not zero or @@ -4599,20 +4578,23 @@ restart: pr_debug("now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", now, next, next_sec, next_sched); - mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now); + mod_delayed_work(addrconf_wq, &net->ipv6.addr_chk_work, next_sched - now); rcu_read_unlock_bh(); } static void addrconf_verify_work(struct work_struct *w) { + struct net *net = container_of(to_delayed_work(w), struct net, + ipv6.addr_chk_work); + rtnl_lock(); - addrconf_verify_rtnl(); + addrconf_verify_rtnl(net); rtnl_unlock(); } -static void addrconf_verify(void) +static void addrconf_verify(struct net *net) { - mod_delayed_work(addrconf_wq, &addr_chk_work, 0); + mod_delayed_work(addrconf_wq, &net->ipv6.addr_chk_work, 0); } static 
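/* With the verification work now embedded per-netns, addrconf_verify_work()
 * above recovers its namespace by pointer arithmetic alone:
 *
 *	struct net *net = container_of(to_delayed_work(w),
 *				       struct net, ipv6.addr_chk_work);
 *
 * to_delayed_work() maps the work_struct callback argument back to the
 * enclosing struct delayed_work, and container_of() walks from that member
 * up to struct net. This is valid precisely because addr_chk_work is
 * embedded in struct net rather than allocated separately, which is also
 * what lets each namespace cancel or reschedule only its own timer.
 */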
struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local, @@ -4708,7 +4690,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, return 0; } -static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) +static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp, + struct ifa6_config *cfg) { u32 flags; clock_t expires; @@ -4822,7 +4805,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) jiffies); } - addrconf_verify_rtnl(); + addrconf_verify_rtnl(net); return 0; } @@ -4909,7 +4892,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, !(nlh->nlmsg_flags & NLM_F_REPLACE)) err = -EEXIST; else - err = inet6_addr_modify(ifa, &cfg); + err = inet6_addr_modify(net, ifa, &cfg); in6_ifa_put(ifa); @@ -5533,7 +5516,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE - array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; + array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding); #endif array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; @@ -5794,7 +5777,7 @@ update_lft: write_unlock_bh(&idev->lock); inet6_ifinfo_notify(RTM_NEWLINK, idev); - addrconf_verify_rtnl(); + addrconf_verify_rtnl(dev_net(dev)); return 0; } @@ -7111,6 +7094,14 @@ static int __net_init addrconf_init_net(struct net *net) int err = -ENOMEM; struct ipv6_devconf *all, *dflt; + spin_lock_init(&net->ipv6.addrconf_hash_lock); + INIT_DEFERRABLE_WORK(&net->ipv6.addr_chk_work, addrconf_verify_work); + net->ipv6.inet6_addr_lst = kcalloc(IN6_ADDR_HSIZE, + sizeof(struct hlist_head), + GFP_KERNEL); + if (!net->ipv6.inet6_addr_lst) + goto err_alloc_addr; + all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL); if (!all) goto err_alloc_all; @@ -7172,11 +7163,15 @@ err_reg_all: err_alloc_dflt: kfree(all); err_alloc_all: + kfree(net->ipv6.inet6_addr_lst); +err_alloc_addr: return err; } static void __net_exit addrconf_exit_net(struct net *net) { + int i; + #ifdef CONFIG_SYSCTL __addrconf_sysctl_unregister(net, net->ipv6.devconf_dflt, NETCONFA_IFINDEX_DEFAULT); @@ -7184,7 +7179,19 @@ static void __net_exit addrconf_exit_net(struct net *net) NETCONFA_IFINDEX_ALL); #endif kfree(net->ipv6.devconf_dflt); + net->ipv6.devconf_dflt = NULL; kfree(net->ipv6.devconf_all); + net->ipv6.devconf_all = NULL; + + cancel_delayed_work(&net->ipv6.addr_chk_work); + /* + * Check hash table, then free it. + */ + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON_ONCE(!hlist_empty(&net->ipv6.inet6_addr_lst[i])); + + kfree(net->ipv6.inet6_addr_lst); + net->ipv6.inet6_addr_lst = NULL; } static struct pernet_operations addrconf_ops = { @@ -7207,7 +7214,7 @@ static struct rtnl_af_ops inet6_ops __read_mostly = { int __init addrconf_init(void) { struct inet6_dev *idev; - int i, err; + int err; err = ipv6_addr_label_init(); if (err < 0) { @@ -7254,12 +7261,9 @@ int __init addrconf_init(void) ip6_route_init_special_entries(); - for (i = 0; i < IN6_ADDR_HSIZE; i++) - INIT_HLIST_HEAD(&inet6_addr_lst[i]); - register_netdevice_notifier(&ipv6_dev_notf); - addrconf_verify(); + addrconf_verify(&init_net); rtnl_af_register(&inet6_ops); @@ -7317,7 +7321,6 @@ out: void addrconf_cleanup(void) { struct net_device *dev; - int i; unregister_netdevice_notifier(&ipv6_dev_notf); unregister_pernet_subsys(&addrconf_ops); @@ -7335,14 +7338,6 @@ void addrconf_cleanup(void) } addrconf_ifdown(init_net.loopback_dev, true); - /* - * Check hash table. 
- */ - spin_lock_bh(&addrconf_hash_lock); - for (i = 0; i < IN6_ADDR_HSIZE; i++) - WARN_ON(!hlist_empty(&inet6_addr_lst[i])); - spin_unlock_bh(&addrconf_hash_lock); - cancel_delayed_work(&addr_chk_work); rtnl_unlock(); destroy_workqueue(addrconf_wq); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index ec029c86ae06..7c2003833010 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -16,6 +16,7 @@ #include <linux/indirect_call_wrapper.h> #include <net/fib_rules.h> +#include <net/inet_dscp.h> #include <net/ipv6.h> #include <net/addrconf.h> #include <net/ip6_route.h> @@ -25,14 +26,14 @@ struct fib6_rule { struct fib_rule common; struct rt6key src; struct rt6key dst; - u8 tclass; + dscp_t dscp; }; static bool fib6_rule_matchall(const struct fib_rule *rule) { struct fib6_rule *r = container_of(rule, struct fib6_rule, common); - if (r->dst.plen || r->src.plen || r->tclass) + if (r->dst.plen || r->src.plen || r->dscp) return false; return fib_rule_matchall(rule); } @@ -323,7 +324,7 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule, return 0; } - if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel)) + if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel)) return 0; if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto)) @@ -349,6 +350,13 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct net *net = sock_net(skb->sk); struct fib6_rule *rule6 = (struct fib6_rule *) rule; + if (!inet_validate_dscp(frh->tos)) { + NL_SET_ERR_MSG(extack, + "Invalid dsfield (tos): ECN bits must be 0"); + goto errout; + } + rule6->dscp = inet_dsfield_to_dscp(frh->tos); + if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) { if (rule->table == RT6_TABLE_UNSPEC) { NL_SET_ERR_MSG(extack, "Invalid table"); @@ -369,7 +377,6 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule6->src.plen = frh->src_len; rule6->dst.plen = frh->dst_len; - rule6->tclass = frh->tos; if (fib_rule_requires_fldissect(rule)) net->ipv6.fib6_rules_require_fldissect++; @@ -402,7 +409,7 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, if (frh->dst_len && (rule6->dst.plen != frh->dst_len)) return 0; - if (frh->tos && (rule6->tclass != frh->tos)) + if (frh->tos && inet_dscp_to_dsfield(rule6->dscp) != frh->tos) return 0; if (frh->src_len && @@ -423,7 +430,7 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->dst_len = rule6->dst.plen; frh->src_len = rule6->src.plen; - frh->tos = rule6->tclass; + frh->tos = inet_dscp_to_dsfield(rule6->dscp); if ((rule6->dst.plen && nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) || @@ -486,16 +493,21 @@ out_fib6_rules_ops: goto out; } -static void __net_exit fib6_rules_net_exit(struct net *net) +static void __net_exit fib6_rules_net_exit_batch(struct list_head *net_list) { + struct net *net; + rtnl_lock(); - fib_rules_unregister(net->ipv6.fib6_rules_ops); + list_for_each_entry(net, net_list, exit_list) { + fib_rules_unregister(net->ipv6.fib6_rules_ops); + cond_resched(); + } rtnl_unlock(); } static struct pernet_operations fib6_rules_net_ops = { .init = fib6_rules_net_init, - .exit = fib6_rules_net_exit, + .exit_batch = fib6_rules_net_exit_batch, }; int __init fib6_rules_init(void) diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 4514444e96c8..4740afecf7c6 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk) { int err = 0; - if (sk->sk_state != TCP_CLOSE) 
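/* inet6_hash() sheds its local_bh_disable()/enable() pair for the same
 * reason inet_hash() did earlier in this patch: __inet_hash() now disables
 * BHs itself around inet_ehash_nolisten() for non-listening sockets, and
 * the listening path takes ilb->lock without disabling BHs, matching the
 * PREEMPT_RT rationale spelled out in inet_unhash(). Both wrappers reduce
 * to the bare state check:
 *
 *	if (sk->sk_state != TCP_CLOSE)
 *		err = __inet_hash(sk, NULL);
 */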
{ - local_bh_disable(); + if (sk->sk_state != TCP_CLOSE) err = __inet_hash(sk, NULL); - local_bh_enable(); - } return err; } diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index f90a87389fcc..f6f5b83dd954 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -32,13 +32,25 @@ struct ioam6_lwt_encap { struct ioam6_trace_hdr traceh; } __packed; +struct ioam6_lwt_freq { + u32 k; + u32 n; +}; + struct ioam6_lwt { struct dst_cache cache; + struct ioam6_lwt_freq freq; + atomic_t pkt_cnt; u8 mode; struct in6_addr tundst; struct ioam6_lwt_encap tuninfo; }; +static struct netlink_range_validation freq_range = { + .min = IOAM6_IPTUNNEL_FREQ_MIN, + .max = IOAM6_IPTUNNEL_FREQ_MAX, +}; + static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt) { return (struct ioam6_lwt *)lwt->data; @@ -55,6 +67,8 @@ static struct ioam6_trace_hdr *ioam6_lwt_trace(struct lwtunnel_state *lwt) } static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = { + [IOAM6_IPTUNNEL_FREQ_K] = NLA_POLICY_FULL_RANGE(NLA_U32, &freq_range), + [IOAM6_IPTUNNEL_FREQ_N] = NLA_POLICY_FULL_RANGE(NLA_U32, &freq_range), [IOAM6_IPTUNNEL_MODE] = NLA_POLICY_RANGE(NLA_U8, IOAM6_IPTUNNEL_MODE_MIN, IOAM6_IPTUNNEL_MODE_MAX), @@ -96,6 +110,7 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, struct lwtunnel_state *lwt; struct ioam6_lwt *ilwt; int len_aligned, err; + u32 freq_k, freq_n; u8 mode; if (family != AF_INET6) @@ -106,6 +121,23 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, if (err < 0) return err; + if ((!tb[IOAM6_IPTUNNEL_FREQ_K] && tb[IOAM6_IPTUNNEL_FREQ_N]) || + (tb[IOAM6_IPTUNNEL_FREQ_K] && !tb[IOAM6_IPTUNNEL_FREQ_N])) { + NL_SET_ERR_MSG(extack, "freq: missing parameter"); + return -EINVAL; + } else if (!tb[IOAM6_IPTUNNEL_FREQ_K] && !tb[IOAM6_IPTUNNEL_FREQ_N]) { + freq_k = IOAM6_IPTUNNEL_FREQ_MIN; + freq_n = IOAM6_IPTUNNEL_FREQ_MIN; + } else { + freq_k = nla_get_u32(tb[IOAM6_IPTUNNEL_FREQ_K]); + freq_n = nla_get_u32(tb[IOAM6_IPTUNNEL_FREQ_N]); + + if (freq_k > freq_n) { + NL_SET_ERR_MSG(extack, "freq: k > n is forbidden"); + return -EINVAL; + } + } + if (!tb[IOAM6_IPTUNNEL_MODE]) mode = IOAM6_IPTUNNEL_MODE_INLINE; else @@ -140,6 +172,10 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, return err; } + atomic_set(&ilwt->pkt_cnt, 0); + ilwt->freq.k = freq_k; + ilwt->freq.n = freq_n; + ilwt->mode = mode; if (tb[IOAM6_IPTUNNEL_DST]) ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]); @@ -263,11 +299,18 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) struct in6_addr orig_daddr; struct ioam6_lwt *ilwt; int err = -EINVAL; + u32 pkt_cnt; if (skb->protocol != htons(ETH_P_IPV6)) goto drop; ilwt = ioam6_lwt_state(dst->lwtstate); + + /* Check for insertion frequency (i.e., "k over n" insertions) */ + pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt); + if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k) + goto out; + orig_daddr = ipv6_hdr(skb)->daddr; switch (ilwt->mode) { @@ -358,6 +401,14 @@ static int ioam6_fill_encap_info(struct sk_buff *skb, struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate); int err; + err = nla_put_u32(skb, IOAM6_IPTUNNEL_FREQ_K, ilwt->freq.k); + if (err) + goto ret; + + err = nla_put_u32(skb, IOAM6_IPTUNNEL_FREQ_N, ilwt->freq.n); + if (err) + goto ret; + err = nla_put_u8(skb, IOAM6_IPTUNNEL_MODE, ilwt->mode); if (err) goto ret; @@ -379,7 +430,9 @@ static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate) struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate); int nlsize; - 
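/* The frequency check added to ioam6_output() above implements "k over n"
 * sampling: of every n consecutive packets, only the first k receive IOAM
 * data. atomic_fetch_inc() returns the counter's value before the
 * increment, so pkt_cnt % ilwt->freq.n is the packet's position within the
 * current window. Worked example with k = 2, n = 5: positions 0 and 1
 * satisfy pkt_cnt % 5 < 2 and are encapsulated; positions 2, 3 and 4 hit
 * the "goto out" path and are forwarded untouched. The build_state default
 * of k = n = IOAM6_IPTUNNEL_FREQ_MIN makes the condition always false,
 * i.e. insert on every packet.
 */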
nlsize = nla_total_size(sizeof(ilwt->mode)) + + nlsize = nla_total_size(sizeof(ilwt->freq.k)) + + nla_total_size(sizeof(ilwt->freq.n)) + + nla_total_size(sizeof(ilwt->mode)) + nla_total_size(sizeof(ilwt->tuninfo.traceh)); if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) @@ -395,7 +448,9 @@ static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) struct ioam6_lwt *ilwt_a = ioam6_lwt_state(a); struct ioam6_lwt *ilwt_b = ioam6_lwt_state(b); - return (ilwt_a->mode != ilwt_b->mode || + return (ilwt_a->freq.k != ilwt_b->freq.k || + ilwt_a->freq.n != ilwt_b->freq.n || + ilwt_a->mode != ilwt_b->mode || (ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE && !ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) || trace_a->namespace_id != trace_b->namespace_id); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 80256717868e..d4b1e2c5aa76 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -508,7 +508,7 @@ int ip6_mc_input(struct sk_buff *skb) /* * IPv6 multicast router mode is now supported ;) */ - if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && + if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) && !(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b47ffc81f9e5..53f632a560ec 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1122,7 +1122,10 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); } else if (skb->protocol == htons(ETH_P_IP)) { - struct rtable *rt = skb_rtable(skb); + const struct rtable *rt = skb_rtable(skb); + + if (!rt) + goto tx_err_link_failure; if (rt->rt_gw_family == AF_INET6) memcpy(&fl6->daddr, &rt->rt_gw6, sizeof(fl6->daddr)); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 7cf73e60e619..881fe6b50307 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -253,13 +253,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net) { struct mr_table *mrt, *next; - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { list_del(&mrt->list); ip6mr_free_table(mrt); } fib_rules_unregister(net->ipv6.mr6_rules_ops); - rtnl_unlock(); } static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, @@ -316,10 +315,9 @@ static int __net_init ip6mr_rules_init(struct net *net) static void __net_exit ip6mr_rules_exit(struct net *net) { - rtnl_lock(); + ASSERT_RTNL(); ip6mr_free_table(net->ipv6.mrt6); net->ipv6.mrt6 = NULL; - rtnl_unlock(); } static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, @@ -732,7 +730,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, in6_dev = __in6_dev_get(dev); if (in6_dev) { - in6_dev->cnf.mc_forwarding--; + atomic_dec(&in6_dev->cnf.mc_forwarding); inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); @@ -900,7 +898,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt, in6_dev = __in6_dev_get(dev); if (in6_dev) { - in6_dev->cnf.mc_forwarding++; + atomic_inc(&in6_dev->cnf.mc_forwarding); inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); @@ -1323,7 +1321,9 @@ static int __net_init ip6mr_net_init(struct net *net) proc_cache_fail: remove_proc_entry("ip6_mr_vif", net->proc_net); proc_vif_fail: + rtnl_lock(); ip6mr_rules_exit(net); + 
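/* The mc_forwarding conversion running through ip6_input.c and ip6mr.c
 * turns a plain int into an atomic_t: writers (mif6_add(), mif6_delete(),
 * ip6mr_sk_init()/ip6mr_sk_done()) update it under a lock, but readers
 * such as ip6_mc_input() and the netconf dump path check it locklessly,
 * which makes unannotated loads and stores data races. The shape after
 * conversion:
 *
 *	atomic_inc(&in6_dev->cnf.mc_forwarding);	// writer, lock held
 *
 *	if (atomic_read(&devconf->mc_forwarding))	// lockless reader
 *		...
 *
 * ip6mr_sk_done() additionally bails out early when the counter is already
 * zero, avoiding an RTNL round trip for sockets that never enabled
 * forwarding.
 */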
rtnl_unlock(); #endif ip6mr_rules_fail: ip6mr_notifier_exit(net); @@ -1336,13 +1336,23 @@ static void __net_exit ip6mr_net_exit(struct net *net) remove_proc_entry("ip6_mr_cache", net->proc_net); remove_proc_entry("ip6_mr_vif", net->proc_net); #endif - ip6mr_rules_exit(net); ip6mr_notifier_exit(net); } +static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list) +{ + struct net *net; + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) + ip6mr_rules_exit(net); + rtnl_unlock(); +} + static struct pernet_operations ip6mr_net_ops = { .init = ip6mr_net_init, .exit = ip6mr_net_exit, + .exit_batch = ip6mr_net_exit_batch, }; int __init ip6_mr_init(void) @@ -1551,7 +1561,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) } else { rcu_assign_pointer(mrt->mroute_sk, sk); sock_set_flag(sk, SOCK_RCU_FREE); - net->ipv6.devconf_all->mc_forwarding++; + atomic_inc(&net->ipv6.devconf_all->mc_forwarding); } write_unlock_bh(&mrt_lock); @@ -1567,14 +1577,19 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) int ip6mr_sk_done(struct sock *sk) { - int err = -EACCES; struct net *net = sock_net(sk); + struct ipv6_devconf *devconf; struct mr_table *mrt; + int err = -EACCES; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return err; + devconf = net->ipv6.devconf_all; + if (!devconf || !atomic_read(&devconf->mc_forwarding)) + return err; + rtnl_lock(); ip6mr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { @@ -1584,7 +1599,7 @@ int ip6mr_sk_done(struct sock *sk) * so the RCU grace period before sk freeing * is guaranteed by sk_destruct() */ - net->ipv6.devconf_all->mc_forwarding--; + atomic_dec(&devconf->mc_forwarding); write_unlock_bh(&mrt_lock); inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index c921de63b494..f0702d920d8d 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -6,6 +6,7 @@ * Copyright (c) 2021 Google */ +#include <linux/compat.h> #include <linux/if_arp.h> #include <linux/net.h> #include <linux/mctp.h> @@ -21,6 +22,8 @@ /* socket implementation */ +static void mctp_sk_expire_keys(struct timer_list *timer); + static int mctp_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -99,13 +102,20 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) struct sk_buff *skb; if (addr) { + const u8 tagbits = MCTP_TAG_MASK | MCTP_TAG_OWNER | + MCTP_TAG_PREALLOC; + if (addrlen < sizeof(struct sockaddr_mctp)) return -EINVAL; if (addr->smctp_family != AF_MCTP) return -EINVAL; if (!mctp_sockaddr_is_ok(addr)) return -EINVAL; - if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER)) + if (addr->smctp_tag & ~tagbits) + return -EINVAL; + /* can't preallocate a non-owned tag */ + if (addr->smctp_tag & MCTP_TAG_PREALLOC && + !(addr->smctp_tag & MCTP_TAG_OWNER)) return -EINVAL; } else { @@ -248,6 +258,32 @@ out_free: return rc; } +/* We're done with the key; invalidate, stop reassembly, and remove from lists. 
+ */ +static void __mctp_key_remove(struct mctp_sk_key *key, struct net *net, + unsigned long flags, unsigned long reason) +__releases(&key->lock) +__must_hold(&net->mctp.keys_lock) +{ + struct sk_buff *skb; + + trace_mctp_key_release(key, reason); + skb = key->reasm_head; + key->reasm_head = NULL; + key->reasm_dead = true; + key->valid = false; + mctp_dev_release_key(key->dev, key); + spin_unlock_irqrestore(&key->lock, flags); + + hlist_del(&key->hlist); + hlist_del(&key->sklist); + + /* unref for the lists */ + mctp_key_unref(key); + + kfree_skb(skb); +} + static int mctp_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { @@ -293,6 +329,115 @@ static int mctp_getsockopt(struct socket *sock, int level, int optname, return -EINVAL; } +static int mctp_ioctl_alloctag(struct mctp_sock *msk, unsigned long arg) +{ + struct net *net = sock_net(&msk->sk); + struct mctp_sk_key *key = NULL; + struct mctp_ioc_tag_ctl ctl; + unsigned long flags; + u8 tag; + + if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl))) + return -EFAULT; + + if (ctl.tag) + return -EINVAL; + + if (ctl.flags) + return -EINVAL; + + key = mctp_alloc_local_tag(msk, ctl.peer_addr, MCTP_ADDR_ANY, + true, &tag); + if (IS_ERR(key)) + return PTR_ERR(key); + + ctl.tag = tag | MCTP_TAG_OWNER | MCTP_TAG_PREALLOC; + if (copy_to_user((void __user *)arg, &ctl, sizeof(ctl))) { + spin_lock_irqsave(&key->lock, flags); + __mctp_key_remove(key, net, flags, MCTP_TRACE_KEY_DROPPED); + mctp_key_unref(key); + return -EFAULT; + } + + mctp_key_unref(key); + return 0; +} + +static int mctp_ioctl_droptag(struct mctp_sock *msk, unsigned long arg) +{ + struct net *net = sock_net(&msk->sk); + struct mctp_ioc_tag_ctl ctl; + unsigned long flags, fl2; + struct mctp_sk_key *key; + struct hlist_node *tmp; + int rc; + u8 tag; + + if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl))) + return -EFAULT; + + if (ctl.flags) + return -EINVAL; + + /* Must be a local tag, TO set, preallocated */ + if ((ctl.tag & ~MCTP_TAG_MASK) != (MCTP_TAG_OWNER | MCTP_TAG_PREALLOC)) + return -EINVAL; + + tag = ctl.tag & MCTP_TAG_MASK; + rc = -EINVAL; + + spin_lock_irqsave(&net->mctp.keys_lock, flags); + hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) { + /* we do an irqsave here, even though we know the irq state, + * so we have the flags to pass to __mctp_key_remove + */ + spin_lock_irqsave(&key->lock, fl2); + if (key->manual_alloc && + ctl.peer_addr == key->peer_addr && + tag == key->tag) { + __mctp_key_remove(key, net, fl2, + MCTP_TRACE_KEY_DROPPED); + rc = 0; + } else { + spin_unlock_irqrestore(&key->lock, fl2); + } + } + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); + + return rc; +} + +static int mctp_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk); + + switch (cmd) { + case SIOCMCTPALLOCTAG: + return mctp_ioctl_alloctag(msk, arg); + case SIOCMCTPDROPTAG: + return mctp_ioctl_droptag(msk, arg); + } + + return -EINVAL; +} + +#ifdef CONFIG_COMPAT +static int mctp_compat_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + void __user *argp = compat_ptr(arg); + + switch (cmd) { + /* These have compatible ptr layouts */ + case SIOCMCTPALLOCTAG: + case SIOCMCTPDROPTAG: + return mctp_ioctl(sock, cmd, (unsigned long)argp); + } + + return -ENOIOCTLCMD; +} +#endif + static const struct proto_ops mctp_dgram_ops = { .family = PF_MCTP, .release = mctp_release, @@ -302,7 +447,7 @@ static const struct proto_ops 
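/* Sketch of the expected userspace flow for the two new ioctls (error
 * handling elided; field semantics as enforced by the handlers above,
 * peer address chosen arbitrarily):
 *
 *	struct mctp_ioc_tag_ctl ctl = { .peer_addr = 9 };
 *
 *	// tag and flags must be zero on entry for SIOCMCTPALLOCTAG
 *	ioctl(sd, SIOCMCTPALLOCTAG, &ctl);
 *
 *	// ctl.tag now carries tag | MCTP_TAG_OWNER | MCTP_TAG_PREALLOC
 *	// and can be passed as smctp_tag in subsequent sendmsg() calls
 *
 *	// the same ctl, with OWNER | PREALLOC still set, releases the key
 *	ioctl(sd, SIOCMCTPDROPTAG, &ctl);
 *
 * Dropping requires an exact (peer_addr, tag) match on a manual_alloc
 * key; anything else returns -EINVAL.
 */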
mctp_dgram_ops = { .accept = sock_no_accept, .getname = sock_no_getname, .poll = datagram_poll, - .ioctl = sock_no_ioctl, + .ioctl = mctp_ioctl, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -312,6 +457,9 @@ static const struct proto_ops mctp_dgram_ops = { .recvmsg = mctp_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_ioctl = mctp_compat_ioctl, +#endif }; static void mctp_sk_expire_keys(struct timer_list *timer) @@ -319,7 +467,7 @@ static void mctp_sk_expire_keys(struct timer_list *timer) struct mctp_sock *msk = container_of(timer, struct mctp_sock, key_expiry); struct net *net = sock_net(&msk->sk); - unsigned long next_expiry, flags; + unsigned long next_expiry, flags, fl2; struct mctp_sk_key *key; struct hlist_node *tmp; bool next_expiry_valid = false; @@ -327,15 +475,16 @@ static void mctp_sk_expire_keys(struct timer_list *timer) spin_lock_irqsave(&net->mctp.keys_lock, flags); hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) { - spin_lock(&key->lock); + /* don't expire. manual_alloc is immutable, no locking + * required. + */ + if (key->manual_alloc) + continue; + spin_lock_irqsave(&key->lock, fl2); if (!time_after_eq(key->expiry, jiffies)) { - trace_mctp_key_release(key, MCTP_TRACE_KEY_TIMEOUT); - key->valid = false; - hlist_del_rcu(&key->hlist); - hlist_del_rcu(&key->sklist); - spin_unlock(&key->lock); - mctp_key_unref(key); + __mctp_key_remove(key, net, fl2, + MCTP_TRACE_KEY_TIMEOUT); continue; } @@ -346,7 +495,7 @@ static void mctp_sk_expire_keys(struct timer_list *timer) next_expiry = key->expiry; next_expiry_valid = true; } - spin_unlock(&key->lock); + spin_unlock_irqrestore(&key->lock, fl2); } spin_unlock_irqrestore(&net->mctp.keys_lock, flags); @@ -387,9 +536,9 @@ static void mctp_sk_unhash(struct sock *sk) { struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); struct net *net = sock_net(sk); + unsigned long flags, fl2; struct mctp_sk_key *key; struct hlist_node *tmp; - unsigned long flags; /* remove from any type-based binds */ mutex_lock(&net->mctp.bind_lock); @@ -399,20 +548,8 @@ static void mctp_sk_unhash(struct sock *sk) /* remove tag allocations */ spin_lock_irqsave(&net->mctp.keys_lock, flags); hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) { - hlist_del(&key->sklist); - hlist_del(&key->hlist); - - trace_mctp_key_release(key, MCTP_TRACE_KEY_CLOSED); - - spin_lock(&key->lock); - kfree_skb(key->reasm_head); - key->reasm_head = NULL; - key->reasm_dead = true; - key->valid = false; - spin_unlock(&key->lock); - - /* key is no longer on the lookup lists, unref */ - mctp_key_unref(key); + spin_lock_irqsave(&key->lock, fl2); + __mctp_key_remove(key, net, fl2, MCTP_TRACE_KEY_CLOSED); } spin_unlock_irqrestore(&net->mctp.keys_lock, flags); } diff --git a/net/mctp/device.c b/net/mctp/device.c index ef2755f82f87..02ddc0f1bd3e 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -6,6 +6,7 @@ * Copyright (c) 2021 Google */ +#include <linux/if_arp.h> #include <linux/if_link.h> #include <linux/mctp.h> #include <linux/netdevice.h> diff --git a/net/mctp/route.c b/net/mctp/route.c index 8d9f4ff3e285..17e3482aa770 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -64,8 +64,7 @@ static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb) if (msk->bind_type != type) continue; - if (msk->bind_addr != MCTP_ADDR_ANY && - msk->bind_addr != mh->dest) + if (!mctp_address_matches(msk->bind_addr, mh->dest)) continue; return msk; @@ -77,7 +76,7 @@ static 
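/* Both teardown paths rewritten above (key expiry and socket unhash) now
 * funnel into __mctp_key_remove(), whose locking contract matches its
 * annotations: enter with net->mctp.keys_lock held (__must_hold) and
 * key->lock held with saved irq flags, and it drops key->lock itself
 * (__releases) before unlinking the key. Callers therefore take the form:
 *
 *	spin_lock_irqsave(&net->mctp.keys_lock, flags);
 *	hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
 *		spin_lock_irqsave(&key->lock, fl2);
 *		__mctp_key_remove(key, net, fl2, reason);  // drops key->lock
 *	}
 *	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
 *
 * The inner irqsave is taken even where the irq state is already known,
 * purely so there are flags to hand to __mctp_key_remove(), as the comment
 * in mctp_ioctl_droptag() notes.
 */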
struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb) static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local, mctp_eid_t peer, u8 tag) { - if (key->local_addr != local) + if (!mctp_address_matches(key->local_addr, local)) return false; if (key->peer_addr != peer) @@ -204,29 +203,38 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) return rc; } -/* We're done with the key; unset valid and remove from lists. There may still - * be outstanding refs on the key though... +/* Helper for mctp_route_input(). + * We're done with the key; unlock and unref the key. + * For the usual case of automatic expiry we remove the key from lists. + * In the case that manual allocation is set on a key we release the lock + * and local ref, reset reassembly, but don't remove from lists. */ -static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net, - unsigned long flags) - __releases(&key->lock) +static void __mctp_key_done_in(struct mctp_sk_key *key, struct net *net, + unsigned long flags, unsigned long reason) +__releases(&key->lock) { struct sk_buff *skb; + trace_mctp_key_release(key, reason); skb = key->reasm_head; key->reasm_head = NULL; - key->reasm_dead = true; - key->valid = false; - mctp_dev_release_key(key->dev, key); + + if (!key->manual_alloc) { + key->reasm_dead = true; + key->valid = false; + mctp_dev_release_key(key->dev, key); + } spin_unlock_irqrestore(&key->lock, flags); - spin_lock_irqsave(&net->mctp.keys_lock, flags); - hlist_del(&key->hlist); - hlist_del(&key->sklist); - spin_unlock_irqrestore(&net->mctp.keys_lock, flags); + if (!key->manual_alloc) { + spin_lock_irqsave(&net->mctp.keys_lock, flags); + hlist_del(&key->hlist); + hlist_del(&key->sklist); + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); - /* one unref for the lists */ - mctp_key_unref(key); + /* unref for the lists */ + mctp_key_unref(key); + } /* and one for the local reference */ mctp_key_unref(key); @@ -380,9 +388,8 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) /* we've hit a pending reassembly; not much we * can do but drop it */ - trace_mctp_key_release(key, - MCTP_TRACE_KEY_REPLIED); - __mctp_key_unlock_drop(key, net, f); + __mctp_key_done_in(key, net, f, + MCTP_TRACE_KEY_REPLIED); key = NULL; } rc = 0; @@ -424,9 +431,8 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) } else { if (key->reasm_head || key->reasm_dead) { /* duplicate start? 
drop everything */ - trace_mctp_key_release(key, - MCTP_TRACE_KEY_INVALIDATED); - __mctp_key_unlock_drop(key, net, f); + __mctp_key_done_in(key, net, f, + MCTP_TRACE_KEY_INVALIDATED); rc = -EEXIST; key = NULL; } else { @@ -449,10 +455,10 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * the reassembly/response key */ if (!rc && flags & MCTP_HDR_FLAG_EOM) { + msk = container_of(key->sk, struct mctp_sock, sk); sock_queue_rcv_skb(key->sk, key->reasm_head); key->reasm_head = NULL; - trace_mctp_key_release(key, MCTP_TRACE_KEY_REPLIED); - __mctp_key_unlock_drop(key, net, f); + __mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED); key = NULL; } @@ -580,9 +586,9 @@ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key, /* Allocate a locally-owned tag value for (saddr, daddr), and reserve * it for the socket msk */ -static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, - mctp_eid_t saddr, - mctp_eid_t daddr, u8 *tagp) +struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, + mctp_eid_t daddr, mctp_eid_t saddr, + bool manual, u8 *tagp) { struct net *net = sock_net(&msk->sk); struct netns_mctp *mns = &net->mctp; @@ -616,9 +622,8 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, if (tmp->tag & MCTP_HDR_FLAG_TO) continue; - if (!((tmp->peer_addr == daddr || - tmp->peer_addr == MCTP_ADDR_ANY) && - tmp->local_addr == saddr)) + if (!(mctp_address_matches(tmp->peer_addr, daddr) && + mctp_address_matches(tmp->local_addr, saddr))) continue; spin_lock(&tmp->lock); @@ -638,6 +643,7 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, mctp_reserve_tag(net, key, msk); trace_mctp_key_acquire(key); + key->manual_alloc = manual; *tagp = key->tag; } @@ -651,6 +657,50 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, return key; } +static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk, + mctp_eid_t daddr, + u8 req_tag, u8 *tagp) +{ + struct net *net = sock_net(&msk->sk); + struct netns_mctp *mns = &net->mctp; + struct mctp_sk_key *key, *tmp; + unsigned long flags; + + req_tag &= ~(MCTP_TAG_PREALLOC | MCTP_TAG_OWNER); + key = NULL; + + spin_lock_irqsave(&mns->keys_lock, flags); + + hlist_for_each_entry(tmp, &mns->keys, hlist) { + if (tmp->tag != req_tag) + continue; + + if (!mctp_address_matches(tmp->peer_addr, daddr)) + continue; + + if (!tmp->manual_alloc) + continue; + + spin_lock(&tmp->lock); + if (tmp->valid) { + key = tmp; + refcount_inc(&key->refs); + spin_unlock(&tmp->lock); + break; + } + spin_unlock(&tmp->lock); + } + spin_unlock_irqrestore(&mns->keys_lock, flags); + + if (!key) + return ERR_PTR(-ENOENT); + + if (tagp) + *tagp = key->tag; + + return key; +} + /* routing lookups */ static bool mctp_rt_match_eid(struct mctp_route *rt, unsigned int net, mctp_eid_t eid) @@ -845,8 +895,14 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, if (rc) goto out_release; - if (req_tag & MCTP_HDR_FLAG_TO) { - key = mctp_alloc_local_tag(msk, saddr, daddr, &tag); + if (req_tag & MCTP_TAG_OWNER) { + if (req_tag & MCTP_TAG_PREALLOC) + key = mctp_lookup_prealloc_tag(msk, daddr, + req_tag, &tag); + else + key = mctp_alloc_local_tag(msk, daddr, saddr, + false, &tag); + if (IS_ERR(key)) { rc = PTR_ERR(key); goto out_release; @@ -857,7 +913,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, tag |= MCTP_HDR_FLAG_TO; } else { key = NULL; - tag = req_tag; + tag = req_tag & MCTP_TAG_MASK; } skb->protocol = htons(ETH_P_MCTP); diff --git 
a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 750f9f9b4daf..61205cf40074 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -369,14 +369,15 @@ static void mctp_test_route_input_sk(struct kunit *test) #define FL_S (MCTP_HDR_FLAG_SOM) #define FL_E (MCTP_HDR_FLAG_EOM) -#define FL_T (MCTP_HDR_FLAG_TO) +#define FL_TO (MCTP_HDR_FLAG_TO) +#define FL_T(t) ((t) & MCTP_HDR_TAG_MASK) static const struct mctp_route_input_sk_test mctp_route_input_sk_tests[] = { - { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T), .type = 0, .deliver = true }, - { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T), .type = 1, .deliver = false }, + { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_TO), .type = 0, .deliver = true }, + { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_TO), .type = 1, .deliver = false }, { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E), .type = 0, .deliver = false }, - { .hdr = RX_HDR(1, 10, 8, FL_E | FL_T), .type = 0, .deliver = false }, - { .hdr = RX_HDR(1, 10, 8, FL_T), .type = 0, .deliver = false }, + { .hdr = RX_HDR(1, 10, 8, FL_E | FL_TO), .type = 0, .deliver = false }, + { .hdr = RX_HDR(1, 10, 8, FL_TO), .type = 0, .deliver = false }, { .hdr = RX_HDR(1, 10, 8, 0), .type = 0, .deliver = false }, }; @@ -436,7 +437,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test) __mctp_route_test_fini(test, dev, rt, sock); } -#define RX_FRAG(f, s) RX_HDR(1, 10, 8, FL_T | (f) | ((s) << MCTP_HDR_SEQ_SHIFT)) +#define RX_FRAG(f, s) RX_HDR(1, 10, 8, FL_TO | (f) | ((s) << MCTP_HDR_SEQ_SHIFT)) static const struct mctp_route_input_sk_reasm_test mctp_route_input_sk_reasm_tests[] = { { @@ -522,12 +523,156 @@ static void mctp_route_input_sk_reasm_to_desc( KUNIT_ARRAY_PARAM(mctp_route_input_sk_reasm, mctp_route_input_sk_reasm_tests, mctp_route_input_sk_reasm_to_desc); +struct mctp_route_input_sk_keys_test { + const char *name; + mctp_eid_t key_peer_addr; + mctp_eid_t key_local_addr; + u8 key_tag; + struct mctp_hdr hdr; + bool deliver; +}; + +/* test packet rx in the presence of various key configurations */ +static void mctp_test_route_input_sk_keys(struct kunit *test) +{ + const struct mctp_route_input_sk_keys_test *params; + struct mctp_test_route *rt; + struct sk_buff *skb, *skb2; + struct mctp_test_dev *dev; + struct mctp_sk_key *key; + struct netns_mctp *mns; + struct mctp_sock *msk; + struct socket *sock; + unsigned long flags; + int rc; + u8 c; + + params = test->param_value; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); + + rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); + KUNIT_ASSERT_EQ(test, rc, 0); + + msk = container_of(sock->sk, struct mctp_sock, sk); + mns = &sock_net(sock->sk)->mctp; + + /* set the incoming tag according to test params */ + key = mctp_key_alloc(msk, params->key_local_addr, params->key_peer_addr, + params->key_tag, GFP_KERNEL); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, key); + + spin_lock_irqsave(&mns->keys_lock, flags); + mctp_reserve_tag(&init_net, key, msk); + spin_unlock_irqrestore(&mns->keys_lock, flags); + + /* create packet and route */ + c = 0; + skb = mctp_test_create_skb_data(¶ms->hdr, &c); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); + + skb->dev = dev->ndev; + __mctp_cb(skb); + + rc = mctp_route_input(&rt->rt, skb); + + /* (potentially) receive message */ + skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + + if (params->deliver) + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); + else + 
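/* [editor's note] the manual_alloc / MCTP_TAG_PREALLOC plumbing exercised by
 * these tests pairs with the mctp_lookup_prealloc_tag() path above: a sender
 * holding a preallocated tag presumably passes it back via sockaddr_mctp.
 * Illustrative userspace sketch only; addresses, type and the exact flag
 * contract are assumptions:
 *
 *      struct sockaddr_mctp addr = {
 *              .smctp_family = AF_MCTP,
 *              .smctp_network = MCTP_NET_ANY,
 *              .smctp_addr.s_addr = 9,
 *              .smctp_type = 1,
 *              .smctp_tag = tag | MCTP_TAG_OWNER | MCTP_TAG_PREALLOC,
 *      };
 *      sendto(sd, buf, len, 0, (struct sockaddr *)&addr, sizeof(addr));
 */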
KUNIT_EXPECT_PTR_EQ(test, skb2, NULL); + + if (skb2) + skb_free_datagram(sock->sk, skb2); + + mctp_key_unref(key); + __mctp_route_test_fini(test, dev, rt, sock); +} + +static const struct mctp_route_input_sk_keys_test mctp_route_input_sk_keys_tests[] = { + { + .name = "direct match", + .key_peer_addr = 9, + .key_local_addr = 8, + .key_tag = 1, + .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1)), + .deliver = true, + }, + { + .name = "flipped src/dest", + .key_peer_addr = 8, + .key_local_addr = 9, + .key_tag = 1, + .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1)), + .deliver = false, + }, + { + .name = "peer addr mismatch", + .key_peer_addr = 9, + .key_local_addr = 8, + .key_tag = 1, + .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T(1)), + .deliver = false, + }, + { + .name = "tag value mismatch", + .key_peer_addr = 9, + .key_local_addr = 8, + .key_tag = 1, + .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(2)), + .deliver = false, + }, + { + .name = "TO mismatch", + .key_peer_addr = 9, + .key_local_addr = 8, + .key_tag = 1, + .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1) | FL_TO), + .deliver = false, + }, + { + .name = "broadcast response", + .key_peer_addr = MCTP_ADDR_ANY, + .key_local_addr = 8, + .key_tag = 1, + .hdr = RX_HDR(1, 11, 8, FL_S | FL_E | FL_T(1)), + .deliver = true, + }, + { + .name = "any local match", + .key_peer_addr = 12, + .key_local_addr = MCTP_ADDR_ANY, + .key_tag = 1, + .hdr = RX_HDR(1, 12, 8, FL_S | FL_E | FL_T(1)), + .deliver = true, + }, +}; + +static void mctp_route_input_sk_keys_to_desc( + const struct mctp_route_input_sk_keys_test *t, + char *desc) +{ + sprintf(desc, "%s", t->name); +} + +KUNIT_ARRAY_PARAM(mctp_route_input_sk_keys, mctp_route_input_sk_keys_tests, + mctp_route_input_sk_keys_to_desc); + static struct kunit_case mctp_test_cases[] = { KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params), KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params), KUNIT_CASE_PARAM(mctp_test_route_input_sk, mctp_route_input_sk_gen_params), KUNIT_CASE_PARAM(mctp_test_route_input_sk_reasm, mctp_route_input_sk_reasm_gen_params), + KUNIT_CASE_PARAM(mctp_test_route_input_sk_keys, + mctp_route_input_sk_keys_gen_params), {} }; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d47795748ad7..5464c2d268bd 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1174,14 +1174,8 @@ skip_family: if (tb[MPTCP_PM_ADDR_ATTR_FLAGS]) entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]); - if (tb[MPTCP_PM_ADDR_ATTR_PORT]) { - if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { - NL_SET_ERR_MSG_ATTR(info->extack, attr, - "flags must have signal when using port"); - return -EINVAL; - } + if (tb[MPTCP_PM_ADDR_ATTR_PORT]) entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); - } return 0; } @@ -1227,6 +1221,11 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; + if (addr.addr.port && !(addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + GENL_SET_ERR_MSG(info, "flags must have signal when using port"); + return -EINVAL; + } + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { GENL_SET_ERR_MSG(info, "can't allocate addr"); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 354cb472f386..d1c9dfbb11fa 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -621,7 +621,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, case NF_ACCEPT: break; case NF_DROP: - kfree_skb(skb); + kfree_skb_reason(skb, + SKB_DROP_REASON_NETFILTER_DROP); ret = NF_DROP_GETERR(verdict); if 
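/* [editor's note] NF_DROP_GETERR(), used just below, extracts a negative
 * errno that a hook may have packed into the upper bits of its verdict; the
 * -EPERM that follows is only the fallback. A hook requesting a specific
 * error would use the existing NF_DROP_ERR() encoding, e.g.:
 *
 *      return NF_DROP_ERR(-EHOSTUNREACH);
 */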
(ret == 0) ret = -EPERM; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 67ad08320886..7e8a39a35627 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -37,6 +37,7 @@ #include <net/genetlink.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/pkt_cls.h> #include "datapath.h" #include "flow.h" @@ -1601,8 +1602,6 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, dp->user_features = 0; } -DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support); - static int ovs_dp_set_upcall_portids(struct datapath *dp, const struct nlattr *ids) { @@ -1657,7 +1656,7 @@ u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id) static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) { - u32 user_features = 0; + u32 user_features = 0, old_features = dp->user_features; int err; if (a[OVS_DP_ATTR_USER_FEATURES]) { @@ -1696,10 +1695,12 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) return err; } - if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) - static_branch_enable(&tc_recirc_sharing_support); - else - static_branch_disable(&tc_recirc_sharing_support); + if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) && + !(old_features & OVS_DP_F_TC_RECIRC_SHARING)) + tc_skb_ext_tc_enable(); + else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) && + (old_features & OVS_DP_F_TC_RECIRC_SHARING)) + tc_skb_ext_tc_disable(); return 0; } @@ -1839,6 +1840,9 @@ static void __dp_destroy(struct datapath *dp) struct flow_table *table = &dp->table; int i; + if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) + tc_skb_ext_tc_disable(); + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; struct hlist_node *n; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index fcfe6cb46441..0cd29971a907 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -253,8 +253,6 @@ static inline struct datapath *get_dp(struct net *net, int dp_ifindex) extern struct notifier_block ovs_dp_device_notifier; extern struct genl_family dp_vport_genl_family; -DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support); - void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key); void ovs_dp_detach_port(struct vport *); int ovs_dp_upcall(struct datapath *, struct sk_buff *, diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 02096f2ec678..f6cd24fd530c 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -34,6 +34,7 @@ #include <net/mpls.h> #include <net/ndisc.h> #include <net/nsh.h> +#include <net/pkt_cls.h> #include <net/netfilter/nf_conntrack_zones.h> #include "conntrack.h" @@ -895,7 +896,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->mac_proto = res; #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - if (static_branch_unlikely(&tc_recirc_sharing_support)) { + if (tc_skb_ext_tc_enabled()) { tc_ext = skb_ext_find(skb, TC_SKB_EXT); key->recirc_id = tc_ext ? tc_ext->chain : 0; OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5f0f346b576f..ff1e6b474fef 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -49,6 +49,23 @@ static LIST_HEAD(tcf_proto_base); /* Protects list of registered TC modules. It is pure SMP lock. 
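[editor's note: the tc_skb_ext_tc_enable()/tc_skb_ext_tc_disable() helpers
added just below wrap static_branch_inc()/static_branch_dec(), so the key
counts users instead of acting as a boolean. That is the point of replacing
the old static_branch_enable()/disable() calls in openvswitch: with several
datapaths the operations now compose,

        tc_skb_ext_tc_enable();         dp A: count 0 -> 1, branch on
        tc_skb_ext_tc_enable();         dp B: count 1 -> 2
        tc_skb_ext_tc_disable();        dp A gone: count 2 -> 1, still on
        tc_skb_ext_tc_disable();        dp B gone: count 1 -> 0, branch off

whereas a plain disable from dp A would have switched the feature off under
dp B.]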
*/ static DEFINE_RWLOCK(cls_mod_lock); +#ifdef CONFIG_NET_CLS_ACT +DEFINE_STATIC_KEY_FALSE(tc_skb_ext_tc); +EXPORT_SYMBOL(tc_skb_ext_tc); + +void tc_skb_ext_tc_enable(void) +{ + static_branch_inc(&tc_skb_ext_tc); +} +EXPORT_SYMBOL(tc_skb_ext_tc_enable); + +void tc_skb_ext_tc_disable(void) +{ + static_branch_dec(&tc_skb_ext_tc); +} +EXPORT_SYMBOL(tc_skb_ext_tc_disable); +#endif + static u32 destroy_obj_hashfn(const struct tcf_proto *tp) { return jhash_3words(tp->chain->index, tp->prio, @@ -1615,19 +1632,21 @@ int tcf_classify(struct sk_buff *skb, ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, &last_executed_chain); - /* If we missed on some chain */ - if (ret == TC_ACT_UNSPEC && last_executed_chain) { - struct tc_skb_cb *cb = tc_skb_cb(skb); - - ext = tc_skb_ext_alloc(skb); - if (WARN_ON_ONCE(!ext)) - return TC_ACT_SHOT; - ext->chain = last_executed_chain; - ext->mru = cb->mru; - ext->post_ct = cb->post_ct; - ext->post_ct_snat = cb->post_ct_snat; - ext->post_ct_dnat = cb->post_ct_dnat; - ext->zone = cb->zone; + if (tc_skb_ext_tc_enabled()) { + /* If we missed on some chain */ + if (ret == TC_ACT_UNSPEC && last_executed_chain) { + struct tc_skb_cb *cb = tc_skb_cb(skb); + + ext = tc_skb_ext_alloc(skb); + if (WARN_ON_ONCE(!ext)) + return TC_ACT_SHOT; + ext->chain = last_executed_chain; + ext->mru = cb->mru; + ext->post_ct = cb->post_ct; + ext->post_ct_snat = cb->post_ct_snat; + ext->post_ct_dnat = cb->post_ct_dnat; + ext->zone = cb->zone; + } } return ret; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index efc84845bb6b..0024a692f0f8 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1433,7 +1433,8 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, if (*zc && (out_iov || out_sg)) { if (out_iov) - n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1; + n_sgout = 1 + + iov_iter_npages_cap(out_iov, INT_MAX, data_len); else n_sgout = sg_nents(out_sg); n_sgin = skb_nsg(skb, rxm->offset + prot->prepend_size, diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 28ef3f4465ae..2abd64e4d589 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -343,9 +343,9 @@ out: } EXPORT_SYMBOL(xsk_tx_peek_desc); -static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_desc *descs, - u32 max_entries) +static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, u32 max_entries) { + struct xdp_desc *descs = pool->tx_descs; u32 nb_pkts = 0; while (nb_pkts < max_entries && xsk_tx_peek_desc(pool, &descs[nb_pkts])) @@ -355,8 +355,7 @@ static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_d return nb_pkts; } -u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *descs, - u32 max_entries) +u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries) { struct xdp_sock *xs; u32 nb_pkts; @@ -365,7 +364,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc * if (!list_is_singular(&pool->xsk_tx_list)) { /* Fallback to the non-batched version */ rcu_read_unlock(); - return xsk_tx_peek_release_fallback(pool, descs, max_entries); + return xsk_tx_peek_release_fallback(pool, max_entries); } xs = list_first_or_null_rcu(&pool->xsk_tx_list, struct xdp_sock, tx_list); @@ -374,7 +373,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc * goto out; } - nb_pkts = xskq_cons_peek_desc_batch(xs->tx, descs, pool, max_entries); + nb_pkts = xskq_cons_peek_desc_batch(xs->tx, pool, max_entries); if (!nb_pkts) { xs->tx->queue_empty_descs++; goto out; @@ -386,7 +385,7 
@@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc * * packets. This avoids having to implement any buffering in * the Tx path. */ - nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, descs, nb_pkts); + nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, pool->tx_descs, nb_pkts); if (!nb_pkts) goto out; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index fd39bb660ebc..b34fca6ada86 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -37,6 +37,7 @@ void xp_destroy(struct xsk_buff_pool *pool) if (!pool) return; + kvfree(pool->tx_descs); kvfree(pool->heads); kvfree(pool); } @@ -58,6 +59,12 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, if (!pool->heads) goto out; + if (xs->tx) { + pool->tx_descs = kcalloc(xs->tx->nentries, sizeof(*pool->tx_descs), GFP_KERNEL); + if (!pool->tx_descs) + goto out; + } + pool->chunk_mask = ~((u64)umem->chunk_size - 1); pool->addrs_cnt = umem->size; pool->heads_cnt = umem->chunks; diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index e9aa2c236356..801cda5d1938 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -205,11 +205,11 @@ static inline bool xskq_cons_read_desc(struct xsk_queue *q, return false; } -static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, - struct xdp_desc *descs, - struct xsk_buff_pool *pool, u32 max) +static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool, + u32 max) { u32 cached_cons = q->cached_cons, nb_entries = 0; + struct xdp_desc *descs = pool->tx_descs; while (cached_cons != q->cached_prod && nb_entries < max) { struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; @@ -282,12 +282,12 @@ static inline bool xskq_cons_peek_desc(struct xsk_queue *q, return xskq_cons_read_desc(q, desc, pool); } -static inline u32 xskq_cons_peek_desc_batch(struct xsk_queue *q, struct xdp_desc *descs, - struct xsk_buff_pool *pool, u32 max) +static inline u32 xskq_cons_peek_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool, + u32 max) { u32 entries = xskq_cons_nb_entries(q, max); - return xskq_cons_read_desc_batch(q, descs, pool, entries); + return xskq_cons_read_desc_batch(q, pool, entries); } /* To improve performance in the xskq_cons_release functions, only update local state here. 
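[editor's note: with the descriptor array now owned by the pool, the batched
Tx API drops its descs parameter throughout. Driver-side sketch of the
resulting calling convention; hw_post_tx() is a hypothetical device hook:

        u32 i, budget = 64;
        u32 n = xsk_tx_peek_release_desc_batch(pool, budget);

        for (i = 0; i < n; i++)
                hw_post_tx(pool->tx_descs[i].addr, pool->tx_descs[i].len);

callers no longer size or free a scratch array; xp_create_and_assign_umem()
and xp_destroy() above own the pool->tx_descs lifetime.]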
@@ -304,13 +304,6 @@ static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt) q->cached_cons += cnt; } -static inline bool xskq_cons_is_full(struct xsk_queue *q) -{ - /* No barriers needed since data is not accessed */ - return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) == - q->nentries; -} - static inline u32 xskq_cons_present_entries(struct xsk_queue *q) { /* No barriers needed since data is not accessed */ diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 319fd31522f3..e69651a6902f 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -413,7 +413,7 @@ static void fixup_map(struct bpf_object *obj) for (i = 0; i < NR_TESTS; i++) { if (!strcmp(test_map_names[i], name) && (check_test_flags(i))) { - bpf_map__resize(map, num_map_entries); + bpf_map__set_max_entries(map, num_map_entries); continue; } } diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 3ec8ad9c1750..631f0cabe139 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -79,13 +79,11 @@ static void usage(const char *prog) int main(int argc, char **argv) { - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_XDP, - }; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); const char *optstr = "FSN"; int prog_fd, map_fd, opt; + struct bpf_program *prog; struct bpf_object *obj; struct bpf_map *map; char filename[256]; @@ -123,11 +121,19 @@ int main(int argc, char **argv) } snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - prog_load_attr.file = filename; + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) + return 1; + + prog = bpf_object__next_program(obj, NULL); + bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + err = bpf_object__load(obj); + if (err) return 1; + prog_fd = bpf_program__fd(prog); + map = bpf_object__next_map(obj, NULL); if (!map) { printf("finding a map in obj file failed\n"); diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index 6c61d5f570fb..b3f6e49676ed 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -82,15 +82,13 @@ static void usage(const char *cmd) int main(int argc, char **argv) { - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_XDP, - }; unsigned char opt_flags[256] = {}; const char *optstr = "i:T:P:SNFh"; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); unsigned int kill_after_s = 0; int i, prog_fd, map_fd, opt; + struct bpf_program *prog; struct bpf_object *obj; __u32 max_pckt_size = 0; __u32 key = 0; @@ -148,11 +146,20 @@ int main(int argc, char **argv) } snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - prog_load_attr.file = filename; - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) return 1; + prog = bpf_object__next_program(obj, NULL); + bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); + + err = bpf_object__load(obj); + if (err) + return 1; + + prog_fd = bpf_program__fd(prog); + /* static global var 'max_pcktsz' is accessible from .data section */ if (max_pckt_size) { map_fd = bpf_object__find_map_fd_by_name(obj, "xdp_adju.data"); diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c index 79ccd9891924..1828487bae9a 100644 --- a/samples/bpf/xdp_fwd_user.c +++ b/samples/bpf/xdp_fwd_user.c @@ -75,14 +75,11 @@ static 
void usage(const char *prog) int main(int argc, char **argv) { - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_XDP, - }; const char *prog_name = "xdp_fwd"; struct bpf_program *prog = NULL; struct bpf_program *pos; const char *sec_name; - int prog_fd, map_fd = -1; + int prog_fd = -1, map_fd = -1; char filename[PATH_MAX]; struct bpf_object *obj; int opt, i, idx, err; @@ -119,7 +116,6 @@ int main(int argc, char **argv) if (attach) { snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - prog_load_attr.file = filename; if (access(filename, O_RDONLY) < 0) { printf("error accessing file %s: %s\n", @@ -127,7 +123,14 @@ int main(int argc, char **argv) return 1; } - err = bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) + return 1; + + prog = bpf_object__next_program(obj, NULL); + bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); + + err = bpf_object__load(obj); if (err) { printf("Does kernel support devmap lookup?\n"); /* If not, the error message will be: diff --git a/samples/bpf/xdp_redirect_cpu.bpf.c b/samples/bpf/xdp_redirect_cpu.bpf.c index 25e3a405375f..87c54bfdbb70 100644 --- a/samples/bpf/xdp_redirect_cpu.bpf.c +++ b/samples/bpf/xdp_redirect_cpu.bpf.c @@ -491,7 +491,7 @@ int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx) return bpf_redirect_map(&cpu_map, cpu_dest, 0); } -SEC("xdp_cpumap/redirect") +SEC("xdp/cpumap") int xdp_redirect_cpu_devmap(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; @@ -507,19 +507,19 @@ int xdp_redirect_cpu_devmap(struct xdp_md *ctx) return bpf_redirect_map(&tx_port, 0, 0); } -SEC("xdp_cpumap/pass") +SEC("xdp/cpumap") int xdp_redirect_cpu_pass(struct xdp_md *ctx) { return XDP_PASS; } -SEC("xdp_cpumap/drop") +SEC("xdp/cpumap") int xdp_redirect_cpu_drop(struct xdp_md *ctx) { return XDP_DROP; } -SEC("xdp_devmap/egress") +SEC("xdp/devmap") int xdp_redirect_egress_prog(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index a81704d3317b..5f74a70a9021 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -70,7 +70,7 @@ static void print_avail_progs(struct bpf_object *obj) printf(" Programs to be used for -p/--progname:\n"); bpf_object__for_each_program(pos, obj) { - if (bpf_program__is_xdp(pos)) { + if (bpf_program__type(pos) == BPF_PROG_TYPE_XDP) { if (!strncmp(bpf_program__name(pos), "xdp_prognum", sizeof("xdp_prognum") - 1)) printf(" %s\n", bpf_program__name(pos)); diff --git a/samples/bpf/xdp_redirect_map.bpf.c b/samples/bpf/xdp_redirect_map.bpf.c index 59efd656e1b2..415bac1758e3 100644 --- a/samples/bpf/xdp_redirect_map.bpf.c +++ b/samples/bpf/xdp_redirect_map.bpf.c @@ -68,7 +68,7 @@ int xdp_redirect_map_native(struct xdp_md *ctx) return xdp_redirect_map(ctx, &tx_port_native); } -SEC("xdp_devmap/egress") +SEC("xdp/devmap") int xdp_redirect_map_egress(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; diff --git a/samples/bpf/xdp_redirect_map_multi.bpf.c b/samples/bpf/xdp_redirect_map_multi.bpf.c index bb0a5a3bfcf0..8b2fd4ec2c76 100644 --- a/samples/bpf/xdp_redirect_map_multi.bpf.c +++ b/samples/bpf/xdp_redirect_map_multi.bpf.c @@ -53,7 +53,7 @@ int xdp_redirect_map_native(struct xdp_md *ctx) return xdp_redirect_map(ctx, &forward_map_native); } -SEC("xdp_devmap/egress") +SEC("xdp/devmap") int xdp_devmap_prog(struct xdp_md *ctx) { void *data_end = (void 
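/* [editor's note] these SEC() renames track the section_defs[] change in
 * tools/lib/bpf/libbpf.c later in this patch: the attach qualifier now
 * follows a plain "xdp/" prefix, and the old "xdp_devmap/" and "xdp_cpumap/"
 * spellings are marked SEC_DEPRECATED. Minimal program under the new
 * convention:
 *
 *      SEC("xdp/devmap")
 *      int xdp_egress(struct xdp_md *ctx)
 *      {
 *              return XDP_PASS;
 *      }
 */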
*)(long)ctx->data_end; diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index 2d565ba54b8c..6dae87d83e1c 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -640,12 +640,10 @@ static void usage(const char *prog) int main(int ac, char **argv) { - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_XDP, - }; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); const char *optstr = "SF"; + struct bpf_program *prog; struct bpf_object *obj; char filename[256]; char **ifname_list; @@ -653,7 +651,6 @@ int main(int ac, char **argv) int err, i = 1; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - prog_load_attr.file = filename; total_ifindex = ac - 1; ifname_list = (argv + 1); @@ -684,14 +681,20 @@ int main(int ac, char **argv) return 1; } - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) return 1; + prog = bpf_object__next_program(obj, NULL); + bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); + printf("\n******************loading bpf file*********************\n"); - if (!prog_fd) { - printf("bpf_prog_load_xattr: %s\n", strerror(errno)); + err = bpf_object__load(obj); + if (err) { + printf("bpf_object__load(): %s\n", strerror(errno)); return 1; } + prog_fd = bpf_program__fd(prog); lpm_map_fd = bpf_object__find_map_fd_by_name(obj, "lpm_map"); rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index fb2532d13aac..f2d90cba5164 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -450,14 +450,12 @@ static void stats_poll(int interval, int action, __u32 cfg_opt) int main(int argc, char **argv) { __u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */ - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_XDP, - }; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); int prog_fd, map_fd, opt, err; bool use_separators = true; struct config cfg = { 0 }; + struct bpf_program *prog; struct bpf_object *obj; struct bpf_map *map; char filename[256]; @@ -471,11 +469,19 @@ int main(int argc, char **argv) char *action_str = NULL; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - prog_load_attr.file = filename; - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) return EXIT_FAIL; + prog = bpf_object__next_program(obj, NULL); + bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); + + err = bpf_object__load(obj); + if (err) + return EXIT_FAIL; + prog_fd = bpf_program__fd(prog); + map = bpf_object__find_map_by_name(obj, "config_map"); stats_global_map = bpf_object__find_map_by_name(obj, "stats_global_map"); rx_queue_index_map = bpf_object__find_map_by_name(obj, "rx_queue_index_map"); diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c index ae70a7943d85..c4332d068b91 100644 --- a/samples/bpf/xdp_sample_user.c +++ b/samples/bpf/xdp_sample_user.c @@ -1218,7 +1218,7 @@ int sample_setup_maps(struct bpf_map **maps) default: return -EINVAL; } - if (bpf_map__resize(sample_map[i], sample_map_count[i]) < 0) + if (bpf_map__set_max_entries(sample_map[i], sample_map_count[i]) < 0) return -errno; } sample_map[MAP_DEVMAP_XMIT_MULTI] = maps[MAP_DEVMAP_XMIT_MULTI]; diff --git a/samples/bpf/xdp_sample_user.h b/samples/bpf/xdp_sample_user.h index 
5f44b877ecf5..f45051679977 100644 --- a/samples/bpf/xdp_sample_user.h +++ b/samples/bpf/xdp_sample_user.h @@ -61,7 +61,7 @@ static inline char *safe_strncpy(char *dst, const char *src, size_t size) #define __attach_tp(name) \ ({ \ - if (!bpf_program__is_tracing(skel->progs.name)) \ + if (bpf_program__type(skel->progs.name) != BPF_PROG_TYPE_TRACING)\ return -EINVAL; \ skel->links.name = bpf_program__attach(skel->progs.name); \ if (!skel->links.name) \ diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 7370c03c96fc..2e811e4331cc 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -152,9 +152,6 @@ static int parse_ports(const char *port_str, int *min_port, int *max_port) int main(int argc, char **argv) { - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_XDP, - }; int min_port = 0, max_port = 0, vip2tnl_map_fd; const char *optstr = "i:a:p:s:d:m:T:P:FSNh"; unsigned char opt_flags[256] = {}; @@ -162,6 +159,7 @@ int main(int argc, char **argv) __u32 info_len = sizeof(info); unsigned int kill_after_s = 0; struct iptnl_info tnl = {}; + struct bpf_program *prog; struct bpf_object *obj; struct vip vip = {}; char filename[256]; @@ -259,15 +257,20 @@ int main(int argc, char **argv) } snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - prog_load_attr.file = filename; - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) return 1; - if (!prog_fd) { - printf("bpf_prog_load_xattr: %s\n", strerror(errno)); + prog = bpf_object__next_program(obj, NULL); + bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); + + err = bpf_object__load(obj); + if (err) { + printf("bpf_object__load(): %s\n", strerror(errno)); return 1; } + prog_fd = bpf_program__fd(prog); rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); vip2tnl_map_fd = bpf_object__find_map_fd_by_name(obj, "vip2tnl"); diff --git a/scripts/pahole-flags.sh b/scripts/pahole-flags.sh index e6093adf4c06..c293941612e7 100755 --- a/scripts/pahole-flags.sh +++ b/scripts/pahole-flags.sh @@ -7,7 +7,7 @@ if ! [ -x "$(command -v ${PAHOLE})" ]; then exit 0 fi -pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/') +pahole_ver=$($(dirname $0)/pahole-version.sh ${PAHOLE}) if [ "${pahole_ver}" -ge "118" ] && [ "${pahole_ver}" -le "121" ]; then # pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars diff --git a/scripts/pahole-version.sh b/scripts/pahole-version.sh new file mode 100755 index 000000000000..f8a32ab93ad1 --- /dev/null +++ b/scripts/pahole-version.sh @@ -0,0 +1,13 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Usage: $ ./pahole-version.sh pahole +# +# Prints pahole's version in a 3-digit form, such as 119 for v1.19. + +if [ ! 
-x "$(command -v "$@")" ]; then + echo 0 + exit 1 +fi + +"$@" --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/' diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 111dff809c7b..606743c6db41 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -310,7 +310,7 @@ void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, { const char *prog_name = prog_info->name; const struct btf_type *func_type; - const struct bpf_func_info finfo; + const struct bpf_func_info finfo = {}; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); struct btf *prog_btf = NULL; diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index e999159fa28d..9c894b1447de 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -487,17 +487,12 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types, size_t maxlen; bool res; - if (ifindex) - /* Only test offload-able program types */ - switch (prog_type) { - case BPF_PROG_TYPE_SCHED_CLS: - case BPF_PROG_TYPE_XDP: - break; - default: - return; - } + if (ifindex) { + p_info("BPF offload feature probing is not supported"); + return; + } - res = bpf_probe_prog_type(prog_type, ifindex); + res = libbpf_probe_bpf_prog_type(prog_type, NULL); #ifdef USE_LIBCAP /* Probe may succeed even if program load fails, for unprivileged users * check that we did not fail because of insufficient permissions @@ -535,7 +530,12 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix, size_t maxlen; bool res; - res = bpf_probe_map_type(map_type, ifindex); + if (ifindex) { + p_info("BPF offload feature probing is not supported"); + return; + } + + res = libbpf_probe_bpf_map_type(map_type, NULL); /* Probe result depends on the success of map creation, no additional * check required for unprivileged users @@ -567,7 +567,12 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type, bool res = false; if (supported_type) { - res = bpf_probe_helper(id, prog_type, ifindex); + if (ifindex) { + p_info("BPF offload feature probing is not supported"); + return; + } + + res = libbpf_probe_bpf_helper(prog_type, id, NULL); #ifdef USE_LIBCAP /* Probe may succeed even if program load fails, for * unprivileged users check that we did not fail because of diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 43e3f8700ecc..eacfc6a2060d 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -378,13 +378,16 @@ static void codegen_attach_detach(struct bpf_object *obj, const char *obj_name) int prog_fd = skel->progs.%2$s.prog_fd; \n\ ", obj_name, bpf_program__name(prog)); - switch (bpf_program__get_type(prog)) { + switch (bpf_program__type(prog)) { case BPF_PROG_TYPE_RAW_TRACEPOINT: tp_name = strchr(bpf_program__section_name(prog), '/') + 1; - printf("\tint fd = bpf_raw_tracepoint_open(\"%s\", prog_fd);\n", tp_name); + printf("\tint fd = skel_raw_tracepoint_open(\"%s\", prog_fd);\n", tp_name); break; case BPF_PROG_TYPE_TRACING: - printf("\tint fd = bpf_raw_tracepoint_open(NULL, prog_fd);\n"); + if (bpf_program__expected_attach_type(prog) == BPF_TRACE_ITER) + printf("\tint fd = skel_link_create(prog_fd, 0, BPF_TRACE_ITER);\n"); + else + printf("\tint fd = skel_raw_tracepoint_open(NULL, prog_fd);\n"); break; default: printf("\tint fd = ((void)prog_fd, 0); /* auto-attach not supported */\n"); diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 9d01fa9de033..490f7bd54e4c 100644 --- a/tools/bpf/bpftool/main.c +++ 
b/tools/bpf/bpftool/main.c @@ -478,14 +478,11 @@ int main(int argc, char **argv) } if (!legacy_libbpf) { - enum libbpf_strict_mode mode; - /* Allow legacy map definitions for skeleton generation. * It will still be rejected if users use LIBBPF_STRICT_ALL * mode for loading generated skeleton. */ - mode = (__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS; - ret = libbpf_set_strict_mode(mode); + ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS); if (ret) p_err("failed to enable libbpf strict mode: %d", ret); } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index cf935c63e6f5..92a6f679ef7d 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1272,12 +1272,12 @@ static int do_run(int argc, char **argv) { char *data_fname_in = NULL, *data_fname_out = NULL; char *ctx_fname_in = NULL, *ctx_fname_out = NULL; - struct bpf_prog_test_run_attr test_attr = {0}; const unsigned int default_size = SZ_32K; void *data_in = NULL, *data_out = NULL; void *ctx_in = NULL, *ctx_out = NULL; unsigned int repeat = 1; int fd, err; + LIBBPF_OPTS(bpf_test_run_opts, test_attr); if (!REQ_ARGS(4)) return -1; @@ -1395,14 +1395,13 @@ static int do_run(int argc, char **argv) goto free_ctx_in; } - test_attr.prog_fd = fd; test_attr.repeat = repeat; test_attr.data_in = data_in; test_attr.data_out = data_out; test_attr.ctx_in = ctx_in; test_attr.ctx_out = ctx_out; - err = bpf_prog_test_run_xattr(&test_attr); + err = bpf_prog_test_run_opts(fd, &test_attr); if (err) { p_err("failed to run program: %s", strerror(errno)); goto free_ctx_out; @@ -2283,10 +2282,10 @@ static int do_profile(int argc, char **argv) profile_obj->rodata->num_metric = num_metric; /* adjust map sizes */ - bpf_map__resize(profile_obj->maps.events, num_metric * num_cpu); - bpf_map__resize(profile_obj->maps.fentry_readings, num_metric); - bpf_map__resize(profile_obj->maps.accum_readings, num_metric); - bpf_map__resize(profile_obj->maps.counts, 1); + bpf_map__set_max_entries(profile_obj->maps.events, num_metric * num_cpu); + bpf_map__set_max_entries(profile_obj->maps.fentry_readings, num_metric); + bpf_map__set_max_entries(profile_obj->maps.accum_readings, num_metric); + bpf_map__set_max_entries(profile_obj->maps.counts, 1); /* change target name */ profile_tgt_name = profile_target_name(profile_tgt_fd); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 16a7574292a5..afe3d0d7f5f2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5076,6 +5076,16 @@ union bpf_attr { * associated to *xdp_md*, at *offset*. * Return * 0 on success, or a negative error in case of failure. + * + * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags) + * Description + * Read *size* bytes from user space address *user_ptr* in *tsk*'s + * address space, and stores the data in *dst*. *flags* is not + * used yet and is provided for future extensibility. This helper + * can only be used by sleepable programs. + * Return + * 0 on success, or a negative error in case of failure. On error + * *dst* buffer is zeroed out. 
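[editor's note: a minimal sleepable-program sketch for this helper; the
section name relies on the "iter.s/" definition added to libbpf later in
this patch, and user_ptr stands for a userspace address supplied by the
loader:

        const volatile __u64 user_ptr;          (set by the loader)

        SEC("iter.s/task")
        int read_user_buf(struct bpf_iter__task *ctx)
        {
                struct task_struct *task = ctx->task;
                char buf[16];

                if (!task)
                        return 0;
                if (bpf_copy_from_user_task(buf, sizeof(buf),
                                            (const void *)user_ptr, task, 0))
                        return 0;
                bpf_printk("%s", buf);
                return 0;
        }
]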
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5269,6 +5279,7 @@ union bpf_attr { FN(xdp_get_buff_len), \ FN(xdp_load_bytes), \ FN(xdp_store_bytes), \ + FN(copy_from_user_task), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5563,7 +5574,8 @@ struct bpf_sock { __u32 src_ip4; __u32 src_ip6[4]; __u32 src_port; /* host byte order */ - __u32 dst_port; /* network byte order */ + __be16 dst_port; /* network byte order */ + __u16 :16; /* zero padding */ __u32 dst_ip4; __u32 dst_ip6[4]; __u32 state; @@ -6441,7 +6453,8 @@ struct bpf_sk_lookup { __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ __u32 remote_ip4; /* Network byte order */ __u32 remote_ip6[4]; /* Network byte order */ - __u32 remote_port; /* Network byte order */ + __be16 remote_port; /* Network byte order */ + __u16 :16; /* Zero padding */ __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index f947b61b2107..b8b37fe76006 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -131,7 +131,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ sed 's/\[.*\]//' | \ - awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) @@ -194,7 +194,7 @@ check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT) sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ sed 's/\[.*\]//' | \ - awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}'| \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \ sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \ diff -u $(OUTPUT)libbpf_global_syms.tmp \ diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index c2e8327010f9..16b21757b8bf 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -453,12 +453,14 @@ struct bpf_prog_test_run_attr { * out: length of cxt_out */ }; +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead") LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr); /* * bpf_prog_test_run does not check that data_out is large enough. Consider - * using bpf_prog_test_run_xattr instead. + * using bpf_prog_test_run_opts instead. 
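[editor's note: migration sketch for the two deprecations above, mirroring
the bpftool do_run() conversion earlier in this patch; pkt is a caller-built
test packet:

        LIBBPF_OPTS(bpf_test_run_opts, topts,
                .data_in = pkt,
                .data_size_in = sizeof(pkt),
                .repeat = 1,
        );

        err = bpf_prog_test_run_opts(prog_fd, &topts);

buffer sizes are explicit in the opts struct, which is what makes the
"data_out is large enough" caveat checkable.]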
*/ +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead") LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration); diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 90f56b0f585f..e3a8c947e89f 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -76,6 +76,9 @@ #define __PT_RC_REG ax #define __PT_SP_REG sp #define __PT_IP_REG ip +/* syscall uses r10 for PARM4 */ +#define PT_REGS_PARM4_SYSCALL(x) ((x)->r10) +#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(x, r10) #else @@ -105,6 +108,9 @@ #define __PT_RC_REG rax #define __PT_SP_REG rsp #define __PT_IP_REG rip +/* syscall uses r10 for PARM4 */ +#define PT_REGS_PARM4_SYSCALL(x) ((x)->r10) +#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(x, r10) #endif /* __i386__ */ @@ -112,6 +118,10 @@ #elif defined(bpf_target_s390) +struct pt_regs___s390 { + unsigned long orig_gpr2; +}; + /* s390 provides user_pt_regs instead of struct pt_regs to userspace */ #define __PT_REGS_CAST(x) ((const user_pt_regs *)(x)) #define __PT_PARM1_REG gprs[2] @@ -124,6 +134,8 @@ #define __PT_RC_REG gprs[2] #define __PT_SP_REG gprs[15] #define __PT_IP_REG psw.addr +#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; }) +#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___s390 *)(x), orig_gpr2) #elif defined(bpf_target_arm) @@ -140,6 +152,10 @@ #elif defined(bpf_target_arm64) +struct pt_regs___arm64 { + unsigned long orig_x0; +}; + /* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ #define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x)) #define __PT_PARM1_REG regs[0] @@ -152,6 +168,8 @@ #define __PT_RC_REG regs[0] #define __PT_SP_REG sp #define __PT_IP_REG pc +#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; }) +#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___arm64 *)(x), orig_x0) #elif defined(bpf_target_mips) @@ -178,6 +196,8 @@ #define __PT_RC_REG gpr[3] #define __PT_SP_REG sp #define __PT_IP_REG nip +/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER. */ +#define PT_REGS_SYSCALL_REGS(ctx) ctx #elif defined(bpf_target_sparc) @@ -206,10 +226,12 @@ #define __PT_PARM4_REG a3 #define __PT_PARM5_REG a4 #define __PT_RET_REG ra -#define __PT_FP_REG fp +#define __PT_FP_REG s0 #define __PT_RC_REG a5 #define __PT_SP_REG sp -#define __PT_IP_REG epc +#define __PT_IP_REG pc +/* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. 
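[editor's note: on ARCH_HAS_SYSCALL_WRAPPER architectures the syscall body
receives a single pt_regs pointer holding the real arguments, so a kprobe
must dereference one level; on powerpc and riscv the macro collapses to ctx
itself. Hand-rolled sketch using the *_CORE_SYSCALL() accessors defined in
this hunk, x86-64 handler naming assumed:

        SEC("kprobe/__x64_sys_openat")
        int openat_entry(struct pt_regs *ctx)
        {
                struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx);
                const char *filename;
                char buf[64];

                filename = (const char *)PT_REGS_PARM2_CORE_SYSCALL(regs);
                bpf_probe_read_user_str(buf, sizeof(buf), filename);
                return 0;
        }
]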
*/ +#define PT_REGS_SYSCALL_REGS(ctx) ctx #endif @@ -263,6 +285,26 @@ struct pt_regs; #endif +#ifndef PT_REGS_PARM1_SYSCALL +#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1(x) +#endif +#define PT_REGS_PARM2_SYSCALL(x) PT_REGS_PARM2(x) +#define PT_REGS_PARM3_SYSCALL(x) PT_REGS_PARM3(x) +#ifndef PT_REGS_PARM4_SYSCALL +#define PT_REGS_PARM4_SYSCALL(x) PT_REGS_PARM4(x) +#endif +#define PT_REGS_PARM5_SYSCALL(x) PT_REGS_PARM5(x) + +#ifndef PT_REGS_PARM1_CORE_SYSCALL +#define PT_REGS_PARM1_CORE_SYSCALL(x) PT_REGS_PARM1_CORE(x) +#endif +#define PT_REGS_PARM2_CORE_SYSCALL(x) PT_REGS_PARM2_CORE(x) +#define PT_REGS_PARM3_CORE_SYSCALL(x) PT_REGS_PARM3_CORE(x) +#ifndef PT_REGS_PARM4_CORE_SYSCALL +#define PT_REGS_PARM4_CORE_SYSCALL(x) PT_REGS_PARM4_CORE(x) +#endif +#define PT_REGS_PARM5_CORE_SYSCALL(x) PT_REGS_PARM5_CORE(x) + #else /* defined(bpf_target_defined) */ #define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) @@ -290,8 +332,30 @@ struct pt_regs; #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM2_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM3_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM4_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM5_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) + +#define PT_REGS_PARM1_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM2_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM3_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM4_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM5_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) + #endif /* defined(bpf_target_defined) */ +/* + * When invoked from a syscall handler kprobe, returns a pointer to a + * struct pt_regs containing syscall arguments and suitable for passing to + * PT_REGS_PARMn_SYSCALL() and PT_REGS_PARMn_CORE_SYSCALL(). + */ +#ifndef PT_REGS_SYSCALL_REGS +/* By default, assume that the arch selects ARCH_HAS_SYSCALL_WRAPPER. */ +#define PT_REGS_SYSCALL_REGS(ctx) ((struct pt_regs *)PT_REGS_PARM1(ctx)) +#endif + #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b #endif @@ -406,4 +470,39 @@ typeof(name(0)) name(struct pt_regs *ctx) \ } \ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) +#define ___bpf_syscall_args0() ctx +#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs) +#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs) +#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs) +#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs) +#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs) +#define ___bpf_syscall_args(args...) ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args) + +/* + * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for + * tracing syscall functions, like __x64_sys_close. 
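[editor's note: typical use, x86-64 handler naming assumed:

        SEC("kprobe/__x64_sys_close")
        int BPF_KPROBE_SYSCALL(close_entry, int fd)
        {
                bpf_printk("close(%d)", fd);
                return 0;
        }

the macro pulls fd out of the wrapped pt_regs via the *_CORE_SYSCALL()
accessors above.]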
It hides the underlying + * platform-specific low-level way of getting syscall input arguments from + * struct pt_regs, and provides a familiar typed and named function arguments + * syntax and semantics of accessing syscall input parameters. + * + * Original struct pt_regs* context is preserved as 'ctx' argument. This might + * be necessary when using BPF helpers like bpf_perf_event_output(). + * + * This macro relies on BPF CO-RE support. + */ +#define BPF_KPROBE_SYSCALL(name, args...) \ +name(struct pt_regs *ctx); \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args); \ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx); \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_syscall_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args) + #endif diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 51862fdee850..951ac7475794 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -147,11 +147,10 @@ LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API void btf__set_fd(struct btf *btf, int fd); -LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__raw_data() instead") -LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); +LIBBPF_DEPRECATED_SINCE(0, 7, "this API is not necessary when BTF-defined maps are used") LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, __u32 expected_key_size, __u32 expected_value_size, @@ -159,8 +158,7 @@ LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size); LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); -LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, - __u32 *size); +LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size); LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions") int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ext, @@ -171,8 +169,10 @@ int btf_ext__reloc_line_info(const struct btf *btf, const struct btf_ext *btf_ext, const char *sec_name, __u32 insns_cnt, void **line_info, __u32 *cnt); -LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); -LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info is deprecated; write custom func_info parsing to fetch rec_size") +__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info is deprecated; write custom line_info parsing to fetch rec_size") +__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); diff --git 
a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index b9a3260c83cb..07ebe70d3a30 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1861,14 +1861,16 @@ static int btf_dump_array_data(struct btf_dump *d, { const struct btf_array *array = btf_array(t); const struct btf_type *elem_type; - __u32 i, elem_size = 0, elem_type_id; + __u32 i, elem_type_id; + __s64 elem_size; bool is_array_member; elem_type_id = array->type; elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL); elem_size = btf__resolve_size(d->btf, elem_type_id); if (elem_size <= 0) { - pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id); + pr_warn("unexpected elem size %zd for array type [%u]\n", + (ssize_t)elem_size, id); return -EINVAL; } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a8c750373ad5..2262bcdfee92 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -156,14 +156,6 @@ enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE; int libbpf_set_strict_mode(enum libbpf_strict_mode mode) { - /* __LIBBPF_STRICT_LAST is the last power-of-2 value used + 1, so to - * get all possible values we compensate last +1, and then (2*x - 1) - * to get the bit mask - */ - if (mode != LIBBPF_STRICT_ALL - && (mode & ~((__LIBBPF_STRICT_LAST - 1) * 2 - 1))) - return errno = EINVAL, -EINVAL; - libbpf_mode = mode; return 0; } @@ -237,6 +229,8 @@ enum sec_def_flags { SEC_SLOPPY_PFX = 16, /* BPF program support non-linear XDP buffer */ SEC_XDP_FRAGS = 32, + /* deprecated sec definitions not supposed to be used */ + SEC_DEPRECATED = 64, }; struct bpf_sec_def { @@ -4200,9 +4194,12 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) if (!bpf_map__is_internal(map)) { pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n"); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size, def->value_size, &key_type_id, &value_type_id); +#pragma GCC diagnostic pop } else { /* * LLVM annotates global data differently in BTF, that is, @@ -6575,6 +6572,10 @@ static int libbpf_preload_prog(struct bpf_program *prog, if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; + if (def & SEC_DEPRECATED) + pr_warn("SEC(\"%s\") is deprecated, please see https://github.com/libbpf/libbpf/wiki/Libbpf-1.0-migration-guide#bpf-program-sec-annotation-deprecations for details\n", + prog->sec_name); + if ((prog->type == BPF_PROG_TYPE_TRACING || prog->type == BPF_PROG_TYPE_LSM || prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { @@ -7896,10 +7897,8 @@ int bpf_map__set_pin_path(struct bpf_map *map, const char *path) return 0; } -const char *bpf_map__get_pin_path(const struct bpf_map *map) -{ - return map->pin_path; -} +__alias(bpf_map__pin_path) +const char *bpf_map__get_pin_path(const struct bpf_map *map); const char *bpf_map__pin_path(const struct bpf_map *map) { @@ -8464,7 +8463,10 @@ static int bpf_program_nth_fd(const struct bpf_program *prog, int n) return fd; } -enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog) +__alias(bpf_program__type) +enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); + +enum bpf_prog_type bpf_program__type(const struct bpf_program *prog) { return prog->type; } @@ -8508,8 +8510,10 @@ BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS); BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT); 
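/* [editor's note] the __alias() pattern used above for bpf_map__get_pin_path
 * and bpf_program__get_type keeps the old symbol as an ELF alias of its
 * renamed replacement, so already-built callers keep resolving it without a
 * duplicated function body. Generic sketch:
 *
 *      #define __alias(symbol) __attribute__((alias(#symbol)))
 *
 *      int new_api(void) { return 42; }
 *      __alias(new_api) int old_api(void);     same address as new_api()
 */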
BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP); -enum bpf_attach_type -bpf_program__get_expected_attach_type(const struct bpf_program *prog) +__alias(bpf_program__expected_attach_type) +enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); + +enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog) { return prog->expected_attach_type; } @@ -8593,7 +8597,7 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), SEC_DEF("uretprobe/", KPROBE, 0, SEC_NONE), SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), - SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED), SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX), SEC_DEF("tracepoint/", TRACEPOINT, 0, SEC_NONE, attach_tp), SEC_DEF("tp/", TRACEPOINT, 0, SEC_NONE, attach_tp), @@ -8612,11 +8616,14 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("lsm/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), + SEC_DEF("iter.s/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), - SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), + SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), + SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE | SEC_DEPRECATED), SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS), - SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), + SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), + SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE | SEC_DEPRECATED), SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS), SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX), @@ -9459,7 +9466,7 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, open_attr.file = attr->file; open_attr.prog_type = attr->prog_type; - obj = bpf_object__open_xattr(&open_attr); + obj = __bpf_object__open_xattr(&open_attr, 0); err = libbpf_get_error(obj); if (err) return libbpf_err(-ENOENT); @@ -9476,7 +9483,7 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, bpf_program__set_expected_attach_type(prog, attach_type); } - if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) { + if (bpf_program__type(prog) == BPF_PROG_TYPE_UNSPEC) { /* * we haven't guessed from section name and user * didn't provide a fallback type, too bad... 
@@ -9493,7 +9500,7 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, } bpf_object__for_each_map(map, obj) { - if (!bpf_map__is_offload_neutral(map)) + if (map->def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) map->map_ifindex = attr->ifindex; } @@ -10527,7 +10534,7 @@ bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id return libbpf_err_ptr(-ENOMEM); link->detach = &bpf_link__detach_fd; - attach_type = bpf_program__get_expected_attach_type(prog); + attach_type = bpf_program__expected_attach_type(prog); link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts); if (link_fd < 0) { link_fd = -errno; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 94670066de62..c8d8daad212e 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -180,9 +180,11 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts); /* deprecated bpf_object__open variants */ +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open_mem() instead") LIBBPF_API struct bpf_object * bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, const char *name); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open_file() instead") LIBBPF_API struct bpf_object * bpf_object__open_xattr(struct bpf_object_open_attr *attr); @@ -244,8 +246,10 @@ struct bpf_object *bpf_object__next(struct bpf_object *prev); (pos) = (tmp), (tmp) = bpf_object__next(tmp)) typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *); +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv, bpf_object_clear_priv_t clear_priv); +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog); LIBBPF_API int @@ -277,9 +281,10 @@ bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *prog) typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv, bpf_program_clear_priv_t clear_priv); - +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog); LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex); @@ -591,26 +596,39 @@ LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n); /* * Adjust type of BPF program. Default is kprobe. 
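[editor's note: every per-type setter below now carries a deprecation, and
the matching bpf_program__is_*() predicates further down get the same
treatment; the generic replacements are one-liners, as already applied to
the samples and bpftool earlier in this patch:

        bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
        bool is_xdp = bpf_program__type(prog) == BPF_PROG_TYPE_XDP;
]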
*/ +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_lsm(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog); -LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); +LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog); LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type); LIBBPF_API enum bpf_attach_type -bpf_program__get_expected_attach_type(const struct bpf_program *prog); +bpf_program__expected_attach_type(const struct bpf_program *prog); LIBBPF_API void bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type); @@ -631,18 +649,31 @@ LIBBPF_API int bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, const char *attach_func_name); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_lsm(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API 
bool bpf_program__is_xdp(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog); /* @@ -716,6 +747,7 @@ LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type); /* get/set map size (max_entries) */ LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map); LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__set_max_entries() instead") LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); /* get/set map flags */ LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map); @@ -740,8 +772,10 @@ LIBBPF_API __u64 bpf_map__map_extra(const struct bpf_map *map); LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, bpf_map_clear_priv_t clear_priv); +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); @@ -758,7 +792,6 @@ LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); */ LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); -LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index e10f0822845a..aef6253a90c8 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -424,10 +424,12 @@ LIBBPF_0.6.0 { LIBBPF_0.7.0 { global: bpf_btf_load; + bpf_program__expected_attach_type; bpf_program__log_buf; bpf_program__log_level; bpf_program__set_log_buf; bpf_program__set_log_level; + bpf_program__type; bpf_xdp_attach; bpf_xdp_detach; bpf_xdp_query; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 1565679eb432..bc86b82e90d1 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -92,6 +92,9 @@ # define offsetofend(TYPE, FIELD) \ (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD)) #endif +#ifndef __alias +#define __alias(symbol) __attribute__((alias(#symbol))) +#endif /* Check whether a string `str` has prefix `pfx`, regardless if `pfx` is * a string literal known at compilation time or char * pointer known only at diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h index 3c2b281c2bc3..a283cf031665 100644 --- 
a/tools/lib/bpf/libbpf_legacy.h +++ b/tools/lib/bpf/libbpf_legacy.h @@ -86,6 +86,23 @@ LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode); #define DECLARE_LIBBPF_OPTS LIBBPF_OPTS +/* "Discouraged" APIs which don't follow consistent libbpf naming patterns. + * They are normally trivial aliases or wrappers for proper APIs and are + * left to minimize unnecessary disruption for users of libbpf. But they + * shouldn't be used going forward. + */ + +struct bpf_program; +struct bpf_map; +struct btf; +struct btf_ext; + +LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); +LIBBPF_API enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); +LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); +LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); +LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index 0b84d8e6b72a..dcd3336512d4 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -70,19 +70,85 @@ static inline int skel_closenz(int fd) return -EINVAL; } +#ifndef offsetofend +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) +#endif + +static inline int skel_map_create(enum bpf_map_type map_type, + const char *map_name, + __u32 key_size, + __u32 value_size, + __u32 max_entries) +{ + const size_t attr_sz = offsetofend(union bpf_attr, map_extra); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + + attr.map_type = map_type; + strncpy(attr.map_name, map_name, sizeof(attr.map_name)); + attr.key_size = key_size; + attr.value_size = value_size; + attr.max_entries = max_entries; + + return skel_sys_bpf(BPF_MAP_CREATE, &attr, attr_sz); +} + +static inline int skel_map_update_elem(int fd, const void *key, + const void *value, __u64 flags) +{ + const size_t attr_sz = offsetofend(union bpf_attr, flags); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + attr.map_fd = fd; + attr.key = (long) key; + attr.value = (long) value; + attr.flags = flags; + + return skel_sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz); +} + +static inline int skel_raw_tracepoint_open(const char *name, int prog_fd) +{ + const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint.prog_fd); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + attr.raw_tracepoint.name = (long) name; + attr.raw_tracepoint.prog_fd = prog_fd; + + return skel_sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, attr_sz); +} + +static inline int skel_link_create(int prog_fd, int target_fd, + enum bpf_attach_type attach_type) +{ + const size_t attr_sz = offsetofend(union bpf_attr, link_create.iter_info_len); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + attr.link_create.prog_fd = prog_fd; + attr.link_create.target_fd = target_fd; + attr.link_create.attach_type = attach_type; + + return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz); +} + static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) { int map_fd = -1, prog_fd = -1, key = 0, err; union bpf_attr attr; - map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1, NULL); + map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1); if (map_fd < 0) { opts->errstr = "failed to create loader map"; err = -errno; goto out; } - err = bpf_map_update_elem(map_fd, &key, opts->data, 0); 
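Before the converted call sites that follow, note the convention shared by all of the skel_* helpers above: each zeroes union bpf_attr only up to the last field its command consumes (computed with offsetofend) and passes that exact size to the syscall, so the uninitialized tail of the on-stack union is never handed to older kernels whose bpf_attr ends earlier. A hedged sketch of the same pattern applied to one more command (this helper is illustrative, not part of the patch)::

    static inline int skel_map_delete_elem(int fd, const void *key)
    {
            /* BPF_MAP_DELETE_ELEM reads nothing past 'key' */
            const size_t attr_sz = offsetofend(union bpf_attr, key);
            union bpf_attr attr;

            memset(&attr, 0, attr_sz);
            attr.map_fd = fd;
            attr.key = (long) key;

            return skel_sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz);
    }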
+ err = skel_map_update_elem(map_fd, &key, opts->data, 0); if (err < 0) { opts->errstr = "failed to update loader map"; err = -errno; diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index 8ac0a3a457ef..0bc25a56cfef 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -13,7 +13,7 @@ static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) { struct bpf_object *obj; - obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL); + obj = bpf_object__open_mem(obj_buf, obj_buf_sz, NULL); if (libbpf_get_error(obj)) return TEST_FAIL; bpf_object__close(obj); diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index ef2832b4d5cc..3cd5ae200f2c 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -54,6 +54,7 @@ static bool libbpf_initialized; struct bpf_object * bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name) { + LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = name); struct bpf_object *obj; if (!libbpf_initialized) { @@ -61,7 +62,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name) libbpf_initialized = true; } - obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name); + obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts); if (IS_ERR_OR_NULL(obj)) { pr_debug("bpf: failed to load buffer\n"); return ERR_PTR(-EINVAL); @@ -72,6 +73,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name) struct bpf_object *bpf__prepare_load(const char *filename, bool source) { + LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = filename); struct bpf_object *obj; if (!libbpf_initialized) { @@ -94,7 +96,7 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE); } else pr_debug("bpf: successful builtin compilation\n"); - obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename); + obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts); if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj) llvm__dump_obj(filename, obj_buf, obj_buf_sz); @@ -654,11 +656,11 @@ int bpf__probe(struct bpf_object *obj) } if (priv->is_tp) { - bpf_program__set_tracepoint(prog); + bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT); continue; } - bpf_program__set_kprobe(prog); + bpf_program__set_type(prog, BPF_PROG_TYPE_KPROBE); pev = &priv->pev; err = convert_perf_probe_events(pev, 1); diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 945f92d71db3..91ea729990da 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -330,7 +330,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \ test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \ - map_ptr_kern.c core_kern.c + map_ptr_kern.c core_kern.c core_kern_overflow.c # Generate both light skeleton and libbpf skeleton for these LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test_subprog.c SKEL_BLACKLIST += $$(LSKELS) diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 42ef250c7acc..d099d91adc3b 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -206,6 +206,8 @@ btf_tag test and Clang version The btf_tag selftest requires LLVM support to recognize the btf_decl_tag and btf_type_tag attributes. They are introduced in `Clang 14` [0_, 1_]. 
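For context on what the two attributes look like at the source level: btf_decl_tag annotates a declaration, while btf_type_tag annotates the pointed-to type, and each is recorded as the corresponding BTF kind in the object's type information. A short illustration (the macro and field names are invented for this sketch, assuming Clang 14 or newer)::

    #define __decl_tag(x) __attribute__((btf_decl_tag(x)))
    #define __utag        __attribute__((btf_type_tag("user")))

    struct conn_info {
            int queue_id __decl_tag("key_field"); /* -> BTF_KIND_DECL_TAG */
            int __utag *ubuf;                     /* -> BTF_KIND_TYPE_TAG "user" */
    };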
+The subtests ``btf_type_tag_user_{mod1, mod2, vmlinux}`` also require +pahole version ``1.23``. Without them, the btf_tag selftest will be skipped and you will observe: diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c index da8593b3494a..c2554f9695ff 100644 --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -151,7 +151,7 @@ static struct ringbuf_bench *ringbuf_setup_skeleton(void) /* record data + header take 16 bytes */ skel->rodata->wakeup_data_size = args.sample_rate * 16; - bpf_map__resize(skel->maps.ringbuf, args.ringbuf_sz); + bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz); if (ringbuf_bench__load(skel)) { fprintf(stderr, "failed to load skeleton\n"); diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 7f957c55a3ca..0c481de2833d 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -154,7 +154,6 @@ static void *uprobe_producer_without_nop(void *input) static void usetup(bool use_retprobe, bool use_nop) { size_t uprobe_offset; - ssize_t base_addr; struct bpf_link *link; setup_libbpf(); @@ -165,11 +164,10 @@ static void usetup(bool use_retprobe, bool use_nop) exit(1); } - base_addr = get_base_addr(); if (use_nop) - uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop, base_addr); + uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop); else - uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop, base_addr); + uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop); link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe, use_retprobe, diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index bdbacf5adcd2..27d63be47b95 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -13,6 +13,10 @@ #define CREATE_TRACE_POINTS #include "bpf_testmod-events.h" +typedef int (*func_proto_typedef)(long); +typedef int (*func_proto_typedef_nested1)(func_proto_typedef); +typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1); + DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123; noinline void @@ -21,6 +25,27 @@ bpf_testmod_test_mod_kfunc(int i) *(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i; } +struct bpf_testmod_btf_type_tag_1 { + int a; +}; + +struct bpf_testmod_btf_type_tag_2 { + struct bpf_testmod_btf_type_tag_1 __user *p; +}; + +noinline int +bpf_testmod_test_btf_type_tag_user_1(struct bpf_testmod_btf_type_tag_1 __user *arg) { + BTF_TYPE_EMIT(func_proto_typedef); + BTF_TYPE_EMIT(func_proto_typedef_nested1); + BTF_TYPE_EMIT(func_proto_typedef_nested2); + return arg->a; +} + +noinline int +bpf_testmod_test_btf_type_tag_user_2(struct bpf_testmod_btf_type_tag_2 *arg) { + return arg->p->a; +} + noinline int bpf_testmod_loop_test(int n) { int i, sum = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c index 86b7d5d84eec..ab62aba10e2b 100644 --- a/tools/testing/selftests/bpf/prog_tests/atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/atomics.c @@ -7,18 +7,18 @@ static void test_add(struct atomics_lskel *skel) { int err, prog_fd; - __u32 duration = 0, retval; int link_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); link_fd = 
atomics_lskel__add__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(add)")) return; prog_fd = skel->progs.add.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run add", - "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) goto cleanup; ASSERT_EQ(skel->data->add64_value, 3, "add64_value"); @@ -39,19 +39,18 @@ cleanup: static void test_sub(struct atomics_lskel *skel) { int err, prog_fd; - __u32 duration = 0, retval; int link_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); link_fd = atomics_lskel__sub__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(sub)")) return; prog_fd = skel->progs.sub.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run sub", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration)) + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) goto cleanup; ASSERT_EQ(skel->data->sub64_value, -1, "sub64_value"); @@ -72,18 +71,18 @@ cleanup: static void test_and(struct atomics_lskel *skel) { int err, prog_fd; - __u32 duration = 0, retval; int link_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); link_fd = atomics_lskel__and__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(and)")) return; prog_fd = skel->progs.and.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run and", - "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) goto cleanup; ASSERT_EQ(skel->data->and64_value, 0x010ull << 32, "and64_value"); @@ -100,19 +99,18 @@ cleanup: static void test_or(struct atomics_lskel *skel) { int err, prog_fd; - __u32 duration = 0, retval; int link_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); link_fd = atomics_lskel__or__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(or)")) return; prog_fd = skel->progs.or.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run or", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration)) + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) goto cleanup; ASSERT_EQ(skel->data->or64_value, 0x111ull << 32, "or64_value"); @@ -129,18 +127,18 @@ cleanup: static void test_xor(struct atomics_lskel *skel) { int err, prog_fd; - __u32 duration = 0, retval; int link_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); link_fd = atomics_lskel__xor__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(xor)")) return; prog_fd = skel->progs.xor.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run xor", - "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) goto cleanup; ASSERT_EQ(skel->data->xor64_value, 0x101ull 
<< 32, "xor64_value"); @@ -157,18 +155,18 @@ cleanup: static void test_cmpxchg(struct atomics_lskel *skel) { int err, prog_fd; - __u32 duration = 0, retval; int link_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); link_fd = atomics_lskel__cmpxchg__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(cmpxchg)")) return; prog_fd = skel->progs.cmpxchg.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run cmpxchg", - "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) goto cleanup; ASSERT_EQ(skel->data->cmpxchg64_value, 2, "cmpxchg64_value"); @@ -186,18 +184,18 @@ cleanup: static void test_xchg(struct atomics_lskel *skel) { int err, prog_fd; - __u32 duration = 0, retval; int link_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); link_fd = atomics_lskel__xchg__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(xchg)")) return; prog_fd = skel->progs.xchg.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run xchg", - "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) goto cleanup; ASSERT_EQ(skel->data->xchg64_value, 2, "xchg64_value"); diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index d0bd51eb23c8..d48f6e533e1e 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -5,9 +5,10 @@ /* this is how USDT semaphore is actually defined, except volatile modifier */ volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes"))); -/* attach point */ -static void method(void) { - return ; +/* uprobe attach point */ +static void trigger_func(void) +{ + asm volatile (""); } void test_attach_probe(void) @@ -17,8 +18,7 @@ void test_attach_probe(void) struct bpf_link *kprobe_link, *kretprobe_link; struct bpf_link *uprobe_link, *uretprobe_link; struct test_attach_probe* skel; - size_t uprobe_offset; - ssize_t base_addr, ref_ctr_offset; + ssize_t uprobe_offset, ref_ctr_offset; bool legacy; /* Check if new-style kprobe/uprobe API is supported. 
@@ -34,11 +34,9 @@ void test_attach_probe(void) */ legacy = access("/sys/bus/event_source/devices/kprobe/type", F_OK) != 0; - base_addr = get_base_addr(); - if (CHECK(base_addr < 0, "get_base_addr", - "failed to find base addr: %zd", base_addr)) + uprobe_offset = get_uprobe_offset(&trigger_func); + if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset")) return; - uprobe_offset = get_uprobe_offset(&method, base_addr); ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr); if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset")) @@ -103,7 +101,7 @@ void test_attach_probe(void) goto cleanup; /* trigger & validate uprobe & uretprobe */ - method(); + trigger_func(); if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res", "wrong uprobe res: %d\n", skel->bss->uprobe_res)) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 5eea3c3a40fe..cd10df6cd0fc 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -8,6 +8,12 @@ #include <test_progs.h> #include "test_bpf_cookie.skel.h" +/* uprobe attach point */ +static void trigger_func(void) +{ + asm volatile (""); +} + static void kprobe_subtest(struct test_bpf_cookie *skel) { DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); @@ -62,11 +68,11 @@ static void uprobe_subtest(struct test_bpf_cookie *skel) DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); struct bpf_link *link1 = NULL, *link2 = NULL; struct bpf_link *retlink1 = NULL, *retlink2 = NULL; - size_t uprobe_offset; - ssize_t base_addr; + ssize_t uprobe_offset; - base_addr = get_base_addr(); - uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr); + uprobe_offset = get_uprobe_offset(&trigger_func); + if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset")) + goto cleanup; /* attach two uprobes */ opts.bpf_cookie = 0x100; @@ -99,7 +105,7 @@ static void uprobe_subtest(struct test_bpf_cookie *skel) goto cleanup; /* trigger uprobe && uretprobe */ - get_base_addr(); + trigger_func(); ASSERT_EQ(skel->bss->uprobe_res, 0x100 | 0x200, "uprobe_res"); ASSERT_EQ(skel->bss->uretprobe_res, 0x1000 | 0x2000, "uretprobe_res"); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index b84f859b1267..5142a7d130b2 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -138,6 +138,24 @@ static void test_task(void) bpf_iter_task__destroy(skel); } +static void test_task_sleepable(void) +{ + struct bpf_iter_task *skel; + + skel = bpf_iter_task__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load")) + return; + + do_dummy_read(skel->progs.dump_task_sleepable); + + ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0, + "num_expected_failure_copy_from_user_task"); + ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0, + "num_success_copy_from_user_task"); + + bpf_iter_task__destroy(skel); +} + static void test_task_stack(void) { struct bpf_iter_task_stack *skel; @@ -1252,6 +1270,8 @@ void test_bpf_iter(void) test_bpf_map(); if (test__start_subtest("task")) test_task(); + if (test__start_subtest("task_sleepable")) + test_task_sleepable(); if (test__start_subtest("task_stack")) test_task_stack(); if (test__start_subtest("task_file")) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index e3166a81e989..dd30b1e3a67c 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c 
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -11,7 +11,12 @@ enum { void test_bpf_nf_ct(int mode) { struct test_bpf_nf *skel; - int prog_fd, err, retval; + int prog_fd, err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); skel = test_bpf_nf__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load")) @@ -22,8 +27,7 @@ void test_bpf_nf_ct(int mode) else prog_fd = bpf_program__fd(skel->progs.nf_skb_ct_test); - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, - (__u32 *)&retval, NULL); + err = bpf_prog_test_run_opts(prog_fd, &topts); if (!ASSERT_OK(err, "bpf_prog_test_run")) goto end; diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 14f9b6136794..8b652f5ce423 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3939,6 +3939,25 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid component_idx", }, { + .descr = "decl_tag test #15, func, invalid func proto", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_DECL_TAG_ENC(NAME_TBD, 3, 0), /* [2] */ + BTF_FUNC_ENC(NAME_TBD, 8), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, +{ .descr = "type_tag test #1", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ @@ -4561,7 +4580,7 @@ static void do_test_file(unsigned int test_num) btf_ext__free(btf_ext); /* temporary disable LIBBPF_STRICT_MAP_DEFINITIONS to test legacy maps */ - libbpf_set_strict_mode((__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS); + libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS); obj = bpf_object__open(test->file); err = libbpf_get_error(obj); if (CHECK(err, "obj: %d", err)) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_tag.c b/tools/testing/selftests/bpf/prog_tests/btf_tag.c index 88d63e23e35f..f7560b54a6bb 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_tag.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_tag.c @@ -1,19 +1,21 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ #include <test_progs.h> -#include "btf_decl_tag.skel.h" +#include <bpf/btf.h> +#include "test_btf_decl_tag.skel.h" /* struct btf_type_tag_test is referenced in btf_type_tag.skel.h */ struct btf_type_tag_test { int **p; }; #include "btf_type_tag.skel.h" +#include "btf_type_tag_user.skel.h" static void test_btf_decl_tag(void) { - struct btf_decl_tag *skel; + struct test_btf_decl_tag *skel; - skel = btf_decl_tag__open_and_load(); + skel = test_btf_decl_tag__open_and_load(); if (!ASSERT_OK_PTR(skel, "btf_decl_tag")) return; @@ -22,7 +24,7 @@ static void test_btf_decl_tag(void) test__skip(); } - btf_decl_tag__destroy(skel); + test_btf_decl_tag__destroy(skel); } static void test_btf_type_tag(void) @@ -41,10 +43,101 @@ static void test_btf_type_tag(void) btf_type_tag__destroy(skel); } +static void test_btf_type_tag_mod_user(bool load_test_user1) +{ + const char *module_name = "bpf_testmod"; + struct btf *vmlinux_btf, *module_btf; + struct btf_type_tag_user *skel; + __s32 type_id; + int err; + + if (!env.has_testmod) { + test__skip(); + return; + } + + /* skip the test if the module does not 
have __user tags */ + vmlinux_btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF")) + return; + + module_btf = btf__load_module_btf(module_name, vmlinux_btf); + if (!ASSERT_OK_PTR(module_btf, "could not load module BTF")) + goto free_vmlinux_btf; + + type_id = btf__find_by_name_kind(module_btf, "user", BTF_KIND_TYPE_TAG); + if (type_id <= 0) { + printf("%s:SKIP: btf_type_tag attribute not in %s", __func__, module_name); + test__skip(); + goto free_module_btf; + } + + skel = btf_type_tag_user__open(); + if (!ASSERT_OK_PTR(skel, "btf_type_tag_user")) + goto free_module_btf; + + bpf_program__set_autoload(skel->progs.test_sys_getsockname, false); + if (load_test_user1) + bpf_program__set_autoload(skel->progs.test_user2, false); + else + bpf_program__set_autoload(skel->progs.test_user1, false); + + err = btf_type_tag_user__load(skel); + ASSERT_ERR(err, "btf_type_tag_user"); + + btf_type_tag_user__destroy(skel); + +free_module_btf: + btf__free(module_btf); +free_vmlinux_btf: + btf__free(vmlinux_btf); +} + +static void test_btf_type_tag_vmlinux_user(void) +{ + struct btf_type_tag_user *skel; + struct btf *vmlinux_btf; + __s32 type_id; + int err; + + /* skip the test if the vmlinux does not have __user tags */ + vmlinux_btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF")) + return; + + type_id = btf__find_by_name_kind(vmlinux_btf, "user", BTF_KIND_TYPE_TAG); + if (type_id <= 0) { + printf("%s:SKIP: btf_type_tag attribute not in vmlinux btf", __func__); + test__skip(); + goto free_vmlinux_btf; + } + + skel = btf_type_tag_user__open(); + if (!ASSERT_OK_PTR(skel, "btf_type_tag_user")) + goto free_vmlinux_btf; + + bpf_program__set_autoload(skel->progs.test_user2, false); + bpf_program__set_autoload(skel->progs.test_user1, false); + + err = btf_type_tag_user__load(skel); + ASSERT_ERR(err, "btf_type_tag_user"); + + btf_type_tag_user__destroy(skel); + +free_vmlinux_btf: + btf__free(vmlinux_btf); +} + void test_btf_tag(void) { if (test__start_subtest("btf_decl_tag")) test_btf_decl_tag(); if (test__start_subtest("btf_type_tag")) test_btf_type_tag(); + if (test__start_subtest("btf_type_tag_user_mod1")) + test_btf_type_tag_mod_user(true); + if (test__start_subtest("btf_type_tag_user_mod2")) + test_btf_type_tag_mod_user(false); + if (test__start_subtest("btf_type_tag_sys_user_vmlinux")) + test_btf_type_tag_vmlinux_user(); } diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c index f73e6e36b74d..12f4395f18b3 100644 --- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -79,28 +79,21 @@ static void test_check_mtu_run_xdp(struct test_check_mtu *skel, struct bpf_program *prog, __u32 mtu_expect) { - const char *prog_name = bpf_program__name(prog); int retval_expect = XDP_PASS; __u32 mtu_result = 0; char buf[256] = {}; - int err; - struct bpf_prog_test_run_attr tattr = { + int err, prog_fd = bpf_program__fd(prog); + LIBBPF_OPTS(bpf_test_run_opts, topts, .repeat = 1, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), .data_out = buf, .data_size_out = sizeof(buf), - .prog_fd = bpf_program__fd(prog), - }; - - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err != 0, "bpf_prog_test_run", - "prog_name:%s (err %d errno %d retval %d)\n", - prog_name, err, errno, tattr.retval); + ); - CHECK(tattr.retval != retval_expect, "retval", - "progname:%s unexpected retval=%d expected=%d\n", - prog_name, tattr.retval, 
retval_expect); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, retval_expect, "retval"); /* Extract MTU that BPF-prog got */ mtu_result = skel->bss->global_bpf_mtu_xdp; @@ -139,28 +132,21 @@ static void test_check_mtu_run_tc(struct test_check_mtu *skel, struct bpf_program *prog, __u32 mtu_expect) { - const char *prog_name = bpf_program__name(prog); int retval_expect = BPF_OK; __u32 mtu_result = 0; char buf[256] = {}; - int err; - struct bpf_prog_test_run_attr tattr = { - .repeat = 1, + int err, prog_fd = bpf_program__fd(prog); + LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), .data_out = buf, .data_size_out = sizeof(buf), - .prog_fd = bpf_program__fd(prog), - }; - - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err != 0, "bpf_prog_test_run", - "prog_name:%s (err %d errno %d retval %d)\n", - prog_name, err, errno, tattr.retval); + .repeat = 1, + ); - CHECK(tattr.retval != retval_expect, "retval", - "progname:%s unexpected retval=%d expected=%d\n", - prog_name, tattr.retval, retval_expect); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, retval_expect, "retval"); /* Extract MTU that BPF-prog got */ mtu_result = skel->bss->global_bpf_mtu_tc; diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c index e075d03ab630..224f016b0a53 100644 --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -161,7 +161,7 @@ static socklen_t prepare_addr(struct sockaddr_storage *addr, int family) } } -static bool was_decapsulated(struct bpf_prog_test_run_attr *tattr) +static bool was_decapsulated(struct bpf_test_run_opts *tattr) { return tattr->data_size_out < tattr->data_size_in; } @@ -367,12 +367,12 @@ static void close_fds(int *fds, int n) static void test_cls_redirect_common(struct bpf_program *prog) { - struct bpf_prog_test_run_attr tattr = {}; + LIBBPF_OPTS(bpf_test_run_opts, tattr); int families[] = { AF_INET, AF_INET6 }; struct sockaddr_storage ss; struct sockaddr *addr; socklen_t slen; - int i, j, err; + int i, j, err, prog_fd; int servers[__NR_KIND][ARRAY_SIZE(families)] = {}; int conns[__NR_KIND][ARRAY_SIZE(families)] = {}; struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)]; @@ -394,7 +394,7 @@ static void test_cls_redirect_common(struct bpf_program *prog) goto cleanup; } - tattr.prog_fd = bpf_program__fd(prog); + prog_fd = bpf_program__fd(prog); for (i = 0; i < ARRAY_SIZE(tests); i++) { struct test_cfg *test = &tests[i]; @@ -415,7 +415,7 @@ static void test_cls_redirect_common(struct bpf_program *prog) if (CHECK_FAIL(!tattr.data_size_in)) continue; - err = bpf_prog_test_run_xattr(&tattr); + err = bpf_prog_test_run_opts(prog_fd, &tattr); if (CHECK_FAIL(err)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/core_kern.c b/tools/testing/selftests/bpf/prog_tests/core_kern.c index 561c5185d886..6a5a1c019a5d 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_kern.c +++ b/tools/testing/selftests/bpf/prog_tests/core_kern.c @@ -7,8 +7,22 @@ void test_core_kern_lskel(void) { struct core_kern_lskel *skel; + int link_fd; skel = core_kern_lskel__open_and_load(); - ASSERT_OK_PTR(skel, "open_and_load"); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + link_fd = core_kern_lskel__core_relo_proto__attach(skel); + if (!ASSERT_GT(link_fd, 0, "attach(core_relo_proto)")) + goto cleanup; + + /* 
trigger tracepoints */ + usleep(1); + ASSERT_TRUE(skel->bss->proto_out[0], "bpf_core_type_exists"); + ASSERT_FALSE(skel->bss->proto_out[1], "!bpf_core_type_exists"); + ASSERT_TRUE(skel->bss->proto_out[2], "bpf_core_type_exists. nested"); + +cleanup: core_kern_lskel__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c b/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c new file mode 100644 index 000000000000..04cc145bc26a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "test_progs.h" +#include "core_kern_overflow.lskel.h" + +void test_core_kern_overflow_lskel(void) +{ + struct core_kern_overflow_lskel *skel; + + skel = core_kern_overflow_lskel__open_and_load(); + if (!ASSERT_NULL(skel, "open_and_load")) + core_kern_overflow_lskel__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c index cbaa44ffb8c6..5aa52cc31dc2 100644 --- a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c +++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c @@ -26,10 +26,10 @@ static void test_dummy_st_ops_attach(void) static void test_dummy_init_ret_value(void) { __u64 args[1] = {0}; - struct bpf_prog_test_run_attr attr = { - .ctx_size_in = sizeof(args), + LIBBPF_OPTS(bpf_test_run_opts, attr, .ctx_in = args, - }; + .ctx_size_in = sizeof(args), + ); struct dummy_st_ops *skel; int fd, err; @@ -38,8 +38,7 @@ static void test_dummy_init_ret_value(void) return; fd = bpf_program__fd(skel->progs.test_1); - attr.prog_fd = fd; - err = bpf_prog_test_run_xattr(&attr); + err = bpf_prog_test_run_opts(fd, &attr); ASSERT_OK(err, "test_run"); ASSERT_EQ(attr.retval, 0xf2f3f4f5, "test_ret"); @@ -53,10 +52,10 @@ static void test_dummy_init_ptr_arg(void) .val = exp_retval, }; __u64 args[1] = {(unsigned long)&in_state}; - struct bpf_prog_test_run_attr attr = { - .ctx_size_in = sizeof(args), + LIBBPF_OPTS(bpf_test_run_opts, attr, .ctx_in = args, - }; + .ctx_size_in = sizeof(args), + ); struct dummy_st_ops *skel; int fd, err; @@ -65,8 +64,7 @@ static void test_dummy_init_ptr_arg(void) return; fd = bpf_program__fd(skel->progs.test_1); - attr.prog_fd = fd; - err = bpf_prog_test_run_xattr(&attr); + err = bpf_prog_test_run_opts(fd, &attr); ASSERT_OK(err, "test_run"); ASSERT_EQ(in_state.val, 0x5a, "test_ptr_ret"); ASSERT_EQ(attr.retval, exp_retval, "test_ret"); @@ -77,10 +75,10 @@ static void test_dummy_init_ptr_arg(void) static void test_dummy_multiple_args(void) { __u64 args[5] = {0, -100, 0x8a5f, 'c', 0x1234567887654321ULL}; - struct bpf_prog_test_run_attr attr = { - .ctx_size_in = sizeof(args), + LIBBPF_OPTS(bpf_test_run_opts, attr, .ctx_in = args, - }; + .ctx_size_in = sizeof(args), + ); struct dummy_st_ops *skel; int fd, err; size_t i; @@ -91,8 +89,7 @@ static void test_dummy_multiple_args(void) return; fd = bpf_program__fd(skel->progs.test_2); - attr.prog_fd = fd; - err = bpf_prog_test_run_xattr(&attr); + err = bpf_prog_test_run_opts(fd, &attr); ASSERT_OK(err, "test_run"); for (i = 0; i < ARRAY_SIZE(args); i++) { snprintf(name, sizeof(name), "arg %zu", i); diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 4374ac8a8a91..130f5b82d2e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -9,38 +9,34 @@ void test_fentry_fexit(void) struct 
fentry_test_lskel *fentry_skel = NULL; struct fexit_test_lskel *fexit_skel = NULL; __u64 *fentry_res, *fexit_res; - __u32 duration = 0, retval; int err, prog_fd, i; + LIBBPF_OPTS(bpf_test_run_opts, topts); fentry_skel = fentry_test_lskel__open_and_load(); - if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) + if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load")) goto close_prog; fexit_skel = fexit_test_lskel__open_and_load(); - if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) + if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load")) goto close_prog; err = fentry_test_lskel__attach(fentry_skel); - if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err)) + if (!ASSERT_OK(err, "fentry_attach")) goto close_prog; err = fexit_test_lskel__attach(fexit_skel); - if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) + if (!ASSERT_OK(err, "fexit_attach")) goto close_prog; prog_fd = fexit_skel->progs.test1.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); - CHECK(err || retval, "ipv6", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "ipv6 test_run"); + ASSERT_OK(topts.retval, "ipv6 test retval"); fentry_res = (__u64 *)fentry_skel->bss; fexit_res = (__u64 *)fexit_skel->bss; printf("%lld\n", fentry_skel->bss->test1_result); for (i = 0; i < 8; i++) { - CHECK(fentry_res[i] != 1, "result", - "fentry_test%d failed err %lld\n", i + 1, fentry_res[i]); - CHECK(fexit_res[i] != 1, "result", - "fexit_test%d failed err %lld\n", i + 1, fexit_res[i]); + ASSERT_EQ(fentry_res[i], 1, "fentry result"); + ASSERT_EQ(fexit_res[i], 1, "fexit result"); } close_prog: diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index 12921b3850d2..c0d1d61d5f66 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -6,9 +6,9 @@ static int fentry_test(struct fentry_test_lskel *fentry_skel) { int err, prog_fd, i; - __u32 duration = 0, retval; int link_fd; __u64 *result; + LIBBPF_OPTS(bpf_test_run_opts, topts); err = fentry_test_lskel__attach(fentry_skel); if (!ASSERT_OK(err, "fentry_attach")) @@ -20,10 +20,9 @@ static int fentry_test(struct fentry_test_lskel *fentry_skel) return -1; prog_fd = fentry_skel->progs.test1.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); - ASSERT_EQ(retval, 0, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); result = (__u64 *)fentry_skel->bss; for (i = 0; i < sizeof(*fentry_skel->bss) / sizeof(__u64); i++) { diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index c52f99f6a909..d9aad15e0d24 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -58,12 +58,17 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, test_cb cb) { struct bpf_object *obj = NULL, *tgt_obj; - __u32 retval, tgt_prog_id, info_len; + __u32 tgt_prog_id, info_len; struct bpf_prog_info prog_info = {}; struct bpf_program **prog = NULL, *p; struct bpf_link **link = NULL; int err, tgt_fd, i; struct btf *btf; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v6, + .data_size_in = sizeof(pkt_v6), + .repeat = 1, + ); err = 
bpf_prog_test_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, &tgt_obj, &tgt_fd); @@ -132,7 +137,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, &link_info, &info_len); ASSERT_OK(err, "link_fd_get_info"); ASSERT_EQ(link_info.tracing.attach_type, - bpf_program__get_expected_attach_type(prog[i]), + bpf_program__expected_attach_type(prog[i]), "link_attach_type"); ASSERT_EQ(link_info.tracing.target_obj_id, tgt_prog_id, "link_tgt_obj_id"); ASSERT_EQ(link_info.tracing.target_btf_id, btf_id, "link_tgt_btf_id"); @@ -147,10 +152,9 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, if (!run_prog) goto close_prog; - err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6), - NULL, NULL, &retval, NULL); + err = bpf_prog_test_run_opts(tgt_fd, &topts); ASSERT_OK(err, "prog_run"); - ASSERT_EQ(retval, 0, "prog_run_ret"); + ASSERT_EQ(topts.retval, 0, "prog_run_ret"); if (check_data_map(obj, prog_cnt, false)) goto close_prog; @@ -225,29 +229,31 @@ static int test_second_attach(struct bpf_object *obj) const char *tgt_obj_file = "./test_pkt_access.o"; struct bpf_program *prog = NULL; struct bpf_object *tgt_obj; - __u32 duration = 0, retval; struct bpf_link *link; int err = 0, tgt_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v6, + .data_size_in = sizeof(pkt_v6), + .repeat = 1, + ); prog = bpf_object__find_program_by_name(obj, prog_name); - if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name)) + if (!ASSERT_OK_PTR(prog, "find_prog")) return -ENOENT; err = bpf_prog_test_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC, &tgt_obj, &tgt_fd); - if (CHECK(err, "second_prog_load", "file %s err %d errno %d\n", - tgt_obj_file, err, errno)) + if (!ASSERT_OK(err, "second_prog_load")) return err; link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name); if (!ASSERT_OK_PTR(link, "second_link")) goto out; - err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6), - NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "ipv6", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration)) + err = bpf_prog_test_run_opts(tgt_fd, &topts); + if (!ASSERT_OK(err, "ipv6 test_run")) + goto out; + if (!ASSERT_OK(topts.retval, "ipv6 retval")) goto out; err = check_data_map(obj, 1, true); diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index e4cede6b4b2d..3ee2107bbf7a 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -10,9 +10,7 @@ void test_fexit_stress(void) char test_skb[128] = {}; int fexit_fd[CNT] = {}; int link_fd[CNT] = {}; - __u32 duration = 0; char error[4096]; - __u32 prog_ret; int err, i, filter_fd; const struct bpf_insn trace_program[] = { @@ -36,9 +34,15 @@ void test_fexit_stress(void) .log_size = sizeof(error), ); + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = test_skb, + .data_size_in = sizeof(test_skb), + .repeat = 1, + ); + err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1", trace_opts.expected_attach_type); - if (CHECK(err <= 0, "find_vmlinux_btf_id", "failed: %d\n", err)) + if (!ASSERT_GT(err, 0, "find_vmlinux_btf_id")) goto out; trace_opts.attach_btf_id = err; @@ -47,24 +51,20 @@ void test_fexit_stress(void) trace_program, sizeof(trace_program) / sizeof(struct bpf_insn), &trace_opts); - if (CHECK(fexit_fd[i] < 0, "fexit loaded", - "failed: %d errno %d\n", fexit_fd[i], errno)) + if (!ASSERT_GE(fexit_fd[i], 0, "fexit load")) goto out; link_fd[i] = bpf_raw_tracepoint_open(NULL, 
fexit_fd[i]); - if (CHECK(link_fd[i] < 0, "fexit attach failed", - "prog %d failed: %d err %d\n", i, link_fd[i], errno)) + if (!ASSERT_GE(link_fd[i], 0, "fexit attach")) goto out; } filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", skb_program, sizeof(skb_program) / sizeof(struct bpf_insn), &skb_opts); - if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n", - filter_fd, errno)) + if (!ASSERT_GE(filter_fd, 0, "test_program_loaded")) goto out; - err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, - 0, &prog_ret, 0); + err = bpf_prog_test_run_opts(filter_fd, &topts); close(filter_fd); CHECK_FAIL(err); out: diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index d4887d8bb396..101b7343036b 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -6,9 +6,9 @@ static int fexit_test(struct fexit_test_lskel *fexit_skel) { int err, prog_fd, i; - __u32 duration = 0, retval; int link_fd; __u64 *result; + LIBBPF_OPTS(bpf_test_run_opts, topts); err = fexit_test_lskel__attach(fexit_skel); if (!ASSERT_OK(err, "fexit_attach")) @@ -20,10 +20,9 @@ static int fexit_test(struct fexit_test_lskel *fexit_skel) return -1; prog_fd = fexit_skel->progs.test1.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); - ASSERT_EQ(retval, 0, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); result = (__u64 *)fexit_skel->bss; for (i = 0; i < sizeof(*fexit_skel->bss) / sizeof(__u64); i++) { diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index dfafd62df50b..0c1661ea996e 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -13,8 +13,9 @@ #endif #define CHECK_FLOW_KEYS(desc, got, expected) \ - CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0, \ + _CHECK(memcmp(&got, &expected, sizeof(got)) != 0, \ desc, \ + topts.duration, \ "nhoff=%u/%u " \ "thoff=%u/%u " \ "addr_proto=0x%x/0x%x " \ @@ -487,7 +488,7 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys) /* Keep in sync with 'flags' from eth_get_headlen. 
*/ __u32 eth_get_headlen_flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG; - struct bpf_prog_test_run_attr tattr = {}; + LIBBPF_OPTS(bpf_test_run_opts, topts); struct bpf_flow_keys flow_keys = {}; __u32 key = (__u32)(tests[i].keys.sport) << 16 | tests[i].keys.dport; @@ -503,13 +504,12 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys) CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno); err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys); - CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err); + ASSERT_OK(err, "bpf_map_lookup_elem"); - CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err); CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); err = bpf_map_delete_elem(keys_fd, &key); - CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err); + ASSERT_OK(err, "bpf_map_delete_elem"); } } @@ -573,27 +573,24 @@ void test_flow_dissector(void) for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_flow_keys flow_keys; - struct bpf_prog_test_run_attr tattr = { - .prog_fd = prog_fd, + LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &tests[i].pkt, .data_size_in = sizeof(tests[i].pkt), .data_out = &flow_keys, - }; + ); static struct bpf_flow_keys ctx = {}; if (tests[i].flags) { - tattr.ctx_in = &ctx; - tattr.ctx_size_in = sizeof(ctx); + topts.ctx_in = &ctx; + topts.ctx_size_in = sizeof(ctx); ctx.flags = tests[i].flags; } - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(tattr.data_size_out != sizeof(flow_keys) || - err || tattr.retval != 1, - tests[i].name, - "err %d errno %d retval %d duration %d size %u/%zu\n", - err, errno, tattr.retval, tattr.duration, - tattr.data_size_out, sizeof(flow_keys)); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 1, "test_run retval"); + ASSERT_EQ(topts.data_size_out, sizeof(flow_keys), + "test_run data_size_out"); CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); } diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c index 93ac3f28226c..36afb409c25f 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c @@ -5,7 +5,6 @@ void serial_test_flow_dissector_load_bytes(void) { struct bpf_flow_keys flow_keys; - __u32 duration = 0, retval, size; struct bpf_insn prog[] = { // BPF_REG_1 - 1st argument: context // BPF_REG_2 - 2nd argument: offset, start at first byte @@ -27,22 +26,25 @@ void serial_test_flow_dissector_load_bytes(void) BPF_EXIT_INSN(), }; int fd, err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = &flow_keys, + .data_size_out = sizeof(flow_keys), + .repeat = 1, + ); /* make sure bpf_skb_load_bytes is not allowed from skb-less context */ fd = bpf_test_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); - CHECK(fd < 0, - "flow_dissector-bpf_skb_load_bytes-load", - "fd %d errno %d\n", - fd, errno); + ASSERT_GE(fd, 0, "bpf_test_load_program good fd"); - err = bpf_prog_test_run(fd, 1, &pkt_v4, sizeof(pkt_v4), - &flow_keys, &size, &retval, &duration); - CHECK(size != sizeof(flow_keys) || err || retval != 1, - "flow_dissector-bpf_skb_load_bytes", - "err %d errno %d retval %d duration %d size %u/%zu\n", - err, errno, retval, duration, size, sizeof(flow_keys)); + err = bpf_prog_test_run_opts(fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.data_size_out, 
sizeof(flow_keys), + "test_run data_size_out"); + ASSERT_EQ(topts.retval, 1, "test_run retval"); if (fd >= -1) close(fd); diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c index 68eb12a287d4..044df13ee069 100644 --- a/tools/testing/selftests/bpf/prog_tests/for_each.c +++ b/tools/testing/selftests/bpf/prog_tests/for_each.c @@ -12,8 +12,13 @@ static void test_hash_map(void) int i, err, hashmap_fd, max_entries, percpu_map_fd; struct for_each_hash_map_elem *skel; __u64 *percpu_valbuf = NULL; - __u32 key, num_cpus, retval; + __u32 key, num_cpus; __u64 val; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); skel = for_each_hash_map_elem__open_and_load(); if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load")) @@ -42,11 +47,10 @@ static void test_hash_map(void) if (!ASSERT_OK(err, "percpu_map_update")) goto out; - err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access), - 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, - &retval, &duration); - if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n", - err, errno, retval)) + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts); + duration = topts.duration; + if (CHECK(err || topts.retval, "ipv4", "err %d errno %d retval %d\n", + err, errno, topts.retval)) goto out; ASSERT_EQ(skel->bss->hashmap_output, 4, "hashmap_output"); @@ -69,11 +73,16 @@ out: static void test_array_map(void) { - __u32 key, num_cpus, max_entries, retval; + __u32 key, num_cpus, max_entries; int i, arraymap_fd, percpu_map_fd, err; struct for_each_array_map_elem *skel; __u64 *percpu_valbuf = NULL; __u64 val, expected_total; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); skel = for_each_array_map_elem__open_and_load(); if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load")) @@ -106,11 +115,10 @@ static void test_array_map(void) if (!ASSERT_OK(err, "percpu_map_update")) goto out; - err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access), - 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, - &retval, &duration); - if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n", - err, errno, retval)) + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts); + duration = topts.duration; + if (CHECK(err || topts.retval, "ipv4", "err %d errno %d retval %d\n", + err, errno, topts.retval)) goto out; ASSERT_EQ(skel->bss->arraymap_output, expected_total, "array_output"); diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c index 85c427119fe9..28cf63963cb7 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c +++ b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c @@ -5,8 +5,8 @@ void test_get_func_args_test(void) { struct get_func_args_test *skel = NULL; - __u32 duration = 0, retval; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); skel = get_func_args_test__open_and_load(); if (!ASSERT_OK_PTR(skel, "get_func_args_test__open_and_load")) @@ -20,19 +20,17 @@ void test_get_func_args_test(void) * fentry/fexit programs. 
*/ prog_fd = bpf_program__fd(skel->progs.test1); - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); - ASSERT_EQ(retval, 0, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); /* This runs bpf_modify_return_test function and triggers * fmod_ret_test and fexit_test programs. */ prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); - ASSERT_EQ(retval, 1234, "test_run"); + ASSERT_EQ(topts.retval, 1234, "test_run"); ASSERT_EQ(skel->bss->test1_result, 1, "test1_result"); ASSERT_EQ(skel->bss->test2_result, 1, "test2_result"); diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c index 02a465f36d59..938dbd4d7c2f 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c +++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c @@ -5,8 +5,8 @@ void test_get_func_ip_test(void) { struct get_func_ip_test *skel = NULL; - __u32 duration = 0, retval; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); skel = get_func_ip_test__open(); if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open")) @@ -29,14 +29,12 @@ void test_get_func_ip_test(void) goto cleanup; prog_fd = bpf_program__fd(skel->progs.test1); - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); - ASSERT_EQ(retval, 0, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); prog_fd = bpf_program__fd(skel->progs.test5); - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c index 8d5a6023a1bb..5308de1ed478 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c @@ -27,7 +27,7 @@ void test_get_stackid_cannot_attach(void) return; /* override program type */ - bpf_program__set_perf_event(skel->progs.oncpu); + bpf_program__set_type(skel->progs.oncpu, BPF_PROG_TYPE_PERF_EVENT); err = test_stacktrace_build_id__load(skel); if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err)) diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c index 917165e04427..6fb3d3155c35 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data.c @@ -132,24 +132,26 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration) void test_global_data(void) { const char *file = "./test_global_data.o"; - __u32 duration = 0, retval; struct bpf_object *obj; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); - if (CHECK(err, "load program", "error %d loading %s\n", err, file)) + if (!ASSERT_OK(err, "load program")) return; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "pass global data run", - "err %d 
errno %d retval %d duration %d\n", - err, errno, retval, duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "pass global data run err"); + ASSERT_OK(topts.retval, "pass global data run retval"); - test_global_data_number(obj, duration); - test_global_data_string(obj, duration); - test_global_data_struct(obj, duration); - test_global_data_rdonly(obj, duration); + test_global_data_number(obj, topts.duration); + test_global_data_string(obj, topts.duration); + test_global_data_struct(obj, topts.duration); + test_global_data_rdonly(obj, topts.duration); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_args.c b/tools/testing/selftests/bpf/prog_tests/global_func_args.c index 93a2439237b0..29039a36cce5 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_func_args.c +++ b/tools/testing/selftests/bpf/prog_tests/global_func_args.c @@ -40,19 +40,21 @@ static void test_global_func_args0(struct bpf_object *obj) void test_global_func_args(void) { const char *file = "./test_global_func_args.o"; - __u32 retval; struct bpf_object *obj; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); if (CHECK(err, "load program", "error %d loading %s\n", err, file)) return; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "pass global func args run", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_OK(topts.retval, "test_run retval"); test_global_func_args0(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c index ce10d2fc3a6c..1cee6957285e 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -53,24 +53,24 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) void serial_test_kfree_skb(void) { struct __sk_buff skb = {}; - struct bpf_prog_test_run_attr tattr = { + LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v6, .data_size_in = sizeof(pkt_v6), .ctx_in = &skb, .ctx_size_in = sizeof(skb), - }; + ); struct kfree_skb *skel = NULL; struct bpf_link *link; struct bpf_object *obj; struct perf_buffer *pb = NULL; - int err; + int err, prog_fd; bool passed = false; __u32 duration = 0; const int zero = 0; bool test_ok[2]; err = bpf_prog_test_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, - &obj, &tattr.prog_fd); + &obj, &prog_fd); if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) return; @@ -100,11 +100,9 @@ void serial_test_kfree_skb(void) goto close_prog; memcpy(skb.cb, &cb, sizeof(cb)); - err = bpf_prog_test_run_xattr(&tattr); - duration = tattr.duration; - CHECK(err || tattr.retval, "ipv6", - "err %d errno %d retval %d duration %d\n", - err, errno, tattr.retval, duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "ipv6 test_run"); + ASSERT_OK(topts.retval, "ipv6 test_run retval"); /* read perf buffer */ err = perf_buffer__poll(pb, 100); diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index b39a4f09aefd..c00eb974eb85 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ 
b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -9,29 +9,31 @@ static void test_main(void) { struct kfunc_call_test_lskel *skel; - int prog_fd, retval, err; + int prog_fd, err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); skel = kfunc_call_test_lskel__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel")) return; prog_fd = skel->progs.kfunc_call_test1.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, (__u32 *)&retval, NULL); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "bpf_prog_test_run(test1)"); - ASSERT_EQ(retval, 12, "test1-retval"); + ASSERT_EQ(topts.retval, 12, "test1-retval"); prog_fd = skel->progs.kfunc_call_test2.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, (__u32 *)&retval, NULL); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "bpf_prog_test_run(test2)"); - ASSERT_EQ(retval, 3, "test2-retval"); + ASSERT_EQ(topts.retval, 3, "test2-retval"); prog_fd = skel->progs.kfunc_call_test_ref_btf_id.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, (__u32 *)&retval, NULL); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "bpf_prog_test_run(test_ref_btf_id)"); - ASSERT_EQ(retval, 0, "test_ref_btf_id-retval"); + ASSERT_EQ(topts.retval, 0, "test_ref_btf_id-retval"); kfunc_call_test_lskel__destroy(skel); } @@ -39,17 +41,21 @@ static void test_main(void) static void test_subprog(void) { struct kfunc_call_test_subprog *skel; - int prog_fd, retval, err; + int prog_fd, err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); skel = kfunc_call_test_subprog__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel")) return; prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1); - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, (__u32 *)&retval, NULL); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "bpf_prog_test_run(test1)"); - ASSERT_EQ(retval, 10, "test1-retval"); + ASSERT_EQ(topts.retval, 10, "test1-retval"); ASSERT_NEQ(skel->data->active_res, -1, "active_res"); ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res"); @@ -59,17 +65,21 @@ static void test_subprog(void) static void test_subprog_lskel(void) { struct kfunc_call_test_subprog_lskel *skel; - int prog_fd, retval, err; + int prog_fd, err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); skel = kfunc_call_test_subprog_lskel__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel")) return; prog_fd = skel->progs.kfunc_call_test1.prog_fd; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, (__u32 *)&retval, NULL); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "bpf_prog_test_run(test1)"); - ASSERT_EQ(retval, 10, "test1-retval"); + ASSERT_EQ(topts.retval, 10, "test1-retval"); ASSERT_NEQ(skel->data->active_res, -1, "active_res"); ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res"); diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c index d490ad80eccb..a1ebac70ec29 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c @@ -6,11 +6,15 @@ #include "test_ksyms_module.lskel.h" #include "test_ksyms_module.skel.h" -void test_ksyms_module_lskel(void) 
+static void test_ksyms_module_lskel(void) { struct test_ksyms_module_lskel *skel; - int retval; int err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); if (!env.has_testmod) { test__skip(); @@ -20,20 +24,24 @@ void test_ksyms_module_lskel(void) skel = test_ksyms_module_lskel__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_ksyms_module_lskel__open_and_load")) return; - err = bpf_prog_test_run(skel->progs.load.prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, (__u32 *)&retval, NULL); + err = bpf_prog_test_run_opts(skel->progs.load.prog_fd, &topts); if (!ASSERT_OK(err, "bpf_prog_test_run")) goto cleanup; - ASSERT_EQ(retval, 0, "retval"); + ASSERT_EQ(topts.retval, 0, "retval"); ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym"); cleanup: test_ksyms_module_lskel__destroy(skel); } -void test_ksyms_module_libbpf(void) +static void test_ksyms_module_libbpf(void) { struct test_ksyms_module *skel; - int retval, err; + int err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); if (!env.has_testmod) { test__skip(); @@ -43,11 +51,10 @@ void test_ksyms_module_libbpf(void) skel = test_ksyms_module__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open")) return; - err = bpf_prog_test_run(bpf_program__fd(skel->progs.load), 1, &pkt_v4, - sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL); + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.load), &topts); if (!ASSERT_OK(err, "bpf_prog_test_run")) goto cleanup; - ASSERT_EQ(retval, 0, "retval"); + ASSERT_EQ(topts.retval, 0, "retval"); ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym"); cleanup: test_ksyms_module__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c index 540ef28fabff..55f733ff4109 100644 --- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c +++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c @@ -23,12 +23,16 @@ static void test_l4lb(const char *file) __u8 flags; } real_def = {.dst = MAGIC_VAL}; __u32 ch_key = 11, real_num = 3; - __u32 duration, retval, size; int err, i, prog_fd, map_fd; __u64 bytes = 0, pkts = 0; struct bpf_object *obj; char buf[128]; u32 *magic = (u32 *)buf; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = NUM_ITER, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) @@ -49,19 +53,24 @@ static void test_l4lb(const char *file) goto out; bpf_map_update_elem(map_fd, &real_num, &real_def, 0); - err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), - buf, &size, &retval, &duration); - CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 || - *magic != MAGIC_VAL, "ipv4", - "err %d errno %d retval %d size %d magic %x\n", - err, errno, retval, size, *magic); + topts.data_in = &pkt_v4; + topts.data_size_in = sizeof(pkt_v4); - err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), - buf, &size, &retval, &duration); - CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 || - *magic != MAGIC_VAL, "ipv6", - "err %d errno %d retval %d size %d magic %x\n", - err, errno, retval, size, *magic); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 7 /*TC_ACT_REDIRECT*/, "ipv4 test_run retval"); + ASSERT_EQ(topts.data_size_out, 54, "ipv4 test_run data_size_out"); + ASSERT_EQ(*magic, 
MAGIC_VAL, "ipv4 magic"); + + topts.data_in = &pkt_v6; + topts.data_size_in = sizeof(pkt_v6); + topts.data_size_out = sizeof(buf); /* reset out size */ + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 7 /*TC_ACT_REDIRECT*/, "ipv6 test_run retval"); + ASSERT_EQ(topts.data_size_out, 74, "ipv6 test_run data_size_out"); + ASSERT_EQ(*magic, MAGIC_VAL, "ipv6 magic"); map_fd = bpf_find_map(__func__, obj, "stats"); if (map_fd < 0) diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c index e469b023962b..1ef377a7e731 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_buf.c +++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c @@ -202,7 +202,7 @@ static void bpf_btf_load_log_buf(void) const void *raw_btf_data; __u32 raw_btf_size; struct btf *btf; - char *log_buf; + char *log_buf = NULL; int fd = -1; btf = btf__new_empty(); diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c index 23d19e9cf26a..e4e99b37df64 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c @@ -4,14 +4,17 @@ static void *spin_lock_thread(void *arg) { - __u32 duration, retval; int err, prog_fd = *(u32 *) arg; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 10000, + ); + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run_opts err"); + ASSERT_OK(topts.retval, "test_run_opts retval"); - err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); pthread_exit(arg); } diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c index 273725504f11..43e502acf050 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_ptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c @@ -9,10 +9,16 @@ void test_map_ptr(void) { struct map_ptr_kern_lskel *skel; - __u32 duration = 0, retval; char buf[128]; int err; int page_size = getpagesize(); + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); skel = map_ptr_kern_lskel__open(); if (!ASSERT_OK_PTR(skel, "skel_open")) @@ -26,14 +32,12 @@ void test_map_ptr(void) skel->bss->page_size = page_size; - err = bpf_prog_test_run(skel->progs.cg_skb.prog_fd, 1, &pkt_v4, - sizeof(pkt_v4), buf, NULL, &retval, NULL); + err = bpf_prog_test_run_opts(skel->progs.cg_skb.prog_fd, &topts); - if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno)) + if (!ASSERT_OK(err, "test_run")) goto cleanup; - if (CHECK(!retval, "retval", "retval=%d map_type=%u line=%u\n", retval, - skel->bss->g_map_type, skel->bss->g_line)) + if (!ASSERT_NEQ(topts.retval, 0, "test_run retval")) goto cleanup; cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c index b772fe30ce9b..5d9955af6247 100644 --- a/tools/testing/selftests/bpf/prog_tests/modify_return.c +++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c @@ -15,39 +15,31 @@ static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret) { struct modify_return *skel = NULL; int err, prog_fd; - __u32 duration = 0, retval; __u16 side_effect; __s16 
ret; + LIBBPF_OPTS(bpf_test_run_opts, topts); skel = modify_return__open_and_load(); - if (CHECK(!skel, "skel_load", "modify_return skeleton failed\n")) + if (!ASSERT_OK_PTR(skel, "skel_load")) goto cleanup; err = modify_return__attach(skel); - if (CHECK(err, "modify_return", "attach failed: %d\n", err)) + if (!ASSERT_OK(err, "modify_return__attach failed")) goto cleanup; skel->bss->input_retval = input_retval; prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, 0, - &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); - CHECK(err, "test_run", "err %d errno %d\n", err, errno); + side_effect = UPPER(topts.retval); + ret = LOWER(topts.retval); - side_effect = UPPER(retval); - ret = LOWER(retval); - - CHECK(ret != want_ret, "test_run", - "unexpected ret: %d, expected: %d\n", ret, want_ret); - CHECK(side_effect != want_side_effect, "modify_return", - "unexpected side_effect: %d\n", side_effect); - - CHECK(skel->bss->fentry_result != 1, "modify_return", - "fentry failed\n"); - CHECK(skel->bss->fexit_result != 1, "modify_return", - "fexit failed\n"); - CHECK(skel->bss->fmod_ret_result != 1, "modify_return", - "fmod_ret failed\n"); + ASSERT_EQ(ret, want_ret, "test_run ret"); + ASSERT_EQ(side_effect, want_side_effect, "modify_return side_effect"); + ASSERT_EQ(skel->bss->fentry_result, 1, "modify_return fentry_result"); + ASSERT_EQ(skel->bss->fexit_result, 1, "modify_return fexit_result"); + ASSERT_EQ(skel->bss->fmod_ret_result, 1, "modify_return fmod_ret_result"); cleanup: modify_return__destroy(skel); @@ -63,4 +55,3 @@ void serial_test_modify_return(void) 0 /* want_side_effect */, -EINVAL /* want_ret */); } - diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c index 6628710ec3c6..0bcccdc34fbc 100644 --- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c +++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c @@ -6,23 +6,27 @@ void test_pkt_access(void) { const char *file = "./test_pkt_access.o"; struct bpf_object *obj; - __u32 duration, retval; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 100000, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; - err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "ipv4", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "ipv4 test_run_opts err"); + ASSERT_OK(topts.retval, "ipv4 test_run_opts retval"); + + topts.data_in = &pkt_v6; + topts.data_size_in = sizeof(pkt_v6); + topts.data_size_out = 0; /* reset from last call */ + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "ipv6 test_run_opts err"); + ASSERT_OK(topts.retval, "ipv6 test_run_opts retval"); - err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "ipv6", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c index c9d2d6a1bfcc..00ee1dd792aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c +++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c @@ -6,18 
+6,20 @@ void test_pkt_md_access(void) { const char *file = "./test_pkt_md_access.o"; struct bpf_object *obj; - __u32 duration, retval; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 10, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; - err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run_opts err"); + ASSERT_OK(topts.retval, "test_run_opts retval"); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c b/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c new file mode 100644 index 000000000000..1ccd2bdf8fa8 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> + +#include "test_pkt_access.skel.h" + +static const __u32 duration; + +static void check_run_cnt(int prog_fd, __u64 run_cnt) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + int err; + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd)) + return; + + CHECK(run_cnt != info.run_cnt, "run_cnt", + "incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt); +} + +void test_prog_run_opts(void) +{ + struct test_pkt_access *skel; + int err, stats_fd = -1, prog_fd; + char buf[10] = {}; + __u64 run_cnt = 0; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .repeat = 1, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = 5, + ); + + stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME); + if (!ASSERT_GE(stats_fd, 0, "enable_stats good fd")) + return; + + skel = test_pkt_access__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.test_pkt_access); + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_EQ(errno, ENOSPC, "test_run errno"); + ASSERT_ERR(err, "test_run"); + ASSERT_OK(topts.retval, "test_run retval"); + + ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4), "test_run data_size_out"); + ASSERT_EQ(buf[5], 0, "overflow, BPF_PROG_TEST_RUN ignored size hint"); + + run_cnt += topts.repeat; + check_run_cnt(prog_fd, run_cnt); + + topts.data_out = NULL; + topts.data_size_out = 0; + topts.repeat = 2; + errno = 0; + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(errno, "run_no_output errno"); + ASSERT_OK(err, "run_no_output err"); + ASSERT_OK(topts.retval, "run_no_output retval"); + + run_cnt += topts.repeat; + check_run_cnt(prog_fd, run_cnt); + +cleanup: + if (skel) + test_pkt_access__destroy(skel); + if (stats_fd >= 0) + close(stats_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c deleted file mode 100644 index 89fc98faf19e..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <test_progs.h> -#include <network_helpers.h> - -#include "test_pkt_access.skel.h" - -static const __u32 duration; - -static void check_run_cnt(int prog_fd, __u64 run_cnt) -{ - struct bpf_prog_info info = {}; - 
__u32 info_len = sizeof(info); - int err; - - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd)) - return; - - CHECK(run_cnt != info.run_cnt, "run_cnt", - "incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt); -} - -void test_prog_run_xattr(void) -{ - struct test_pkt_access *skel; - int err, stats_fd = -1; - char buf[10] = {}; - __u64 run_cnt = 0; - - struct bpf_prog_test_run_attr tattr = { - .repeat = 1, - .data_in = &pkt_v4, - .data_size_in = sizeof(pkt_v4), - .data_out = buf, - .data_size_out = 5, - }; - - stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME); - if (CHECK_ATTR(stats_fd < 0, "enable_stats", "failed %d\n", errno)) - return; - - skel = test_pkt_access__open_and_load(); - if (CHECK_ATTR(!skel, "open_and_load", "failed\n")) - goto cleanup; - - tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access); - - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err >= 0 || errno != ENOSPC || tattr.retval, "run", - "err %d errno %d retval %d\n", err, errno, tattr.retval); - - CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out", - "incorrect output size, want %zu have %u\n", - sizeof(pkt_v4), tattr.data_size_out); - - CHECK_ATTR(buf[5] != 0, "overflow", - "BPF_PROG_TEST_RUN ignored size hint\n"); - - run_cnt += tattr.repeat; - check_run_cnt(tattr.prog_fd, run_cnt); - - tattr.data_out = NULL; - tattr.data_size_out = 0; - tattr.repeat = 2; - errno = 0; - - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err || errno || tattr.retval, "run_no_output", - "err %d errno %d retval %d\n", err, errno, tattr.retval); - - tattr.data_size_out = 1; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err); - - run_cnt += tattr.repeat; - check_run_cnt(tattr.prog_fd, run_cnt); - -cleanup: - if (skel) - test_pkt_access__destroy(skel); - if (stats_fd >= 0) - close(stats_fd); -} diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c index b9822f914eeb..d2743fc10032 100644 --- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c +++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c @@ -10,11 +10,18 @@ enum { static void test_queue_stack_map_by_type(int type) { const int MAP_SIZE = 32; - __u32 vals[MAP_SIZE], duration, retval, size, val; + __u32 vals[MAP_SIZE], val; int i, err, prog_fd, map_in_fd, map_out_fd; char file[32], buf[128]; struct bpf_object *obj; struct iphdr iph; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); /* Fill test values to be used */ for (i = 0; i < MAP_SIZE; i++) @@ -58,38 +65,37 @@ static void test_queue_stack_map_by_type(int type) pkt_v4.iph.saddr = vals[MAP_SIZE - 1 - i] * 5; } - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - buf, &size, &retval, &duration); - if (err || retval || size != sizeof(pkt_v4)) + topts.data_size_out = sizeof(buf); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (err || topts.retval || + topts.data_size_out != sizeof(pkt_v4)) break; memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph)); if (iph.daddr != val) break; } - CHECK(err || retval || size != sizeof(pkt_v4) || iph.daddr != val, - "bpf_map_pop_elem", - "err %d errno %d retval %d size %d iph->daddr %u\n", - err, errno, retval, size, iph.daddr); + ASSERT_OK(err, "bpf_map_pop_elem"); + 
ASSERT_OK(topts.retval, "bpf_map_pop_elem test retval"); + ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4), + "bpf_map_pop_elem data_size_out"); + ASSERT_EQ(iph.daddr, val, "bpf_map_pop_elem iph.daddr"); /* Queue is empty, program should return TC_ACT_SHOT */ - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - buf, &size, &retval, &duration); - CHECK(err || retval != 2 /* TC_ACT_SHOT */|| size != sizeof(pkt_v4), - "check-queue-stack-map-empty", - "err %d errno %d retval %d size %d\n", - err, errno, retval, size); + topts.data_size_out = sizeof(buf); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "check-queue-stack-map-empty"); + ASSERT_EQ(topts.retval, 2 /* TC_ACT_SHOT */, + "check-queue-stack-map-empty test retval"); + ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4), + "check-queue-stack-map-empty data_size_out"); /* Check that the program pushed elements correctly */ for (i = 0; i < MAP_SIZE; i++) { err = bpf_map_lookup_and_delete_elem(map_out_fd, NULL, &val); - if (err || val != vals[i] * 5) - break; + ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"); + ASSERT_EQ(val, vals[i] * 5, "bpf_map_push_elem val"); } - - CHECK(i != MAP_SIZE && (err || val != vals[i] * 5), - "bpf_map_push_elem", "err %d value %u\n", err, val); - out: pkt_v4.iph.saddr = 0; bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c index 41720a62c4fa..fe5b8fae2c36 100644 --- a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c @@ -5,59 +5,54 @@ #include "bpf/libbpf_internal.h" #include "test_raw_tp_test_run.skel.h" -static int duration; - void test_raw_tp_test_run(void) { - struct bpf_prog_test_run_attr test_attr = {}; int comm_fd = -1, err, nr_online, i, prog_fd; __u64 args[2] = {0x1234ULL, 0x5678ULL}; int expected_retval = 0x1234 + 0x5678; struct test_raw_tp_test_run *skel; char buf[] = "new_name"; bool *online = NULL; - DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, - .ctx_in = args, - .ctx_size_in = sizeof(args), - .flags = BPF_F_TEST_RUN_ON_CPU, - ); + LIBBPF_OPTS(bpf_test_run_opts, opts, + .ctx_in = args, + .ctx_size_in = sizeof(args), + .flags = BPF_F_TEST_RUN_ON_CPU, + ); err = parse_cpu_mask_file("/sys/devices/system/cpu/online", &online, &nr_online); - if (CHECK(err, "parse_cpu_mask_file", "err %d\n", err)) + if (!ASSERT_OK(err, "parse_cpu_mask_file")) return; skel = test_raw_tp_test_run__open_and_load(); - if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + if (!ASSERT_OK_PTR(skel, "skel_open")) goto cleanup; err = test_raw_tp_test_run__attach(skel); - if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + if (!ASSERT_OK(err, "skel_attach")) goto cleanup; comm_fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); - if (CHECK(comm_fd < 0, "open /proc/self/comm", "err %d\n", errno)) + if (!ASSERT_GE(comm_fd, 0, "open /proc/self/comm")) goto cleanup; err = write(comm_fd, buf, sizeof(buf)); - CHECK(err < 0, "task rename", "err %d", errno); + ASSERT_GE(err, 0, "task rename"); - CHECK(skel->bss->count == 0, "check_count", "didn't increase\n"); - CHECK(skel->data->on_cpu != 0xffffffff, "check_on_cpu", "got wrong value\n"); + ASSERT_NEQ(skel->bss->count, 0, "check_count"); + ASSERT_EQ(skel->data->on_cpu, 0xffffffff, "check_on_cpu"); prog_fd = bpf_program__fd(skel->progs.rename); - test_attr.prog_fd = prog_fd; - test_attr.ctx_in = args; - test_attr.ctx_size_in = sizeof(__u64); + opts.ctx_in = args; + 
opts.ctx_size_in = sizeof(__u64); - err = bpf_prog_test_run_xattr(&test_attr); - CHECK(err == 0, "test_run", "should fail for too small ctx\n"); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_NEQ(err, 0, "test_run should fail for too small ctx"); - test_attr.ctx_size_in = sizeof(args); - err = bpf_prog_test_run_xattr(&test_attr); - CHECK(err < 0, "test_run", "err %d\n", errno); - CHECK(test_attr.retval != expected_retval, "check_retval", - "expect 0x%x, got 0x%x\n", expected_retval, test_attr.retval); + opts.ctx_size_in = sizeof(args); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(opts.retval, expected_retval, "check_retval"); for (i = 0; i < nr_online; i++) { if (!online[i]) @@ -66,28 +61,23 @@ void test_raw_tp_test_run(void) opts.cpu = i; opts.retval = 0; err = bpf_prog_test_run_opts(prog_fd, &opts); - CHECK(err < 0, "test_run_opts", "err %d\n", errno); - CHECK(skel->data->on_cpu != i, "check_on_cpu", - "expect %d got %d\n", i, skel->data->on_cpu); - CHECK(opts.retval != expected_retval, - "check_retval", "expect 0x%x, got 0x%x\n", - expected_retval, opts.retval); + ASSERT_OK(err, "test_run_opts"); + ASSERT_EQ(skel->data->on_cpu, i, "check_on_cpu"); + ASSERT_EQ(opts.retval, expected_retval, "check_retval"); } /* invalid cpu ID should fail with ENXIO */ opts.cpu = 0xffffffff; err = bpf_prog_test_run_opts(prog_fd, &opts); - CHECK(err >= 0 || errno != ENXIO, - "test_run_opts_fail", - "should failed with ENXIO\n"); + ASSERT_EQ(errno, ENXIO, "test_run_opts should fail with ENXIO"); + ASSERT_ERR(err, "test_run_opts_fail"); /* non-zero cpu w/o BPF_F_TEST_RUN_ON_CPU should fail with EINVAL */ opts.cpu = 1; opts.flags = 0; err = bpf_prog_test_run_opts(prog_fd, &opts); - CHECK(err >= 0 || errno != EINVAL, - "test_run_opts_fail", - "should failed with EINVAL\n"); + ASSERT_EQ(errno, EINVAL, "test_run_opts should fail with EINVAL"); + ASSERT_ERR(err, "test_run_opts_fail"); cleanup: close(comm_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c index 239baccabccb..f4aa7dab4766 100644 --- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c @@ -56,21 +56,23 @@ void serial_test_raw_tp_writable_test_run(void) 0, }; - __u32 prog_ret; - int err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, - 0, &prog_ret, 0); + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = test_skb, + .data_size_in = sizeof(test_skb), + .repeat = 1, + ); + int err = bpf_prog_test_run_opts(filter_fd, &topts); CHECK(err != 42, "test_run", "tracepoint did not modify return value\n"); - CHECK(prog_ret != 0, "test_run_ret", + CHECK(topts.retval != 0, "test_run_ret", "socket_filter did not return 0\n"); close(tp_fd); - err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, 0, - &prog_ret, 0); + err = bpf_prog_test_run_opts(filter_fd, &topts); CHECK(err != 0, "test_run_notrace", "test_run failed with %d errno %d\n", err, errno); - CHECK(prog_ret != 0, "test_run_ret_notrace", + CHECK(topts.retval != 0, "test_run_ret_notrace", "socket_filter did not return 0\n"); out_filterfd: diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c index aecfe662c070..70b49da5ca0a 100644 --- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c +++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c @@ -13,10 
+13,14 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type) struct itimerval timeo = { .it_value.tv_usec = 100000, /* 100ms */ }; - __u32 duration = 0, retval; int prog_fd; int err; int i; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 0xffffffff, + ); for (i = 0; i < ARRAY_SIZE(prog); i++) prog[i] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0); @@ -24,20 +28,17 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type) prog_fd = bpf_test_load_program(prog_type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); - CHECK(prog_fd < 0, "test-run", "errno %d\n", errno); + ASSERT_GE(prog_fd, 0, "test-run load"); err = sigaction(SIGALRM, &sigalrm_action, NULL); - CHECK(err, "test-run-signal-sigaction", "errno %d\n", errno); + ASSERT_OK(err, "test-run-signal-sigaction"); err = setitimer(ITIMER_REAL, &timeo, NULL); - CHECK(err, "test-run-signal-timer", "errno %d\n", errno); - - err = bpf_prog_test_run(prog_fd, 0xffffffff, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(duration > 500000000, /* 500ms */ - "test-run-signal-duration", - "duration %dns > 500ms\n", - duration); + ASSERT_OK(err, "test-run-signal-timer"); + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_LE(topts.duration, 500000000 /* 500ms */, + "test-run-signal-duration"); signal(SIGALRM, SIG_DFL); } diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c index b5319ba2ee27..ce0e555b5e38 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c @@ -20,97 +20,72 @@ void test_skb_ctx(void) .gso_size = 10, .hwtstamp = 11, }; - struct bpf_prog_test_run_attr tattr = { + LIBBPF_OPTS(bpf_test_run_opts, tattr, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), .ctx_in = &skb, .ctx_size_in = sizeof(skb), .ctx_out = &skb, .ctx_size_out = sizeof(skb), - }; + ); struct bpf_object *obj; - int err; - int i; + int err, prog_fd, i; - err = bpf_prog_test_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj, - &tattr.prog_fd); - if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + err = bpf_prog_test_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (!ASSERT_OK(err, "load")) return; /* ctx_in != NULL, ctx_size_in == 0 */ tattr.ctx_size_in = 0; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err == 0, "ctx_size_in", "err %d errno %d\n", err, errno); + err = bpf_prog_test_run_opts(prog_fd, &tattr); + ASSERT_NEQ(err, 0, "ctx_size_in"); tattr.ctx_size_in = sizeof(skb); /* ctx_out != NULL, ctx_size_out == 0 */ tattr.ctx_size_out = 0; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err == 0, "ctx_size_out", "err %d errno %d\n", err, errno); + err = bpf_prog_test_run_opts(prog_fd, &tattr); + ASSERT_NEQ(err, 0, "ctx_size_out"); tattr.ctx_size_out = sizeof(skb); /* non-zero [len, tc_index] fields should be rejected*/ skb.len = 1; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err == 0, "len", "err %d errno %d\n", err, errno); + err = bpf_prog_test_run_opts(prog_fd, &tattr); + ASSERT_NEQ(err, 0, "len"); skb.len = 0; skb.tc_index = 1; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err == 0, "tc_index", "err %d errno %d\n", err, errno); + err = bpf_prog_test_run_opts(prog_fd, &tattr); + ASSERT_NEQ(err, 0, "tc_index"); skb.tc_index = 0; /* non-zero [hash, sk] fields should be rejected */ skb.hash = 1; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err == 0, "hash", "err %d 
errno %d\n", err, errno); + err = bpf_prog_test_run_opts(prog_fd, &tattr); + ASSERT_NEQ(err, 0, "hash"); skb.hash = 0; skb.sk = (struct bpf_sock *)1; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err == 0, "sk", "err %d errno %d\n", err, errno); + err = bpf_prog_test_run_opts(prog_fd, &tattr); + ASSERT_NEQ(err, 0, "sk"); skb.sk = 0; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err != 0 || tattr.retval, - "run", - "err %d errno %d retval %d\n", - err, errno, tattr.retval); - - CHECK_ATTR(tattr.ctx_size_out != sizeof(skb), - "ctx_size_out", - "incorrect output size, want %zu have %u\n", - sizeof(skb), tattr.ctx_size_out); + err = bpf_prog_test_run_opts(prog_fd, &tattr); + ASSERT_OK(err, "test_run"); + ASSERT_OK(tattr.retval, "test_run retval"); + ASSERT_EQ(tattr.ctx_size_out, sizeof(skb), "ctx_size_out"); for (i = 0; i < 5; i++) - CHECK_ATTR(skb.cb[i] != i + 2, - "ctx_out_cb", - "skb->cb[i] == %d, expected %d\n", - skb.cb[i], i + 2); - CHECK_ATTR(skb.priority != 7, - "ctx_out_priority", - "skb->priority == %d, expected %d\n", - skb.priority, 7); - CHECK_ATTR(skb.ifindex != 1, - "ctx_out_ifindex", - "skb->ifindex == %d, expected %d\n", - skb.ifindex, 1); - CHECK_ATTR(skb.ingress_ifindex != 11, - "ctx_out_ingress_ifindex", - "skb->ingress_ifindex == %d, expected %d\n", - skb.ingress_ifindex, 11); - CHECK_ATTR(skb.tstamp != 8, - "ctx_out_tstamp", - "skb->tstamp == %lld, expected %d\n", - skb.tstamp, 8); - CHECK_ATTR(skb.mark != 10, - "ctx_out_mark", - "skb->mark == %u, expected %d\n", - skb.mark, 10); + ASSERT_EQ(skb.cb[i], i + 2, "ctx_out_cb"); + ASSERT_EQ(skb.priority, 7, "ctx_out_priority"); + ASSERT_EQ(skb.ifindex, 1, "ctx_out_ifindex"); + ASSERT_EQ(skb.ingress_ifindex, 11, "ctx_out_ingress_ifindex"); + ASSERT_EQ(skb.tstamp, 8, "ctx_out_tstamp"); + ASSERT_EQ(skb.mark, 10, "ctx_out_mark"); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c index 6f802a1c0800..97dc8b14be48 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c @@ -9,22 +9,22 @@ void test_skb_helpers(void) .gso_segs = 8, .gso_size = 10, }; - struct bpf_prog_test_run_attr tattr = { + LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), .ctx_in = &skb, .ctx_size_in = sizeof(skb), .ctx_out = &skb, .ctx_size_out = sizeof(skb), - }; + ); struct bpf_object *obj; - int err; + int err, prog_fd; - err = bpf_prog_test_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj, - &tattr.prog_fd); - if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + err = bpf_prog_test_load("./test_skb_helpers.o", + BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + if (!ASSERT_OK(err, "load")) return; - err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err, "len", "err %d errno %d\n", err, errno); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index 9fc040eaa482..9d211b5c22c4 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -1,9 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ +#define _GNU_SOURCE #include <netinet/in.h> #include <arpa/inet.h> #include <unistd.h> +#include <sched.h> #include <stdlib.h> #include <string.h> #include <errno.h> @@ 
-20,6 +22,7 @@ enum bpf_linum_array_idx { EGRESS_LINUM_IDX, INGRESS_LINUM_IDX, + READ_SK_DST_PORT_LINUM_IDX, __NR_BPF_LINUM_ARRAY_IDX, }; @@ -42,8 +45,16 @@ static __u64 child_cg_id; static int linum_map_fd; static __u32 duration; -static __u32 egress_linum_idx = EGRESS_LINUM_IDX; -static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; +static bool create_netns(void) +{ + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) + return false; + + if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo")) + return false; + + return true; +} static void print_sk(const struct bpf_sock *sk, const char *prefix) { @@ -91,19 +102,24 @@ static void check_result(void) { struct bpf_tcp_sock srv_tp, cli_tp, listen_tp; struct bpf_sock srv_sk, cli_sk, listen_sk; - __u32 ingress_linum, egress_linum; + __u32 idx, ingress_linum, egress_linum, linum; int err; - err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, - &egress_linum); + idx = EGRESS_LINUM_IDX; + err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum); CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)", "err:%d errno:%d\n", err, errno); - err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, - &ingress_linum); + idx = INGRESS_LINUM_IDX; + err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum); CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)", "err:%d errno:%d\n", err, errno); + idx = READ_SK_DST_PORT_LINUM_IDX; + err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum); + ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)"); + ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line"); + memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk)); memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp)); memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk)); @@ -262,7 +278,7 @@ static void test(void) char buf[DATA_LEN]; /* Prepare listen_fd */ - listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0); /* start_server() has logged the error details */ if (CHECK_FAIL(listen_fd == -1)) goto done; @@ -330,8 +346,12 @@ done: void serial_test_sock_fields(void) { - struct bpf_link *egress_link = NULL, *ingress_link = NULL; int parent_cg_fd = -1, child_cg_fd = -1; + struct bpf_link *link; + + /* Use a dedicated netns to have a fixed listen port */ + if (!create_netns()) + return; /* Create a cgroup, get fd, and join it */ parent_cg_fd = test__join_cgroup(PARENT_CGROUP); @@ -352,15 +372,20 @@ void serial_test_sock_fields(void) if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n")) goto done; - egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, - child_cg_fd); - if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)")) + link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd); + if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)")) + goto done; + skel->links.egress_read_sock_fields = link; + + link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd); + if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)")) goto done; + skel->links.ingress_read_sock_fields = link; - ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, - child_cg_fd); - if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)")) + link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd); + if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port")) goto done; + skel->links.read_sk_dst_port = link; linum_map_fd = 
bpf_map__fd(skel->maps.linum_map); sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt); @@ -369,8 +394,7 @@ void serial_test_sock_fields(void) test(); done: - bpf_link__destroy(egress_link); - bpf_link__destroy(ingress_link); + test_sock_fields__detach(skel); test_sock_fields__destroy(skel); if (child_cg_fd >= 0) close(child_cg_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index b97a8f236b3a..cec5c0882372 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -140,12 +140,16 @@ out: static void test_sockmap_update(enum bpf_map_type map_type) { - struct bpf_prog_test_run_attr tattr; int err, prog, src, duration = 0; struct test_sockmap_update *skel; struct bpf_map *dst_map; const __u32 zero = 0; char dummy[14] = {0}; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = dummy, + .data_size_in = sizeof(dummy), + .repeat = 1, + ); __s64 sk; sk = connected_socket_v4(); @@ -167,16 +171,10 @@ static void test_sockmap_update(enum bpf_map_type map_type) if (CHECK(err, "update_elem(src)", "errno=%u\n", errno)) goto out; - tattr = (struct bpf_prog_test_run_attr){ - .prog_fd = prog, - .repeat = 1, - .data_in = dummy, - .data_size_in = sizeof(dummy), - }; - - err = bpf_prog_test_run_xattr(&tattr); - if (CHECK_ATTR(err || !tattr.retval, "bpf_prog_test_run", - "errno=%u retval=%u\n", errno, tattr.retval)) + err = bpf_prog_test_run_opts(prog, &topts); + if (!ASSERT_OK(err, "test_run")) + goto out; + if (!ASSERT_NEQ(topts.retval, 0, "test_run retval")) goto out; compare_cookies(skel->maps.src, dst_map); diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c index 6307f5d2b417..8e329eaee6d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/spinlock.c +++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c @@ -4,14 +4,16 @@ static void *spin_lock_thread(void *arg) { - __u32 duration, retval; int err, prog_fd = *(u32 *) arg; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 10000, + ); - err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_OK(topts.retval, "test_run retval"); pthread_exit(arg); } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index 0a91d8d9954b..f45a1d7b0a28 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -42,7 +42,7 @@ retry: return; /* override program type */ - bpf_program__set_perf_event(skel->progs.oncpu); + bpf_program__set_type(skel->progs.oncpu, BPF_PROG_TYPE_PERF_EVENT); err = test_stacktrace_build_id__load(skel); if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err)) diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c index 81e997a69f7a..f4d40001155a 100644 --- a/tools/testing/selftests/bpf/prog_tests/syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/syscall.c @@ -20,20 +20,20 @@ void test_syscall(void) .log_buf = (uintptr_t) verifier_log, .log_size = sizeof(verifier_log), }; - struct 
bpf_prog_test_run_attr tattr = { + LIBBPF_OPTS(bpf_test_run_opts, tattr, .ctx_in = &ctx, .ctx_size_in = sizeof(ctx), - }; + ); struct syscall *skel = NULL; __u64 key = 12, value = 0; - int err; + int err, prog_fd; skel = syscall__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_load")) goto cleanup; - tattr.prog_fd = bpf_program__fd(skel->progs.bpf_prog); - err = bpf_prog_test_run_xattr(&tattr); + prog_fd = bpf_program__fd(skel->progs.bpf_prog); + err = bpf_prog_test_run_opts(prog_fd, &tattr); ASSERT_EQ(err, 0, "err"); ASSERT_EQ(tattr.retval, 1, "retval"); ASSERT_GT(ctx.map_fd, 0, "ctx.map_fd"); diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 796f231582f8..c4da87ec3ba4 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -12,9 +12,13 @@ static void test_tailcall_1(void) struct bpf_map *prog_array; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; char prog_name[32]; char buff[128] = {}; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); err = bpf_prog_test_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -54,20 +58,18 @@ static void test_tailcall_1(void) } for (i = 0; i < bpf_map__max_entries(prog_array); i++) { - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != i, "tailcall", - "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, i, "tailcall retval"); err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) goto out; } - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 3, "tailcall retval"); for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); @@ -85,10 +87,9 @@ static void test_tailcall_1(void) goto out; } - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_OK(topts.retval, "tailcall retval"); for (i = 0; i < bpf_map__max_entries(prog_array); i++) { j = bpf_map__max_entries(prog_array) - 1 - i; @@ -110,30 +111,27 @@ static void test_tailcall_1(void) for (i = 0; i < bpf_map__max_entries(prog_array); i++) { j = bpf_map__max_entries(prog_array) - 1 - i; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != j, "tailcall", - "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, j, "tailcall retval"); err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) goto out; } - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 3, "tailcall retval"); for (i = 0; i < 
bpf_map__max_entries(prog_array); i++) { err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err >= 0 || errno != ENOENT)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 3, "tailcall", - "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 3, "tailcall retval"); } out: @@ -150,9 +148,13 @@ static void test_tailcall_2(void) struct bpf_map *prog_array; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; char prog_name[32]; char buff[128] = {}; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); err = bpf_prog_test_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -191,30 +193,27 @@ static void test_tailcall_2(void) goto out; } - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 2, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 2, "tailcall retval"); i = 2; err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 1, "tailcall retval"); i = 0; err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 3, "tailcall retval"); out: bpf_object__close(obj); } @@ -225,8 +224,12 @@ static void test_tailcall_count(const char *which) struct bpf_map *prog_array, *data_map; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; char buff[128] = {}; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); err = bpf_prog_test_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -262,10 +265,9 @@ static void test_tailcall_count(const char *which) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 1, "tailcall retval"); data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) @@ -277,18 +279,17 @@ static void test_tailcall_count(const char *which) i = 0; err = bpf_map_lookup_elem(data_fd, &i, &val); - CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n", - err, errno, val); + ASSERT_OK(err, "tailcall count"); + ASSERT_EQ(val, 33, "tailcall count"); i = 0; err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = 
bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_OK(topts.retval, "tailcall retval"); out: bpf_object__close(obj); } @@ -319,10 +320,14 @@ static void test_tailcall_4(void) struct bpf_map *prog_array, *data_map; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; static const int zero = 0; char buff[128] = {}; char prog_name[32]; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); err = bpf_prog_test_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -374,10 +379,9 @@ static void test_tailcall_4(void) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != i, "tailcall", - "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, i, "tailcall retval"); } for (i = 0; i < bpf_map__max_entries(prog_array); i++) { @@ -389,10 +393,9 @@ static void test_tailcall_4(void) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 3, "tailcall", - "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 3, "tailcall retval"); } out: bpf_object__close(obj); @@ -407,10 +410,14 @@ static void test_tailcall_5(void) struct bpf_map *prog_array, *data_map; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; static const int zero = 0; char buff[128] = {}; char prog_name[32]; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); err = bpf_prog_test_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -462,10 +469,9 @@ static void test_tailcall_5(void) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != i, "tailcall", - "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, i, "tailcall retval"); } for (i = 0; i < bpf_map__max_entries(prog_array); i++) { @@ -477,10 +483,9 @@ static void test_tailcall_5(void) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 3, "tailcall", - "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 3, "tailcall retval"); } out: bpf_object__close(obj); @@ -495,8 +500,12 @@ static void test_tailcall_bpf2bpf_1(void) struct bpf_map *prog_array; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; char prog_name[32]; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); err = bpf_prog_test_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -536,10 +545,9 @@ static void test_tailcall_bpf2bpf_1(void) goto out; } - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - 0, &retval, &duration); - CHECK(err || retval != 1, "tailcall", - "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 1, 
"tailcall retval"); /* jmp -> nop, call subprog that will do tailcall */ i = 1; @@ -547,10 +555,9 @@ static void test_tailcall_bpf2bpf_1(void) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - 0, &retval, &duration); - CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_OK(topts.retval, "tailcall retval"); /* make sure that subprog can access ctx and entry prog that * called this subprog can properly return @@ -560,11 +567,9 @@ static void test_tailcall_bpf2bpf_1(void) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - 0, &retval, &duration); - CHECK(err || retval != sizeof(pkt_v4) * 2, - "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 2, "tailcall retval"); out: bpf_object__close(obj); } @@ -579,8 +584,12 @@ static void test_tailcall_bpf2bpf_2(void) struct bpf_map *prog_array, *data_map; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; char buff[128] = {}; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); err = bpf_prog_test_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -616,10 +625,9 @@ static void test_tailcall_bpf2bpf_2(void) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 1, "tailcall retval"); data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) @@ -631,18 +639,17 @@ static void test_tailcall_bpf2bpf_2(void) i = 0; err = bpf_map_lookup_elem(data_fd, &i, &val); - CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n", - err, errno, val); + ASSERT_OK(err, "tailcall count"); + ASSERT_EQ(val, 33, "tailcall count"); i = 0; err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, - &duration, &retval, NULL); - CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_OK(topts.retval, "tailcall retval"); out: bpf_object__close(obj); } @@ -657,8 +664,12 @@ static void test_tailcall_bpf2bpf_3(void) struct bpf_map *prog_array; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; char prog_name[32]; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); err = bpf_prog_test_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -697,33 +708,27 @@ static void test_tailcall_bpf2bpf_3(void) goto out; } - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - &duration, &retval, NULL); - CHECK(err || retval != sizeof(pkt_v4) * 3, - "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 3, "tailcall retval"); i = 1; err = 
bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - &duration, &retval, NULL); - CHECK(err || retval != sizeof(pkt_v4), - "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, sizeof(pkt_v4), "tailcall retval"); i = 0; err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - &duration, &retval, NULL); - CHECK(err || retval != sizeof(pkt_v4) * 2, - "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 2, "tailcall retval"); out: bpf_object__close(obj); } @@ -754,8 +759,12 @@ static void test_tailcall_bpf2bpf_4(bool noise) struct bpf_map *prog_array, *data_map; struct bpf_program *prog; struct bpf_object *obj; - __u32 retval, duration; char prog_name[32]; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); err = bpf_prog_test_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); @@ -809,15 +818,14 @@ static void test_tailcall_bpf2bpf_4(bool noise) if (CHECK_FAIL(err)) goto out; - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - &duration, &retval, NULL); - CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 3, "tailcall retval"); i = 0; err = bpf_map_lookup_elem(data_fd, &i, &val); - CHECK(err || val.count != 31, "tailcall count", "err %d errno %d count %d\n", - err, errno, val.count); + ASSERT_OK(err, "tailcall count"); + ASSERT_EQ(val.count, 31, "tailcall count"); out: bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c index 37c20b5ffa70..61935e7e056a 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c +++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c @@ -3,18 +3,22 @@ #include <test_progs.h> #include "test_task_pt_regs.skel.h" +/* uprobe attach point */ +static void trigger_func(void) +{ + asm volatile (""); +} + void test_task_pt_regs(void) { struct test_task_pt_regs *skel; struct bpf_link *uprobe_link; - size_t uprobe_offset; - ssize_t base_addr; + ssize_t uprobe_offset; bool match; - base_addr = get_base_addr(); - if (!ASSERT_GT(base_addr, 0, "get_base_addr")) + uprobe_offset = get_uprobe_offset(&trigger_func); + if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset")) return; - uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr); skel = test_task_pt_regs__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open")) @@ -32,7 +36,7 @@ void test_task_pt_regs(void) skel->links.handle_uprobe = uprobe_link; /* trigger & validate uprobe */ - get_base_addr(); + trigger_func(); if (!ASSERT_EQ(skel->bss->uprobe_res, 1, "check_uprobe_res")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c new file mode 100644 index 000000000000..c381faaae741 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2022 
Sony Group Corporation */ +#include <sys/prctl.h> +#include <test_progs.h> +#include "bpf_syscall_macro.skel.h" + +void test_bpf_syscall_macro(void) +{ + struct bpf_syscall_macro *skel = NULL; + int err; + int exp_arg1 = 1001; + unsigned long exp_arg2 = 12; + unsigned long exp_arg3 = 13; + unsigned long exp_arg4 = 14; + unsigned long exp_arg5 = 15; + + /* check whether it can open program */ + skel = bpf_syscall_macro__open(); + if (!ASSERT_OK_PTR(skel, "bpf_syscall_macro__open")) + return; + + skel->rodata->filter_pid = getpid(); + + /* check whether it can load program */ + err = bpf_syscall_macro__load(skel); + if (!ASSERT_OK(err, "bpf_syscall_macro__load")) + goto cleanup; + + /* check whether it can attach kprobe */ + err = bpf_syscall_macro__attach(skel); + if (!ASSERT_OK(err, "bpf_syscall_macro__attach")) + goto cleanup; + + /* check whether args of syscall are copied correctly */ + prctl(exp_arg1, exp_arg2, exp_arg3, exp_arg4, exp_arg5); +#if defined(__aarch64__) || defined(__s390__) + ASSERT_NEQ(skel->bss->arg1, exp_arg1, "syscall_arg1"); +#else + ASSERT_EQ(skel->bss->arg1, exp_arg1, "syscall_arg1"); +#endif + ASSERT_EQ(skel->bss->arg2, exp_arg2, "syscall_arg2"); + ASSERT_EQ(skel->bss->arg3, exp_arg3, "syscall_arg3"); + /* it cannot copy arg4 when using PT_REGS_PARM4 on x86_64 */ +#ifdef __x86_64__ + ASSERT_NEQ(skel->bss->arg4_cx, exp_arg4, "syscall_arg4_from_cx"); +#else + ASSERT_EQ(skel->bss->arg4_cx, exp_arg4, "syscall_arg4_from_cx"); +#endif + ASSERT_EQ(skel->bss->arg4, exp_arg4, "syscall_arg4"); + ASSERT_EQ(skel->bss->arg5, exp_arg5, "syscall_arg5"); + + /* check whether args of syscall are copied correctly for CORE variants */ + ASSERT_EQ(skel->bss->arg1_core, exp_arg1, "syscall_arg1_core_variant"); + ASSERT_EQ(skel->bss->arg2_core, exp_arg2, "syscall_arg2_core_variant"); + ASSERT_EQ(skel->bss->arg3_core, exp_arg3, "syscall_arg3_core_variant"); + /* it cannot copy arg4 when using PT_REGS_PARM4_CORE on x86_64 */ +#ifdef __x86_64__ + ASSERT_NEQ(skel->bss->arg4_core_cx, exp_arg4, "syscall_arg4_from_cx_core_variant"); +#else + ASSERT_EQ(skel->bss->arg4_core_cx, exp_arg4, "syscall_arg4_from_cx_core_variant"); +#endif + ASSERT_EQ(skel->bss->arg4_core, exp_arg4, "syscall_arg4_core_variant"); + ASSERT_EQ(skel->bss->arg5_core, exp_arg5, "syscall_arg5_core_variant"); + + ASSERT_EQ(skel->bss->option_syscall, exp_arg1, "BPF_KPROBE_SYSCALL_option"); + ASSERT_EQ(skel->bss->arg2_syscall, exp_arg2, "BPF_KPROBE_SYSCALL_arg2"); + ASSERT_EQ(skel->bss->arg3_syscall, exp_arg3, "BPF_KPROBE_SYSCALL_arg3"); + ASSERT_EQ(skel->bss->arg4_syscall, exp_arg4, "BPF_KPROBE_SYSCALL_arg4"); + ASSERT_EQ(skel->bss->arg5_syscall, exp_arg5, "BPF_KPROBE_SYSCALL_arg5"); + +cleanup: + bpf_syscall_macro__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_profiler.c b/tools/testing/selftests/bpf/prog_tests/test_profiler.c index 4ca275101ee0..de24e8f0e738 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_profiler.c +++ b/tools/testing/selftests/bpf/prog_tests/test_profiler.c @@ -8,20 +8,20 @@ static int sanity_run(struct bpf_program *prog) { - struct bpf_prog_test_run_attr test_attr = {}; + LIBBPF_OPTS(bpf_test_run_opts, test_attr); __u64 args[] = {1, 2, 3}; - __u32 duration = 0; int err, prog_fd; prog_fd = bpf_program__fd(prog); - test_attr.prog_fd = prog_fd; test_attr.ctx_in = args; test_attr.ctx_size_in = sizeof(args); - err = bpf_prog_test_run_xattr(&test_attr); - if (CHECK(err || test_attr.retval, "test_run", - "err %d errno %d retval %d duration %d\n", - err, errno, test_attr.retval, 
duration)) + err = bpf_prog_test_run_opts(prog_fd, &test_attr); + if (!ASSERT_OK(err, "test_run")) + return -1; + + if (!ASSERT_OK(test_attr.retval, "test_run retval")) return -1; + return 0; } diff --git a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c index cf1215531920..ae93411fd582 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c +++ b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c @@ -6,15 +6,18 @@ static int sanity_run(struct bpf_program *prog) { - __u32 duration, retval; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); prog_fd = bpf_program__fd(prog); - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - if (CHECK(err || retval != 123, "test_run", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration)) + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run")) + return -1; + if (!ASSERT_EQ(topts.retval, 123, "test_run retval")) return -1; return 0; } diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 0f4e49e622cd..7eb049214859 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -6,7 +6,7 @@ static int timer(struct timer *timer_skel) { int err, prog_fd; - __u32 duration = 0, retval; + LIBBPF_OPTS(bpf_test_run_opts, topts); err = timer__attach(timer_skel); if (!ASSERT_OK(err, "timer_attach")) @@ -16,10 +16,9 @@ static int timer(struct timer *timer_skel) ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1"); prog_fd = bpf_program__fd(timer_skel->progs.test1); - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); - ASSERT_EQ(retval, 0, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); timer__detach(timer_skel); usleep(50); /* 10 usecs should be enough, but give it extra */ diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c index 949a0617869d..2ee5f5ae11d4 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c @@ -6,19 +6,18 @@ static int timer_mim(struct timer_mim *timer_skel) { - __u32 duration = 0, retval; __u64 cnt1, cnt2; int err, prog_fd, key1 = 1; + LIBBPF_OPTS(bpf_test_run_opts, topts); err = timer_mim__attach(timer_skel); if (!ASSERT_OK(err, "timer_attach")) return err; prog_fd = bpf_program__fd(timer_skel->progs.test1); - err = bpf_prog_test_run(prog_fd, 1, NULL, 0, - NULL, NULL, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); - ASSERT_EQ(retval, 0, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); timer_mim__detach(timer_skel); /* check that timer_cb[12] are incrementing 'cnt' */ diff --git a/tools/testing/selftests/bpf/prog_tests/trace_ext.c b/tools/testing/selftests/bpf/prog_tests/trace_ext.c index 924441d4362d..aabdff7bea3e 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_ext.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_ext.c @@ -23,8 +23,12 @@ void test_trace_ext(void) int err, pkt_fd, ext_fd; struct bpf_program *prog; char buf[100]; - __u32 retval; __u64 len; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = 
sizeof(pkt_v4), + .repeat = 1, + ); /* open/load/attach test_pkt_md_access */ skel_pkt = test_pkt_md_access__open_and_load(); @@ -77,32 +81,32 @@ void test_trace_ext(void) /* load/attach tracing */ err = test_trace_ext_tracing__load(skel_trace); - if (CHECK(err, "setup", "tracing/test_pkt_md_access_new load failed\n")) { + if (!ASSERT_OK(err, "tracing/test_pkt_md_access_new load")) { libbpf_strerror(err, buf, sizeof(buf)); fprintf(stderr, "%s\n", buf); goto cleanup; } err = test_trace_ext_tracing__attach(skel_trace); - if (CHECK(err, "setup", "tracing/test_pkt_md_access_new attach failed: %d\n", err)) + if (!ASSERT_OK(err, "tracing/test_pkt_md_access_new attach")) goto cleanup; /* trigger the test */ - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "run", "err %d errno %d retval %d\n", err, errno, retval); + err = bpf_prog_test_run_opts(pkt_fd, &topts); + ASSERT_OK(err, "test_run_opts err"); + ASSERT_OK(topts.retval, "test_run_opts retval"); bss_ext = skel_ext->bss; bss_trace = skel_trace->bss; len = bss_ext->ext_called; - CHECK(bss_ext->ext_called == 0, - "check", "failed to trigger freplace/test_pkt_md_access\n"); - CHECK(bss_trace->fentry_called != len, - "check", "failed to trigger fentry/test_pkt_md_access_new\n"); - CHECK(bss_trace->fexit_called != len, - "check", "failed to trigger fexit/test_pkt_md_access_new\n"); + ASSERT_NEQ(bss_ext->ext_called, 0, + "failed to trigger freplace/test_pkt_md_access"); + ASSERT_EQ(bss_trace->fentry_called, len, + "failed to trigger fentry/test_pkt_md_access_new"); + ASSERT_EQ(bss_trace->fexit_called, len, + "failed to trigger fexit/test_pkt_md_access_new"); cleanup: test_trace_ext_tracing__destroy(skel_trace); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c index ac65456b7ab8..ec21c53cb1da 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp.c @@ -13,8 +13,14 @@ void test_xdp(void) char buf[128]; struct ipv6hdr iph6; struct iphdr iph; - __u32 duration, retval, size; int err, prog_fd, map_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (CHECK_FAIL(err)) @@ -26,21 +32,23 @@ void test_xdp(void) bpf_map_update_elem(map_fd, &key4, &value4, 0); bpf_map_update_elem(map_fd, &key6, &value6, 0); - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - buf, &size, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph)); - CHECK(err || retval != XDP_TX || size != 74 || - iph.protocol != IPPROTO_IPIP, "ipv4", - "err %d errno %d retval %d size %d\n", - err, errno, retval, size); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, XDP_TX, "ipv4 test_run retval"); + ASSERT_EQ(topts.data_size_out, 74, "ipv4 test_run data_size_out"); + ASSERT_EQ(iph.protocol, IPPROTO_IPIP, "ipv4 test_run iph.protocol"); - err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), - buf, &size, &retval, &duration); + topts.data_in = &pkt_v6; + topts.data_size_in = sizeof(pkt_v6); + topts.data_size_out = sizeof(buf); + + err = bpf_prog_test_run_opts(prog_fd, &topts); memcpy(&iph6, buf + sizeof(struct ethhdr), sizeof(iph6)); - CHECK(err || retval != XDP_TX || size != 114 || - iph6.nexthdr != IPPROTO_IPV6, "ipv6", - "err %d errno %d 
retval %d size %d\n", - err, errno, retval, size); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, XDP_TX, "ipv6 test_run retval"); + ASSERT_EQ(topts.data_size_out, 114, "ipv6 test_run data_size_out"); + ASSERT_EQ(iph6.nexthdr, IPPROTO_IPV6, "ipv6 test_run iph6.nexthdr"); out: bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c index 31c188666e81..d18e6f343c48 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c @@ -2,15 +2,15 @@ #include <test_progs.h> #include <network_helpers.h> -void test_xdp_update_frags(void) +static void test_xdp_update_frags(void) { const char *file = "./test_xdp_update_frags.o"; - __u32 duration, retval, size; struct bpf_program *prog; struct bpf_object *obj; int err, prog_fd; __u32 *offset; __u8 *buf; + LIBBPF_OPTS(bpf_test_run_opts, topts); obj = bpf_object__open(file); if (libbpf_get_error(obj)) @@ -32,12 +32,16 @@ void test_xdp_update_frags(void) buf[*offset] = 0xaa; /* marker at offset 16 (head) */ buf[*offset + 15] = 0xaa; /* marker at offset 31 (head) */ - err = bpf_prog_test_run(prog_fd, 1, buf, 128, - buf, &size, &retval, &duration); + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = 128; + topts.data_size_out = 128; + + err = bpf_prog_test_run_opts(prog_fd, &topts); /* test_xdp_update_frags: buf[16,31]: 0xaa -> 0xbb */ ASSERT_OK(err, "xdp_update_frag"); - ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); + ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval"); ASSERT_EQ(buf[16], 0xbb, "xdp_update_frag buf[16]"); ASSERT_EQ(buf[31], 0xbb, "xdp_update_frag buf[31]"); @@ -53,12 +57,16 @@ void test_xdp_update_frags(void) buf[*offset] = 0xaa; /* marker at offset 5000 (frag0) */ buf[*offset + 15] = 0xaa; /* marker at offset 5015 (frag0) */ - err = bpf_prog_test_run(prog_fd, 1, buf, 9000, - buf, &size, &retval, &duration); + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = 9000; + topts.data_size_out = 9000; + + err = bpf_prog_test_run_opts(prog_fd, &topts); /* test_xdp_update_frags: buf[5000,5015]: 0xaa -> 0xbb */ ASSERT_OK(err, "xdp_update_frag"); - ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); + ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval"); ASSERT_EQ(buf[5000], 0xbb, "xdp_update_frag buf[5000]"); ASSERT_EQ(buf[5015], 0xbb, "xdp_update_frag buf[5015]"); @@ -68,12 +76,11 @@ void test_xdp_update_frags(void) buf[*offset] = 0xaa; /* marker at offset 3510 (head) */ buf[*offset + 15] = 0xaa; /* marker at offset 3525 (frag0) */ - err = bpf_prog_test_run(prog_fd, 1, buf, 9000, - buf, &size, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); /* test_xdp_update_frags: buf[3510,3525]: 0xaa -> 0xbb */ ASSERT_OK(err, "xdp_update_frag"); - ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); + ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval"); ASSERT_EQ(buf[3510], 0xbb, "xdp_update_frag buf[3510]"); ASSERT_EQ(buf[3525], 0xbb, "xdp_update_frag buf[3525]"); @@ -83,12 +90,11 @@ void test_xdp_update_frags(void) buf[*offset] = 0xaa; /* marker at offset 7606 (frag0) */ buf[*offset + 15] = 0xaa; /* marker at offset 7621 (frag1) */ - err = bpf_prog_test_run(prog_fd, 1, buf, 9000, - buf, &size, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); /* test_xdp_update_frags: buf[7606,7621]: 0xaa -> 0xbb */ ASSERT_OK(err, "xdp_update_frag"); - ASSERT_EQ(retval, XDP_PASS, 
"xdp_update_frag retval"); + ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval"); ASSERT_EQ(buf[7606], 0xbb, "xdp_update_frag buf[7606]"); ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]"); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index ccc9e63254a8..21ceac24e174 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -5,26 +5,34 @@ static void test_xdp_adjust_tail_shrink(void) { const char *file = "./test_xdp_adjust_tail_shrink.o"; - __u32 duration, retval, size, expect_sz; + __u32 expect_sz; struct bpf_object *obj; int err, prog_fd; char buf[128]; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (ASSERT_OK(err, "test_xdp_adjust_tail_shrink")) return; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - buf, &size, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "ipv4"); - ASSERT_EQ(retval, XDP_DROP, "ipv4 retval"); + ASSERT_EQ(topts.retval, XDP_DROP, "ipv4 retval"); expect_sz = sizeof(pkt_v6) - 20; /* Test shrink with 20 bytes */ - err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), - buf, &size, &retval, &duration); + topts.data_in = &pkt_v6; + topts.data_size_in = sizeof(pkt_v6); + topts.data_size_out = sizeof(buf); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "ipv6"); - ASSERT_EQ(retval, XDP_TX, "ipv6 retval"); - ASSERT_EQ(size, expect_sz, "ipv6 size"); + ASSERT_EQ(topts.retval, XDP_TX, "ipv6 retval"); + ASSERT_EQ(topts.data_size_out, expect_sz, "ipv6 size"); bpf_object__close(obj); } @@ -34,24 +42,31 @@ static void test_xdp_adjust_tail_grow(void) const char *file = "./test_xdp_adjust_tail_grow.o"; struct bpf_object *obj; char buf[4096]; /* avoid segfault: large buf to hold grow results */ - __u32 duration, retval, size, expect_sz; + __u32 expect_sz; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; - err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), - buf, &size, &retval, &duration); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "ipv4"); - ASSERT_EQ(retval, XDP_DROP, "ipv4 retval"); + ASSERT_EQ(topts.retval, XDP_DROP, "ipv4 retval"); expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */ - err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */, - buf, &size, &retval, &duration); + topts.data_in = &pkt_v6; + topts.data_size_in = sizeof(pkt_v6); + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "ipv6"); - ASSERT_EQ(retval, XDP_TX, "ipv6 retval"); - ASSERT_EQ(size, expect_sz, "ipv6 size"); + ASSERT_EQ(topts.retval, XDP_TX, "ipv6 retval"); + ASSERT_EQ(topts.data_size_out, expect_sz, "ipv6 size"); bpf_object__close(obj); } @@ -63,17 +78,17 @@ static void test_xdp_adjust_tail_grow2(void) int tailroom = 320; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info))*/; struct bpf_object *obj; int err, cnt, i; - int max_grow; + int max_grow, prog_fd; - struct bpf_prog_test_run_attr tattr = { + LIBBPF_OPTS(bpf_test_run_opts, tattr, .repeat = 1, 
.data_in = &buf, .data_out = &buf, .data_size_in = 0, /* Per test */ .data_size_out = 0, /* Per test */ - }; + ); - err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; @@ -82,7 +97,7 @@ static void test_xdp_adjust_tail_grow2(void) tattr.data_size_in = 64; /* Determine test case via pkt size */ tattr.data_size_out = 128; /* Limit copy_size */ /* Kernel side alloc packet memory area that is zero init */ - err = bpf_prog_test_run_xattr(&tattr); + err = bpf_prog_test_run_opts(prog_fd, &tattr); ASSERT_EQ(errno, ENOSPC, "case-64 errno"); /* Due limit copy_size in bpf_test_finish */ ASSERT_EQ(tattr.retval, XDP_TX, "case-64 retval"); @@ -100,7 +115,7 @@ static void test_xdp_adjust_tail_grow2(void) memset(buf, 2, sizeof(buf)); tattr.data_size_in = 128; /* Determine test case via pkt size */ tattr.data_size_out = sizeof(buf); /* Copy everything */ - err = bpf_prog_test_run_xattr(&tattr); + err = bpf_prog_test_run_opts(prog_fd, &tattr); max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */ ASSERT_OK(err, "case-128"); @@ -118,14 +133,15 @@ static void test_xdp_adjust_tail_grow2(void) bpf_object__close(obj); } -void test_xdp_adjust_frags_tail_shrink(void) +static void test_xdp_adjust_frags_tail_shrink(void) { const char *file = "./test_xdp_adjust_tail_shrink.o"; - __u32 duration, retval, size, exp_size; + __u32 exp_size; struct bpf_program *prog; struct bpf_object *obj; int err, prog_fd; __u8 *buf; + LIBBPF_OPTS(bpf_test_run_opts, topts); /* For the individual test cases, the first byte in the packet * indicates which test will be run. @@ -148,46 +164,51 @@ void test_xdp_adjust_frags_tail_shrink(void) /* Test case removing 10 bytes from last frag, NOT freeing it */ exp_size = 8990; /* 9000 - 10 */ - err = bpf_prog_test_run(prog_fd, 1, buf, 9000, - buf, &size, &retval, &duration); + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = 9000; + topts.data_size_out = 9000; + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "9Kb-10b"); - ASSERT_EQ(retval, XDP_TX, "9Kb-10b retval"); - ASSERT_EQ(size, exp_size, "9Kb-10b size"); + ASSERT_EQ(topts.retval, XDP_TX, "9Kb-10b retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-10b size"); /* Test case removing one of two pages, assuming 4K pages */ buf[0] = 1; exp_size = 4900; /* 9000 - 4100 */ - err = bpf_prog_test_run(prog_fd, 1, buf, 9000, - buf, &size, &retval, &duration); + + topts.data_size_out = 9000; /* reset from previous invocation */ + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "9Kb-4Kb"); - ASSERT_EQ(retval, XDP_TX, "9Kb-4Kb retval"); - ASSERT_EQ(size, exp_size, "9Kb-4Kb size"); + ASSERT_EQ(topts.retval, XDP_TX, "9Kb-4Kb retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-4Kb size"); /* Test case removing two pages resulting in a linear xdp_buff */ buf[0] = 2; exp_size = 800; /* 9000 - 8200 */ - err = bpf_prog_test_run(prog_fd, 1, buf, 9000, - buf, &size, &retval, &duration); + topts.data_size_out = 9000; /* reset from previous invocation */ + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "9Kb-9Kb"); - ASSERT_EQ(retval, XDP_TX, "9Kb-9Kb retval"); - ASSERT_EQ(size, exp_size, "9Kb-9Kb size"); + ASSERT_EQ(topts.retval, XDP_TX, "9Kb-9Kb retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-9Kb size"); free(buf); out: bpf_object__close(obj); } -void test_xdp_adjust_frags_tail_grow(void) +static void 
test_xdp_adjust_frags_tail_grow(void) { const char *file = "./test_xdp_adjust_tail_grow.o"; - __u32 duration, retval, size, exp_size; + __u32 exp_size; struct bpf_program *prog; struct bpf_object *obj; int err, i, prog_fd; __u8 *buf; + LIBBPF_OPTS(bpf_test_run_opts, topts); obj = bpf_object__open(file); if (libbpf_get_error(obj)) @@ -205,14 +226,17 @@ void test_xdp_adjust_frags_tail_grow(void) /* Test case add 10 bytes to last frag */ memset(buf, 1, 16384); - size = 9000; - exp_size = size + 10; - err = bpf_prog_test_run(prog_fd, 1, buf, size, - buf, &size, &retval, &duration); + exp_size = 9000 + 10; + + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = 9000; + topts.data_size_out = 16384; + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "9Kb+10b"); - ASSERT_EQ(retval, XDP_TX, "9Kb+10b retval"); - ASSERT_EQ(size, exp_size, "9Kb+10b size"); + ASSERT_EQ(topts.retval, XDP_TX, "9Kb+10b retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size"); for (i = 0; i < 9000; i++) ASSERT_EQ(buf[i], 1, "9Kb+10b-old"); @@ -225,14 +249,16 @@ void test_xdp_adjust_frags_tail_grow(void) /* Test a too large grow */ memset(buf, 1, 16384); - size = 9001; - exp_size = size; - err = bpf_prog_test_run(prog_fd, 1, buf, size, - buf, &size, &retval, &duration); + exp_size = 9001; + + topts.data_in = topts.data_out = buf; + topts.data_size_in = 9001; + topts.data_size_out = 16384; + err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "9Kb+10b"); - ASSERT_EQ(retval, XDP_DROP, "9Kb+10b retval"); - ASSERT_EQ(size, exp_size, "9Kb+10b size"); + ASSERT_EQ(topts.retval, XDP_DROP, "9Kb+10b retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size"); free(buf); out: diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c index c6fa390e3aa1..62aa3edda5e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c @@ -11,8 +11,7 @@ void serial_test_xdp_attach(void) const char *file = "./test_xdp.o"; struct bpf_prog_info info = {}; int err, fd1, fd2, fd3; - DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, - .old_fd = -1); + LIBBPF_OPTS(bpf_xdp_attach_opts, opts); len = sizeof(info); @@ -38,49 +37,47 @@ void serial_test_xdp_attach(void) if (CHECK_FAIL(err)) goto out_2; - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE, - &opts); + err = bpf_xdp_attach(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE, &opts); if (CHECK(err, "load_ok", "initial load failed")) goto out_close; - err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0); if (CHECK(err || id0 != id1, "id1_check", "loaded prog id %u != id1 %u, err %d", id0, id1, err)) goto out_close; - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE, - &opts); + err = bpf_xdp_attach(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE, &opts); if (CHECK(!err, "load_fail", "load with expected id didn't fail")) goto out; - opts.old_fd = fd1; - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, 0, &opts); + opts.old_prog_fd = fd1; + err = bpf_xdp_attach(IFINDEX_LO, fd2, 0, &opts); if (CHECK(err, "replace_ok", "replace valid old_fd failed")) goto out; - err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0); if (CHECK(err || id0 != id2, "id2_check", "loaded prog id %u != id2 %u, err %d", id0, id2, err)) goto out_close; - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd3, 0, &opts); + err = bpf_xdp_attach(IFINDEX_LO, fd3, 0, &opts); if 
(CHECK(!err, "replace_fail", "replace invalid old_fd didn't fail")) goto out; - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts); + err = bpf_xdp_detach(IFINDEX_LO, 0, &opts); if (CHECK(!err, "remove_fail", "remove invalid old_fd didn't fail")) goto out; - opts.old_fd = fd2; - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts); + opts.old_prog_fd = fd2; + err = bpf_xdp_detach(IFINDEX_LO, 0, &opts); if (CHECK(err, "remove_ok", "remove valid old_fd failed")) goto out; - err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0); if (CHECK(err || id0 != 0, "unload_check", "loaded prog id %u != 0, err %d", id0, err)) goto out_close; out: - bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0); + bpf_xdp_detach(IFINDEX_LO, 0, NULL); out_close: bpf_object__close(obj3); out_2: diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c index 9c395ea680c6..76967d8ace9c 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -45,9 +45,9 @@ static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb, struct test_xdp_bpf2bpf *ftrace_skel, int pkt_size) { - __u32 duration = 0, retval, size; __u8 *buf, *buf_in; int err; + LIBBPF_OPTS(bpf_test_run_opts, topts); if (!ASSERT_LE(pkt_size, BUF_SZ, "pkt_size") || !ASSERT_GE(pkt_size, sizeof(pkt_v4), "pkt_size")) @@ -73,12 +73,16 @@ static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb, } /* Run test program */ - err = bpf_prog_test_run(pkt_fd, 1, buf_in, pkt_size, - buf, &size, &retval, &duration); + topts.data_in = buf_in; + topts.data_size_in = pkt_size; + topts.data_out = buf; + topts.data_size_out = BUF_SZ; + + err = bpf_prog_test_run_opts(pkt_fd, &topts); ASSERT_OK(err, "ipv4"); - ASSERT_EQ(retval, XDP_PASS, "ipv4 retval"); - ASSERT_EQ(size, pkt_size, "ipv4 size"); + ASSERT_EQ(topts.retval, XDP_PASS, "ipv4 retval"); + ASSERT_EQ(topts.data_size_out, pkt_size, "ipv4 size"); /* Make sure bpf_xdp_output() was triggered and it sent the expected * data to the perf ring buffer. 
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index 13aabb3b6cf2..f775a1613833 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -8,7 +8,7 @@ #define IFINDEX_LO 1 -void test_xdp_with_cpumap_helpers(void) +static void test_xdp_with_cpumap_helpers(void) { struct test_xdp_with_cpumap_helpers *skel; struct bpf_prog_info info = {}; @@ -24,11 +24,11 @@ void test_xdp_with_cpumap_helpers(void) return; prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog); - err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP")) goto out_close; - err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); ASSERT_OK(err, "XDP program detach"); prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm); @@ -46,9 +46,9 @@ void test_xdp_with_cpumap_helpers(void) ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id"); /* can not attach BPF_XDP_CPUMAP program to a device */ - err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program")) - bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE); + bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); val.qsize = 192; val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog); @@ -68,7 +68,7 @@ out_close: test_xdp_with_cpumap_helpers__destroy(skel); } -void test_xdp_with_cpumap_frags_helpers(void) +static void test_xdp_with_cpumap_frags_helpers(void) { struct test_xdp_with_cpumap_frags_helpers *skel; struct bpf_prog_info info = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index 2a784ccd3136..ead40016c324 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -26,11 +26,11 @@ static void test_xdp_with_devmap_helpers(void) return; dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog); - err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap")) goto out_close; - err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); ASSERT_OK(err, "XDP program detach"); dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm); @@ -48,9 +48,9 @@ static void test_xdp_with_devmap_helpers(void) ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id"); /* can not attach BPF_XDP_DEVMAP program to a device */ - err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program")) - bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE); + bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); val.ifindex = 1; val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog); @@ -81,7 +81,7 @@ static void test_neg_xdp_devmap_helpers(void) } } -void test_xdp_with_devmap_frags_helpers(void) +static void 
test_xdp_with_devmap_frags_helpers(void) { struct test_xdp_with_devmap_frags_helpers *skel; struct bpf_prog_info info = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c index abe48e82e1dc..0d01ff6cb91a 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c @@ -14,13 +14,13 @@ void serial_test_xdp_info(void) /* Get prog_id for XDP_ATTACHED_NONE mode */ - err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0); + err = bpf_xdp_query_id(IFINDEX_LO, 0, &prog_id); if (CHECK(err, "get_xdp_none", "errno=%d\n", errno)) return; if (CHECK(prog_id, "prog_id_none", "unexpected prog_id=%u\n", prog_id)) return; - err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_SKB_MODE, &prog_id); if (CHECK(err, "get_xdp_none_skb", "errno=%d\n", errno)) return; if (CHECK(prog_id, "prog_id_none_skb", "unexpected prog_id=%u\n", @@ -37,32 +37,32 @@ void serial_test_xdp_info(void) if (CHECK(err, "get_prog_info", "errno=%d\n", errno)) goto out_close; - err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL); if (CHECK(err, "set_xdp_skb", "errno=%d\n", errno)) goto out_close; /* Get prog_id for single prog mode */ - err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0); + err = bpf_xdp_query_id(IFINDEX_LO, 0, &prog_id); if (CHECK(err, "get_xdp", "errno=%d\n", errno)) goto out; if (CHECK(prog_id != info.id, "prog_id", "prog_id not available\n")) goto out; - err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE); + err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_SKB_MODE, &prog_id); if (CHECK(err, "get_xdp_skb", "errno=%d\n", errno)) goto out; if (CHECK(prog_id != info.id, "prog_id_skb", "prog_id not available\n")) goto out; - err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_DRV_MODE); + err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_DRV_MODE, &prog_id); if (CHECK(err, "get_xdp_drv", "errno=%d\n", errno)) goto out; if (CHECK(prog_id, "prog_id_drv", "unexpected prog_id=%u\n", prog_id)) goto out; out: - bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0); + bpf_xdp_detach(IFINDEX_LO, 0, NULL); out_close: bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c index b2b357f8c74c..3e9d5c5521f0 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c @@ -8,9 +8,9 @@ void serial_test_xdp_link(void) { - DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1); struct test_xdp_link *skel1 = NULL, *skel2 = NULL; __u32 id1, id2, id0 = 0, prog_fd1, prog_fd2; + LIBBPF_OPTS(bpf_xdp_attach_opts, opts); struct bpf_link_info link_info; struct bpf_prog_info prog_info; struct bpf_link *link; @@ -41,12 +41,12 @@ void serial_test_xdp_link(void) id2 = prog_info.id; /* set initial prog attachment */ - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts); + err = bpf_xdp_attach(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts); if (!ASSERT_OK(err, "fd_attach")) goto cleanup; /* validate prog ID */ - err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0); if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val")) goto cleanup; @@ -55,14 +55,14 @@ void serial_test_xdp_link(void) if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { 
bpf_link__destroy(link); /* best-effort detach prog */ - opts.old_fd = prog_fd1; - bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts); + opts.old_prog_fd = prog_fd1; + bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_REPLACE, &opts); goto cleanup; } /* detach BPF program */ - opts.old_fd = prog_fd1; - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts); + opts.old_prog_fd = prog_fd1; + err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_REPLACE, &opts); if (!ASSERT_OK(err, "prog_detach")) goto cleanup; @@ -73,23 +73,23 @@ void serial_test_xdp_link(void) skel1->links.xdp_handler = link; /* validate prog ID */ - err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0); if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val")) goto cleanup; /* BPF prog attach is not allowed to replace BPF link */ - opts.old_fd = prog_fd1; - err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts); + opts.old_prog_fd = prog_fd1; + err = bpf_xdp_attach(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts); if (!ASSERT_ERR(err, "prog_attach_fail")) goto cleanup; /* Can't force-update when BPF link is active */ - err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0); + err = bpf_xdp_attach(IFINDEX_LO, prog_fd2, 0, NULL); if (!ASSERT_ERR(err, "prog_update_fail")) goto cleanup; /* Can't force-detach when BPF link is active */ - err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0); + err = bpf_xdp_detach(IFINDEX_LO, 0, NULL); if (!ASSERT_ERR(err, "prog_detach_fail")) goto cleanup; @@ -109,7 +109,7 @@ void serial_test_xdp_link(void) goto cleanup; skel2->links.xdp_handler = link; - err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0); if (!ASSERT_OK(err, "id2_check_err") || !ASSERT_EQ(id0, id2, "id2_check_val")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c index 0281095de266..92ef0aa50866 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c @@ -25,43 +25,49 @@ void test_xdp_noinline(void) __u8 flags; } real_def = {.dst = MAGIC_VAL}; __u32 ch_key = 11, real_num = 3; - __u32 duration = 0, retval, size; int err, i; __u64 bytes = 0, pkts = 0; char buf[128]; u32 *magic = (u32 *)buf; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = NUM_ITER, + ); skel = test_xdp_noinline__open_and_load(); - if (CHECK(!skel, "skel_open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; bpf_map_update_elem(bpf_map__fd(skel->maps.vip_map), &key, &value, 0); bpf_map_update_elem(bpf_map__fd(skel->maps.ch_rings), &ch_key, &real_num, 0); bpf_map_update_elem(bpf_map__fd(skel->maps.reals), &real_num, &real_def, 0); - err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v4), - NUM_ITER, &pkt_v4, sizeof(pkt_v4), - buf, &size, &retval, &duration); - CHECK(err || retval != 1 || size != 54 || - *magic != MAGIC_VAL, "ipv4", - "err %d errno %d retval %d size %d magic %x\n", - err, errno, retval, size, *magic); + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.balancer_ingress_v4), &topts); + ASSERT_OK(err, "ipv4 test_run"); + ASSERT_EQ(topts.retval, 1, "ipv4 test_run retval"); + ASSERT_EQ(topts.data_size_out, 54, "ipv4 test_run data_size_out"); + ASSERT_EQ(*magic, MAGIC_VAL, "ipv4 test_run magic"); - err = 
bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v6), - NUM_ITER, &pkt_v6, sizeof(pkt_v6), - buf, &size, &retval, &duration); - CHECK(err || retval != 1 || size != 74 || - *magic != MAGIC_VAL, "ipv6", - "err %d errno %d retval %d size %d magic %x\n", - err, errno, retval, size, *magic); + topts.data_in = &pkt_v6; + topts.data_size_in = sizeof(pkt_v6); + topts.data_out = buf; + topts.data_size_out = sizeof(buf); + + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.balancer_ingress_v6), &topts); + ASSERT_OK(err, "ipv6 test_run"); + ASSERT_EQ(topts.retval, 1, "ipv6 test_run retval"); + ASSERT_EQ(topts.data_size_out, 74, "ipv6 test_run data_size_out"); + ASSERT_EQ(*magic, MAGIC_VAL, "ipv6 test_run magic"); bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), &stats_key, stats); for (i = 0; i < nr_cpus; i++) { bytes += stats[i].bytes; pkts += stats[i].pkts; } - CHECK(bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2, - "stats", "bytes %lld pkts %lld\n", - (unsigned long long)bytes, (unsigned long long)pkts); + ASSERT_EQ(bytes, MAGIC_BYTES * NUM_ITER * 2, "stats bytes"); + ASSERT_EQ(pkts, NUM_ITER * 2, "stats pkts"); test_xdp_noinline__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c index 15a3900e4370..f543d1bd21b8 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c @@ -4,22 +4,25 @@ void test_xdp_perf(void) { const char *file = "./xdp_dummy.o"; - __u32 duration, retval, size; struct bpf_object *obj; char in[128], out[128]; int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = in, + .data_size_in = sizeof(in), + .data_out = out, + .data_size_out = sizeof(out), + .repeat = 1000000, + ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (CHECK_FAIL(err)) return; - err = bpf_prog_test_run(prog_fd, 1000000, &in[0], 128, - out, &size, &retval, &duration); - - CHECK(err || retval != XDP_PASS || size != 128, - "xdp-perf", - "err %d errno %d retval %d size %d\n", - err, errno, retval, size); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, XDP_PASS, "test_run retval"); + ASSERT_EQ(topts.data_size_out, 128, "test_run data_size_out"); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c index d9a88dd1ea65..7efcbdbe772d 100644 --- a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c +++ b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c @@ -5,6 +5,7 @@ #include <linux/bpf.h> #include <stdbool.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -87,7 +88,7 @@ bloom_callback(struct bpf_map *map, __u32 *key, void *val, return 0; } -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int bloom_lookup(void *ctx) { struct callback_ctx data; @@ -100,7 +101,7 @@ int bloom_lookup(void *ctx) return 0; } -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int bloom_update(void *ctx) { struct callback_ctx data; @@ -113,7 +114,7 @@ int bloom_update(void *ctx) return 0; } -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int bloom_hashmap_lookup(void *ctx) { __u64 *result; diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_map.c b/tools/testing/selftests/bpf/progs/bloom_filter_map.c index 1316f3db79d9..f245fcfe0c61 100644 --- 
a/tools/testing/selftests/bpf/progs/bloom_filter_map.c +++ b/tools/testing/selftests/bpf/progs/bloom_filter_map.c @@ -3,6 +3,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -51,7 +52,7 @@ check_elem(struct bpf_map *map, __u32 *key, __u32 *val, return 0; } -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int inner_map(void *ctx) { struct bpf_map *inner_map; @@ -70,7 +71,7 @@ int inner_map(void *ctx) return 0; } -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int check_bloom(void *ctx) { struct callback_ctx data; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c index c86b93f33b32..d22741272692 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c @@ -2,6 +2,7 @@ /* Copyright (c) 2020 Facebook */ #include "bpf_iter.h" #include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; @@ -23,3 +24,56 @@ int dump_task(struct bpf_iter__task *ctx) BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid); return 0; } + +int num_expected_failure_copy_from_user_task = 0; +int num_success_copy_from_user_task = 0; + +SEC("iter.s/task") +int dump_task_sleepable(struct bpf_iter__task *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + static const char info[] = " === END ==="; + struct pt_regs *regs; + void *ptr; + uint32_t user_data = 0; + int ret; + + if (task == (void *)0) { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + + /* Read an invalid pointer and ensure we get an error */ + ptr = NULL; + ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0); + if (ret) { + ++num_expected_failure_copy_from_user_task; + } else { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + + /* Try to read the contents of the task's instruction pointer from the + * remote task's address space. 
+ */ + regs = (struct pt_regs *)bpf_task_pt_regs(task); + if (regs == (void *)0) { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + ptr = (void *)PT_REGS_IP(regs); + + ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0); + if (ret) { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + ++num_success_copy_from_user_task; + + if (ctx->meta->seq_num == 0) + BPF_SEQ_PRINTF(seq, " tgid gid data\n"); + + BPF_SEQ_PRINTF(seq, "%8d %8d %8d\n", task->tgid, task->pid, user_data); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c index 12349e4601e8..e08565282759 100644 --- a/tools/testing/selftests/bpf/progs/bpf_loop.c +++ b/tools/testing/selftests/bpf/progs/bpf_loop.c @@ -3,6 +3,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -53,7 +54,7 @@ static int nested_callback1(__u32 index, void *data) return 0; } -SEC("fentry/__x64_sys_nanosleep") +SEC("fentry/" SYS_PREFIX "sys_nanosleep") int test_prog(void *ctx) { struct callback_ctx data = {}; @@ -71,7 +72,7 @@ int test_prog(void *ctx) return 0; } -SEC("fentry/__x64_sys_nanosleep") +SEC("fentry/" SYS_PREFIX "sys_nanosleep") int prog_null_ctx(void *ctx) { if (bpf_get_current_pid_tgid() >> 32 != pid) @@ -82,7 +83,7 @@ int prog_null_ctx(void *ctx) return 0; } -SEC("fentry/__x64_sys_nanosleep") +SEC("fentry/" SYS_PREFIX "sys_nanosleep") int prog_invalid_flags(void *ctx) { struct callback_ctx data = {}; @@ -95,7 +96,7 @@ int prog_invalid_flags(void *ctx) return 0; } -SEC("fentry/__x64_sys_nanosleep") +SEC("fentry/" SYS_PREFIX "sys_nanosleep") int prog_nested_calls(void *ctx) { struct callback_ctx data = {}; diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c index 9dafdc244462..4ce76eb064c4 100644 --- a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c +++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c @@ -3,6 +3,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -14,7 +15,7 @@ static int empty_callback(__u32 index, void *data) return 0; } -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int benchmark(void *ctx) { for (int i = 0; i < 1000; i++) { diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h new file mode 100644 index 000000000000..5bb11fe595a4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __BPF_MISC_H__ +#define __BPF_MISC_H__ + +#if defined(__TARGET_ARCH_x86) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__x64_" +#elif defined(__TARGET_ARCH_s390) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__s390x_" +#elif defined(__TARGET_ARCH_arm64) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__arm64_" +#else +#define SYSCALL_WRAPPER 0 +#define SYS_PREFIX "__se_" +#endif + +#endif diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c new file mode 100644 index 000000000000..05838ed9b89c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2022 Sony Group Corporation */ +#include <vmlinux.h> + +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +int arg1 = 0; +unsigned 
long arg2 = 0; +unsigned long arg3 = 0; +unsigned long arg4_cx = 0; +unsigned long arg4 = 0; +unsigned long arg5 = 0; + +int arg1_core = 0; +unsigned long arg2_core = 0; +unsigned long arg3_core = 0; +unsigned long arg4_core_cx = 0; +unsigned long arg4_core = 0; +unsigned long arg5_core = 0; + +int option_syscall = 0; +unsigned long arg2_syscall = 0; +unsigned long arg3_syscall = 0; +unsigned long arg4_syscall = 0; +unsigned long arg5_syscall = 0; + +const volatile pid_t filter_pid = 0; + +SEC("kprobe/" SYS_PREFIX "sys_prctl") +int BPF_KPROBE(handle_sys_prctl) +{ + struct pt_regs *real_regs; + pid_t pid = bpf_get_current_pid_tgid() >> 32; + unsigned long tmp = 0; + + if (pid != filter_pid) + return 0; + + real_regs = PT_REGS_SYSCALL_REGS(ctx); + + /* test for PT_REGS_PARM */ + +#if !defined(bpf_target_arm64) && !defined(bpf_target_s390) + bpf_probe_read_kernel(&tmp, sizeof(tmp), &PT_REGS_PARM1_SYSCALL(real_regs)); +#endif + arg1 = tmp; + bpf_probe_read_kernel(&arg2, sizeof(arg2), &PT_REGS_PARM2_SYSCALL(real_regs)); + bpf_probe_read_kernel(&arg3, sizeof(arg3), &PT_REGS_PARM3_SYSCALL(real_regs)); + bpf_probe_read_kernel(&arg4_cx, sizeof(arg4_cx), &PT_REGS_PARM4(real_regs)); + bpf_probe_read_kernel(&arg4, sizeof(arg4), &PT_REGS_PARM4_SYSCALL(real_regs)); + bpf_probe_read_kernel(&arg5, sizeof(arg5), &PT_REGS_PARM5_SYSCALL(real_regs)); + + /* test for the CORE variant of PT_REGS_PARM */ + arg1_core = PT_REGS_PARM1_CORE_SYSCALL(real_regs); + arg2_core = PT_REGS_PARM2_CORE_SYSCALL(real_regs); + arg3_core = PT_REGS_PARM3_CORE_SYSCALL(real_regs); + arg4_core_cx = PT_REGS_PARM4_CORE(real_regs); + arg4_core = PT_REGS_PARM4_CORE_SYSCALL(real_regs); + arg5_core = PT_REGS_PARM5_CORE_SYSCALL(real_regs); + + return 0; +} + +SEC("kprobe/" SYS_PREFIX "sys_prctl") +int BPF_KPROBE_SYSCALL(prctl_enter, int option, unsigned long arg2, + unsigned long arg3, unsigned long arg4, unsigned long arg5) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != filter_pid) + return 0; + + option_syscall = option; + arg2_syscall = arg2; + arg3_syscall = arg3; + arg4_syscall = arg4; + arg5_syscall = arg5; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_user.c b/tools/testing/selftests/bpf/progs/btf_type_tag_user.c new file mode 100644 index 000000000000..5523f77c5a44 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf_type_tag_user.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Facebook */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +struct bpf_testmod_btf_type_tag_1 { + int a; +}; + +struct bpf_testmod_btf_type_tag_2 { + struct bpf_testmod_btf_type_tag_1 *p; +}; + +int g; + +SEC("fentry/bpf_testmod_test_btf_type_tag_user_1") +int BPF_PROG(test_user1, struct bpf_testmod_btf_type_tag_1 *arg) +{ + g = arg->a; + return 0; +} + +SEC("fentry/bpf_testmod_test_btf_type_tag_user_2") +int BPF_PROG(test_user2, struct bpf_testmod_btf_type_tag_2 *arg) +{ + g = arg->p->a; + return 0; +} + +/* int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, + * int __user *usockaddr_len); + */ +SEC("fentry/__sys_getsockname") +int BPF_PROG(test_sys_getsockname, int fd, struct sockaddr *usockaddr, + int *usockaddr_len) +{ + g = usockaddr->sa_family; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/core_kern.c b/tools/testing/selftests/bpf/progs/core_kern.c index 13499cc15c7d..2715fe27d4cf 100644 --- a/tools/testing/selftests/bpf/progs/core_kern.c +++ 
b/tools/testing/selftests/bpf/progs/core_kern.c @@ -101,4 +101,20 @@ int balancer_ingress(struct __sk_buff *ctx) return 0; } +typedef int (*func_proto_typedef___match)(long); +typedef int (*func_proto_typedef___doesnt_match)(char *); +typedef int (*func_proto_typedef_nested1)(func_proto_typedef___match); + +int proto_out[3]; + +SEC("raw_tracepoint/sys_enter") +int core_relo_proto(void *ctx) +{ + proto_out[0] = bpf_core_type_exists(func_proto_typedef___match); + proto_out[1] = bpf_core_type_exists(func_proto_typedef___doesnt_match); + proto_out[2] = bpf_core_type_exists(func_proto_typedef_nested1); + + return 0; +} + char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/core_kern_overflow.c b/tools/testing/selftests/bpf/progs/core_kern_overflow.c new file mode 100644 index 000000000000..f0d5652256ba --- /dev/null +++ b/tools/testing/selftests/bpf/progs/core_kern_overflow.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +typedef int (*func_proto_typedef)(long); +typedef int (*func_proto_typedef_nested1)(func_proto_typedef); +typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1); + +int proto_out; + +SEC("raw_tracepoint/sys_enter") +int core_relo_proto(void *ctx) +{ + proto_out = bpf_core_type_exists(func_proto_typedef_nested2); + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c index bca92c9bd29a..106dc75efcc4 100644 --- a/tools/testing/selftests/bpf/progs/fexit_sleep.c +++ b/tools/testing/selftests/bpf/progs/fexit_sleep.c @@ -3,6 +3,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_misc.h" char LICENSE[] SEC("license") = "GPL"; @@ -10,8 +11,8 @@ int pid = 0; int fentry_cnt = 0; int fexit_cnt = 0; -SEC("fentry/__x64_sys_nanosleep") -int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs) +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int nanosleep_fentry(void *ctx) { if (bpf_get_current_pid_tgid() >> 32 != pid) return 0; @@ -20,8 +21,8 @@ int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs) return 0; } -SEC("fexit/__x64_sys_nanosleep") -int BPF_PROG(nanosleep_fexit, const struct pt_regs *regs, int ret) +SEC("fexit/" SYS_PREFIX "sys_nanosleep") +int nanosleep_fexit(void *ctx) { if (bpf_get_current_pid_tgid() >> 32 != pid) return 0; diff --git a/tools/testing/selftests/bpf/progs/perfbuf_bench.c b/tools/testing/selftests/bpf/progs/perfbuf_bench.c index e5ab4836a641..45204fe0c570 100644 --- a/tools/testing/selftests/bpf/progs/perfbuf_bench.c +++ b/tools/testing/selftests/bpf/progs/perfbuf_bench.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <stdint.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -18,7 +19,7 @@ const volatile int batch_cnt = 0; long sample_val = 42; long dropped __attribute__((aligned(128))) = 0; -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int bench_perfbuf(void *ctx) { __u64 *sample; diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c index 123607d314d6..6a468496f539 100644 --- a/tools/testing/selftests/bpf/progs/ringbuf_bench.c +++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <stdint.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" 
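For reference, the SYS_PREFIX plumbing being wired in above comes from the new bpf_misc.h header added earlier in this series. As a rough illustration (the program below is made up, not part of the patch), all that happens is compile-time string-literal concatenation: on x86 the section name becomes "fentry/__x64_sys_getpgid", on s390 "fentry/__s390x_sys_getpgid", and architectures without syscall wrappers fall back to the "__se_" prefix::

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include "bpf_misc.h"

    /* Expands to SEC("fentry/__x64_sys_getpgid") on x86,
     * SEC("fentry/__s390x_sys_getpgid") on s390, etc.
     */
    SEC("fentry/" SYS_PREFIX "sys_getpgid")
    int example_prog(void *ctx)
    {
            return 0;
    }

    char _license[] SEC("license") = "GPL";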
char _license[] SEC("license") = "GPL"; @@ -30,7 +31,7 @@ static __always_inline long get_flags() return sz >= wakeup_data_size ? BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP; } -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int bench_ringbuf(void *ctx) { long *sample, flags; diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index d0298dccedcd..c8d810010a94 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -72,7 +72,8 @@ int _getsockopt(struct bpf_sockopt *ctx) * reasons. */ - if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end) + /* Check that optval contains address (__u64) */ + if (optval + sizeof(__u64) > optval_end) return 0; /* bounds check */ if (((struct tcp_zerocopy_receive *)optval)->address != 0) diff --git a/tools/testing/selftests/bpf/progs/btf_decl_tag.c b/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c index c88ccc53529a..c88ccc53529a 100644 --- a/tools/testing/selftests/bpf/progs/btf_decl_tag.c +++ b/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c index 8812a90da4eb..702578a5e496 100644 --- a/tools/testing/selftests/bpf/progs/test_probe_user.c +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -7,20 +7,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> - -#if defined(__TARGET_ARCH_x86) -#define SYSCALL_WRAPPER 1 -#define SYS_PREFIX "__x64_" -#elif defined(__TARGET_ARCH_s390) -#define SYSCALL_WRAPPER 1 -#define SYS_PREFIX "__s390x_" -#elif defined(__TARGET_ARCH_arm64) -#define SYSCALL_WRAPPER 1 -#define SYS_PREFIX "__arm64_" -#else -#define SYSCALL_WRAPPER 0 -#define SYS_PREFIX "" -#endif +#include "bpf_misc.h" static struct sockaddr_in old; diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c index eaa7d9dba0be..5bdc0d38efc0 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c @@ -3,6 +3,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -35,7 +36,7 @@ long prod_pos = 0; /* inner state */ long seq = 0; -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int test_ringbuf(void *ctx) { int cur_pid = bpf_get_current_pid_tgid() >> 32; diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c index 83b0aaa52ef7..bf5b7caefdd0 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c @@ -392,6 +392,7 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; int err, family; + __u32 val_u32; bool v4; v4 = (ctx->family == AF_INET); @@ -418,6 +419,11 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx) if (LSW(ctx->remote_port, 0) != SRC_PORT) return SK_DROP; + /* Load from remote_port field with zero padding (backward compatibility) */ + val_u32 = *(__u32 *)&ctx->remote_port; + if (val_u32 != bpf_htonl(bpf_ntohs(SRC_PORT) << 16)) + return SK_DROP; + /* Narrow loads from local_port field. Expect DST_PORT. 
*/ if (LSB(ctx->local_port, 0) != ((DST_PORT >> 0) & 0xff) || LSB(ctx->local_port, 1) != ((DST_PORT >> 8) & 0xff) || diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index 81b57b9aaaea..246f1f001813 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -12,6 +12,7 @@ enum bpf_linum_array_idx { EGRESS_LINUM_IDX, INGRESS_LINUM_IDX, + READ_SK_DST_PORT_LINUM_IDX, __NR_BPF_LINUM_ARRAY_IDX, }; @@ -250,4 +251,44 @@ int ingress_read_sock_fields(struct __sk_buff *skb) return CG_OK; } +static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk) +{ + __u32 *word = (__u32 *)&sk->dst_port; + return word[0] == bpf_htonl(0xcafe0000); +} + +static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk) +{ + __u16 *half = (__u16 *)&sk->dst_port; + return half[0] == bpf_htons(0xcafe); +} + +static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk) +{ + __u8 *byte = (__u8 *)&sk->dst_port; + return byte[0] == 0xca && byte[1] == 0xfe; +} + +SEC("cgroup_skb/egress") +int read_sk_dst_port(struct __sk_buff *skb) +{ + __u32 linum, linum_idx; + struct bpf_sock *sk; + + linum_idx = READ_SK_DST_PORT_LINUM_IDX; + + sk = skb->sk; + if (!sk) + RET_LOG(); + + if (!sk_dst_port__load_word(sk)) + RET_LOG(); + if (!sk_dst_port__load_half(sk)) + RET_LOG(); + if (!sk_dst_port__load_byte(sk)) + RET_LOG(); + + return CG_OK; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c index 62fb7cd4d87a..97ed625bb70a 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c @@ -12,7 +12,7 @@ struct { __uint(max_entries, 4); } cpu_map SEC(".maps"); -SEC("xdp_cpumap/dummy_cm") +SEC("xdp/cpumap") int xdp_dummy_cm(struct xdp_md *ctx) { return XDP_PASS; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c index 48007f17dfa8..20ec6723df18 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c @@ -24,7 +24,7 @@ int xdp_dummy_prog(struct xdp_md *ctx) return XDP_PASS; } -SEC("xdp_cpumap/dummy_cm") +SEC("xdp/cpumap") int xdp_dummy_cm(struct xdp_md *ctx) { if (ctx->ingress_ifindex == IFINDEX_LO) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c index e1caf510b7d2..cdcf7de7ec8c 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c @@ -12,7 +12,7 @@ struct { /* valid program on DEVMAP entry via SEC name; * has access to egress and ingress ifindex */ -SEC("xdp_devmap/map_prog") +SEC("xdp/devmap") int xdp_dummy_dm(struct xdp_md *ctx) { return XDP_PASS; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c index 8ae11fab8316..4139a14f9996 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c @@ -27,7 +27,7 @@ int xdp_dummy_prog(struct xdp_md *ctx) /* valid program on 
DEVMAP entry via SEC name; * has access to egress and ingress ifindex */ -SEC("xdp_devmap/map_prog") +SEC("xdp/devmap") int xdp_dummy_dm(struct xdp_md *ctx) { char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n"; diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c index 119582aa105a..6695478c2b25 100644 --- a/tools/testing/selftests/bpf/progs/trace_printk.c +++ b/tools/testing/selftests/bpf/progs/trace_printk.c @@ -4,6 +4,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -12,7 +13,7 @@ int trace_printk_ran = 0; const char fmt[] = "Testing,testing %d\n"; -SEC("fentry/__x64_sys_nanosleep") +SEC("fentry/" SYS_PREFIX "sys_nanosleep") int sys_enter(void *ctx) { trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt), diff --git a/tools/testing/selftests/bpf/progs/trace_vprintk.c b/tools/testing/selftests/bpf/progs/trace_vprintk.c index d327241ba047..969306cd4f33 100644 --- a/tools/testing/selftests/bpf/progs/trace_vprintk.c +++ b/tools/testing/selftests/bpf/progs/trace_vprintk.c @@ -4,6 +4,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -11,7 +12,7 @@ int null_data_vprintk_ret = 0; int trace_vprintk_ret = 0; int trace_vprintk_ran = 0; -SEC("fentry/__x64_sys_nanosleep") +SEC("fentry/" SYS_PREFIX "sys_nanosleep") int sys_enter(void *ctx) { static const char one[] = "1"; diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 2098f3f27f18..2ab049b54d6c 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -5,6 +5,7 @@ #include <asm/unistd.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -25,28 +26,28 @@ int BPF_PROG(bench_trigger_raw_tp, struct pt_regs *regs, long id) return 0; } -SEC("kprobe/__x64_sys_getpgid") +SEC("kprobe/" SYS_PREFIX "sys_getpgid") int bench_trigger_kprobe(void *ctx) { __sync_add_and_fetch(&hits, 1); return 0; } -SEC("fentry/__x64_sys_getpgid") +SEC("fentry/" SYS_PREFIX "sys_getpgid") int bench_trigger_fentry(void *ctx) { __sync_add_and_fetch(&hits, 1); return 0; } -SEC("fentry.s/__x64_sys_getpgid") +SEC("fentry.s/" SYS_PREFIX "sys_getpgid") int bench_trigger_fentry_sleep(void *ctx) { __sync_add_and_fetch(&hits, 1); return 0; } -SEC("fmod_ret/__x64_sys_getpgid") +SEC("fmod_ret/" SYS_PREFIX "sys_getpgid") int bench_trigger_fmodret(void *ctx) { __sync_add_and_fetch(&hits, 1); diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c index 8395782b6e0a..97b26a30b59a 100644 --- a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c @@ -70,7 +70,7 @@ int xdp_redirect_map_all_prog(struct xdp_md *ctx) BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); } -SEC("xdp_devmap/map_prog") +SEC("xdp/devmap") int xdp_devmap_prog(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index b9f1bbbc8aba..6e6235185a86 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -61,7 +61,11 @@ static int 
bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key, }; __u8 data[64] = {}; int mfd, pfd, ret, zero = 0; - __u32 retval = 0; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = data, + .data_size_in = sizeof(data), + .repeat = 1, + ); mfd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(__u64), 1, NULL); if (mfd < 0) @@ -75,9 +79,8 @@ static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key, return -1; } - ret = bpf_prog_test_run(pfd, 1, data, sizeof(data), - NULL, NULL, &retval, NULL); - if (ret < 0 || retval != 42) { + ret = bpf_prog_test_run_opts(pfd, &topts); + if (ret < 0 || topts.retval != 42) { ret = -1; } else { assert(!bpf_map_lookup_elem(mfd, &zero, value)); diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh index 5620919fde9e..826f4423ce02 100755 --- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh +++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh @@ -23,6 +23,12 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +readonly NS1="ns1-$(mktemp -u XXXXXX)" +readonly NS2="ns2-$(mktemp -u XXXXXX)" +readonly NS3="ns3-$(mktemp -u XXXXXX)" +readonly NS4="ns4-$(mktemp -u XXXXXX)" +readonly NS5="ns5-$(mktemp -u XXXXXX)" +readonly NS6="ns6-$(mktemp -u XXXXXX)" msg="skip all tests:" if [ $UID != 0 ]; then @@ -41,23 +47,23 @@ cleanup() fi set +e - ip netns del ns1 2> /dev/null - ip netns del ns2 2> /dev/null - ip netns del ns3 2> /dev/null - ip netns del ns4 2> /dev/null - ip netns del ns5 2> /dev/null - ip netns del ns6 2> /dev/null + ip netns del ${NS1} 2> /dev/null + ip netns del ${NS2} 2> /dev/null + ip netns del ${NS3} 2> /dev/null + ip netns del ${NS4} 2> /dev/null + ip netns del ${NS5} 2> /dev/null + ip netns del ${NS6} 2> /dev/null rm -f $TMP_FILE } set -e -ip netns add ns1 -ip netns add ns2 -ip netns add ns3 -ip netns add ns4 -ip netns add ns5 -ip netns add ns6 +ip netns add ${NS1} +ip netns add ${NS2} +ip netns add ${NS3} +ip netns add ${NS4} +ip netns add ${NS5} +ip netns add ${NS6} trap cleanup 0 2 3 6 9 @@ -67,78 +73,78 @@ ip link add veth5 type veth peer name veth6 ip link add veth7 type veth peer name veth8 ip link add veth9 type veth peer name veth10 -ip link set veth1 netns ns1 -ip link set veth2 netns ns2 -ip link set veth3 netns ns2 -ip link set veth4 netns ns3 -ip link set veth5 netns ns3 -ip link set veth6 netns ns4 -ip link set veth7 netns ns4 -ip link set veth8 netns ns5 -ip link set veth9 netns ns5 -ip link set veth10 netns ns6 - -ip netns exec ns1 ip link set dev veth1 up -ip netns exec ns2 ip link set dev veth2 up -ip netns exec ns2 ip link set dev veth3 up -ip netns exec ns3 ip link set dev veth4 up -ip netns exec ns3 ip link set dev veth5 up -ip netns exec ns4 ip link set dev veth6 up -ip netns exec ns4 ip link set dev veth7 up -ip netns exec ns5 ip link set dev veth8 up -ip netns exec ns5 ip link set dev veth9 up -ip netns exec ns6 ip link set dev veth10 up -ip netns exec ns6 ip link set dev lo up +ip link set veth1 netns ${NS1} +ip link set veth2 netns ${NS2} +ip link set veth3 netns ${NS2} +ip link set veth4 netns ${NS3} +ip link set veth5 netns ${NS3} +ip link set veth6 netns ${NS4} +ip link set veth7 netns ${NS4} +ip link set veth8 netns ${NS5} +ip link set veth9 netns ${NS5} +ip link set veth10 netns ${NS6} + +ip netns exec ${NS1} ip link set dev veth1 up +ip netns exec ${NS2} ip link set dev veth2 up +ip netns exec ${NS2} ip link set dev veth3 up +ip netns exec ${NS3} ip link set dev veth4 up +ip netns exec ${NS3} ip link set 
dev veth5 up +ip netns exec ${NS4} ip link set dev veth6 up +ip netns exec ${NS4} ip link set dev veth7 up +ip netns exec ${NS5} ip link set dev veth8 up +ip netns exec ${NS5} ip link set dev veth9 up +ip netns exec ${NS6} ip link set dev veth10 up +ip netns exec ${NS6} ip link set dev lo up # All link scope addresses and routes required between veths -ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link -ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link -ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link -ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link -ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link -ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link -ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link -ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link -ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link -ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link -ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link -ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link -ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link -ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link -ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link -ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link - -ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo -ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21 - -ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2 -ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link - -ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65 -ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4 - -ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6 -ip netns exec ns4 ip -6 addr add fc42::1 dev lo -ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87 - -ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109 -ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8 - -ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo -ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo - -ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null - -ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null -ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null -ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null - -ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE & -ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330" +ip netns exec ${NS1} ip -6 addr add fb00::12/16 dev veth1 scope link +ip netns exec ${NS1} ip -6 route add fb00::21 dev veth1 scope link +ip netns exec ${NS2} ip -6 addr add fb00::21/16 dev veth2 scope link +ip netns exec ${NS2} ip -6 addr add fb00::34/16 dev veth3 scope link +ip netns exec ${NS2} ip -6 route add fb00::43 dev veth3 scope link 
+ip netns exec ${NS3} ip -6 route add fb00::65 dev veth5 scope link +ip netns exec ${NS3} ip -6 addr add fb00::43/16 dev veth4 scope link +ip netns exec ${NS3} ip -6 addr add fb00::56/16 dev veth5 scope link +ip netns exec ${NS4} ip -6 addr add fb00::65/16 dev veth6 scope link +ip netns exec ${NS4} ip -6 addr add fb00::78/16 dev veth7 scope link +ip netns exec ${NS4} ip -6 route add fb00::87 dev veth7 scope link +ip netns exec ${NS5} ip -6 addr add fb00::87/16 dev veth8 scope link +ip netns exec ${NS5} ip -6 addr add fb00::910/16 dev veth9 scope link +ip netns exec ${NS5} ip -6 route add fb00::109 dev veth9 scope link +ip netns exec ${NS5} ip -6 route add fb00::109 table 117 dev veth9 scope link +ip netns exec ${NS6} ip -6 addr add fb00::109/16 dev veth10 scope link + +ip netns exec ${NS1} ip -6 addr add fb00::1/16 dev lo +ip netns exec ${NS1} ip -6 route add fb00::6 dev veth1 via fb00::21 + +ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2 +ip netns exec ${NS2} ip -6 route add fd00::1 dev veth3 via fb00::43 scope link + +ip netns exec ${NS3} ip -6 route add fc42::1 dev veth5 via fb00::65 +ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4 + +ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6 +ip netns exec ${NS4} ip -6 addr add fc42::1 dev lo +ip netns exec ${NS4} ip -6 route add fd00::3 dev veth7 via fb00::87 + +ip netns exec ${NS5} ip -6 route add fd00::4 table 117 dev veth9 via fb00::109 +ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8 + +ip netns exec ${NS6} ip -6 addr add fb00::6/16 dev lo +ip netns exec ${NS6} ip -6 addr add fd00::4/16 dev lo + +ip netns exec ${NS1} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ${NS2} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ${NS3} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ${NS4} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ${NS5} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + +ip netns exec ${NS6} sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null +ip netns exec ${NS6} sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null +ip netns exec ${NS6} sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null + +ip netns exec ${NS6} nc -l -6 -u -d 7330 > $TMP_FILE & +ip netns exec ${NS1} bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330" sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment kill -TERM $! 
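The test_lru_map.c conversion above (and the test_verifier.c one below) replaces the deprecated bpf_prog_test_run() with the opts-based bpf_prog_test_run_opts(), moving the return value from an out-argument into the opts struct. A minimal sketch of the resulting calling pattern, with error handling trimmed and the helper name invented for illustration::

    #include <bpf/bpf.h>

    /* Run a loaded program once over data and return its retval. */
    static int run_prog_once(int prog_fd, void *data, __u32 data_sz)
    {
            LIBBPF_OPTS(bpf_test_run_opts, topts,
                    .data_in = data,
                    .data_size_in = data_sz,
                    .repeat = 1,
            );
            int err;

            err = bpf_prog_test_run_opts(prog_fd, &topts);
            if (err < 0)
                    return err;
            return topts.retval;
    }

LIBBPF_OPTS() zero-initializes the struct and records sizeof() in its embedded .sz field, which is how libbpf keeps these opts structs extensible without breaking older callers.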
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 50f7e74ca0b9..cbebfaa7c1e8 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -738,7 +738,7 @@ static void test_sockmap(unsigned int tasks, void *data) sizeof(key), sizeof(value), 6, NULL); if (fd < 0) { - if (!bpf_probe_map_type(BPF_MAP_TYPE_SOCKMAP, 0)) { + if (!libbpf_probe_bpf_map_type(BPF_MAP_TYPE_SOCKMAP, NULL)) { printf("%s SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP)\n", __func__); skips++; diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh index 6413c1472554..102e6588e2fe 100755 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh @@ -4,6 +4,7 @@ # Copyright (c) 2019 Cloudflare set -eu +readonly NS1="ns1-$(mktemp -u XXXXXX)" wait_for_ip() { @@ -28,12 +29,12 @@ get_prog_id() ns1_exec() { - ip netns exec ns1 "$@" + ip netns exec ${NS1} "$@" } setup() { - ip netns add ns1 + ip netns add ${NS1} ns1_exec ip link set lo up ns1_exec sysctl -w net.ipv4.tcp_syncookies=2 diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 29bbaa58233c..92e3465fbae8 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -456,7 +456,7 @@ static int probe_filter_length(const struct bpf_insn *fp) static bool skip_unsupported_map(enum bpf_map_type map_type) { - if (!bpf_probe_map_type(map_type, 0)) { + if (!libbpf_probe_bpf_map_type(map_type, NULL)) { printf("SKIP (unsupported map type %d)\n", map_type); skips++; return true; @@ -1021,13 +1021,18 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, { __u8 tmp[TEST_DATA_LEN << 2]; __u32 size_tmp = sizeof(tmp); - uint32_t retval; int err, saved_errno; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = data, + .data_size_in = size_data, + .data_out = tmp, + .data_size_out = size_tmp, + .repeat = 1, + ); if (unpriv) set_admin(true); - err = bpf_prog_test_run(fd_prog, 1, data, size_data, - tmp, &size_tmp, &retval, NULL); + err = bpf_prog_test_run_opts(fd_prog, &topts); saved_errno = errno; if (unpriv) @@ -1051,9 +1056,8 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, } } - if (retval != expected_val && - expected_val != POINTER_VALUE) { - printf("FAIL retval %d != %d ", retval, expected_val); + if (topts.retval != expected_val && expected_val != POINTER_VALUE) { + printf("FAIL retval %d != %d ", topts.retval, expected_val); return 1; } @@ -1176,7 +1180,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, * bpf_probe_prog_type won't give correct answer */ if (fd_prog < 0 && prog_type != BPF_PROG_TYPE_TRACING && - !bpf_probe_prog_type(prog_type, 0)) { + !libbpf_probe_bpf_prog_type(prog_type, NULL)) { printf("SKIP (unsupported program type %d)\n", prog_type); skips++; goto close_fds; diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh index d10cefd6eb09..ea69370caae3 100755 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh @@ -2,6 +2,8 @@ # Kselftest framework requirement - SKIP code is 4. 
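The probe calls switched just above (in test_maps.c and test_verifier.c) use the newer libbpf feature-probing API: libbpf_probe_bpf_map_type() returns 1 when the kernel supports the map type, 0 when it does not, and a negative error if the probe itself fails. A hedged sketch of that skip-if-unsupported pattern (the wrapper is illustrative, not from the patch)::

    #include <stdbool.h>
    #include <bpf/libbpf.h>

    /* True only when the probe positively confirms support. */
    static bool map_type_supported(enum bpf_map_type map_type)
    {
            return libbpf_probe_bpf_map_type(map_type, NULL) == 1;
    }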
readonly KSFT_SKIP=4 +readonly NS1="ns1-$(mktemp -u XXXXXX)" +readonly NS2="ns2-$(mktemp -u XXXXXX)" cleanup() { @@ -13,8 +15,8 @@ cleanup() set +e ip link del veth1 2> /dev/null - ip netns del ns1 2> /dev/null - ip netns del ns2 2> /dev/null + ip netns del ${NS1} 2> /dev/null + ip netns del ${NS2} 2> /dev/null } ip link set dev lo xdp off 2>/dev/null > /dev/null @@ -24,32 +26,32 @@ if [ $? -ne 0 ];then fi set -e -ip netns add ns1 -ip netns add ns2 +ip netns add ${NS1} +ip netns add ${NS2} trap cleanup 0 2 3 6 9 ip link add veth1 type veth peer name veth2 -ip link set veth1 netns ns1 -ip link set veth2 netns ns2 +ip link set veth1 netns ${NS1} +ip link set veth2 netns ${NS2} -ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth1 -ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth2 +ip netns exec ${NS1} ip addr add 10.1.1.11/24 dev veth1 +ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2 -ip netns exec ns1 tc qdisc add dev veth1 clsact -ip netns exec ns2 tc qdisc add dev veth2 clsact +ip netns exec ${NS1} tc qdisc add dev veth1 clsact +ip netns exec ${NS2} tc qdisc add dev veth2 clsact -ip netns exec ns1 tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t -ip netns exec ns2 tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t +ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t +ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t -ip netns exec ns1 ip link set dev veth1 xdp obj test_xdp_meta.o sec x -ip netns exec ns2 ip link set dev veth2 xdp obj test_xdp_meta.o sec x +ip netns exec ${NS1} ip link set dev veth1 xdp obj test_xdp_meta.o sec x +ip netns exec ${NS2} ip link set dev veth2 xdp obj test_xdp_meta.o sec x -ip netns exec ns1 ip link set dev veth1 up -ip netns exec ns2 ip link set dev veth2 up +ip netns exec ${NS1} ip link set dev veth1 up +ip netns exec ${NS2} ip link set dev veth2 up -ip netns exec ns1 ping -c 1 10.1.1.22 -ip netns exec ns2 ping -c 1 10.1.1.11 +ip netns exec ${NS1} ping -c 1 10.1.1.22 +ip netns exec ${NS2} ping -c 1 10.1.1.11 exit 0 diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh index 57c8db9972a6..1d79f31480ad 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh @@ -10,6 +10,8 @@ # | xdp forwarding | # ------------------ +readonly NS1="ns1-$(mktemp -u XXXXXX)" +readonly NS2="ns2-$(mktemp -u XXXXXX)" ret=0 setup() @@ -17,27 +19,27 @@ setup() local xdpmode=$1 - ip netns add ns1 - ip netns add ns2 + ip netns add ${NS1} + ip netns add ${NS2} - ip link add veth1 index 111 type veth peer name veth11 netns ns1 - ip link add veth2 index 222 type veth peer name veth22 netns ns2 + ip link add veth1 index 111 type veth peer name veth11 netns ${NS1} + ip link add veth2 index 222 type veth peer name veth22 netns ${NS2} ip link set veth1 up ip link set veth2 up - ip -n ns1 link set dev veth11 up - ip -n ns2 link set dev veth22 up + ip -n ${NS1} link set dev veth11 up + ip -n ${NS2} link set dev veth22 up - ip -n ns1 addr add 10.1.1.11/24 dev veth11 - ip -n ns2 addr add 10.1.1.22/24 dev veth22 + ip -n ${NS1} addr add 10.1.1.11/24 dev veth11 + ip -n ${NS2} addr add 10.1.1.22/24 dev veth22 } cleanup() { ip link del veth1 2> /dev/null ip link del veth2 2> /dev/null - ip netns del ns1 2> /dev/null - ip netns del ns2 2> /dev/null + ip netns del ${NS1} 2> /dev/null + ip netns del ${NS2} 2> /dev/null } test_xdp_redirect() @@ -52,13 +54,13 @@ test_xdp_redirect() 
return 0 fi - ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null - ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null + ip -n ${NS1} link set veth11 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null + ip -n ${NS2} link set veth22 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null - if ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null && - ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null; then + if ip netns exec ${NS1} ping -c 1 10.1.1.22 &> /dev/null && + ip netns exec ${NS2} ping -c 1 10.1.1.11 &> /dev/null; then echo "selftests: test_xdp_redirect $xdpmode [PASS]"; else ret=1 diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh index 05f872740999..cc57cb87e65f 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh @@ -32,6 +32,11 @@ DRV_MODE="xdpgeneric xdpdrv xdpegress" PASS=0 FAIL=0 LOG_DIR=$(mktemp -d) +declare -a NS +NS[0]="ns0-$(mktemp -u XXXXXX)" +NS[1]="ns1-$(mktemp -u XXXXXX)" +NS[2]="ns2-$(mktemp -u XXXXXX)" +NS[3]="ns3-$(mktemp -u XXXXXX)" test_pass() { @@ -47,11 +52,9 @@ test_fail() clean_up() { - for i in $(seq $NUM); do - ip link del veth$i 2> /dev/null - ip netns del ns$i 2> /dev/null + for i in $(seq 0 $NUM); do + ip netns del ${NS[$i]} 2> /dev/null done - ip netns del ns0 2> /dev/null } # Kselftest framework requirement - SKIP code is 4. @@ -79,23 +82,22 @@ setup_ns() mode="xdpdrv" fi - ip netns add ns0 + ip netns add ${NS[0]} for i in $(seq $NUM); do - ip netns add ns$i - ip -n ns$i link add veth0 index 2 type veth \ - peer name veth$i netns ns0 index $((1 + $i)) - ip -n ns0 link set veth$i up - ip -n ns$i link set veth0 up - - ip -n ns$i addr add 192.0.2.$i/24 dev veth0 - ip -n ns$i addr add 2001:db8::$i/64 dev veth0 + ip netns add ${NS[$i]} + ip -n ${NS[$i]} link add veth0 type veth peer name veth$i netns ${NS[0]} + ip -n ${NS[$i]} link set veth0 up + ip -n ${NS[0]} link set veth$i up + + ip -n ${NS[$i]} addr add 192.0.2.$i/24 dev veth0 + ip -n ${NS[$i]} addr add 2001:db8::$i/64 dev veth0 # Add a neigh entry for IPv4 ping test - ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0 - ip -n ns$i link set veth0 $mode obj \ + ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0 + ip -n ${NS[$i]} link set veth0 $mode obj \ xdp_dummy.o sec xdp &> /dev/null || \ { test_fail "Unable to load dummy xdp" && exit 1; } IFACES="$IFACES veth$i" - veth_mac[$i]=$(ip -n ns0 link show veth$i | awk '/link\/ether/ {print $2}') + veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}') done } @@ -104,10 +106,10 @@ do_egress_tests() local mode=$1 # mac test - ip netns exec ns2 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log & - ip netns exec ns3 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log & + ip netns exec ${NS[2]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log & + ip netns exec ${NS[3]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log & sleep 0.5 - ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null + ip netns exec ${NS[1]} ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null sleep 0.5 pkill tcpdump @@ -123,18 +125,18 @@ do_ping_tests() local mode=$1 # ping6 test: echo request should be redirect back to itself, 
not others - ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02 + ip netns exec ${NS[1]} ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02 - ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log & - ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log & - ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log & + ip netns exec ${NS[1]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log & + ip netns exec ${NS[2]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log & + ip netns exec ${NS[3]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log & sleep 0.5 # ARP test - ip netns exec ns1 arping -q -c 2 -I veth0 192.0.2.254 + ip netns exec ${NS[1]} arping -q -c 2 -I veth0 192.0.2.254 # IPv4 test - ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null + ip netns exec ${NS[1]} ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null # IPv6 test - ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null + ip netns exec ${NS[1]} ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null sleep 0.5 pkill tcpdump @@ -180,7 +182,7 @@ do_tests() xdpgeneric) drv_p="-S";; esac - ip netns exec ns0 ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log & + ip netns exec ${NS[0]} ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log & xdp_pid=$! sleep 1 if ! ps -p $xdp_pid > /dev/null; then @@ -197,10 +199,10 @@ do_tests() kill $xdp_pid } -trap clean_up EXIT - check_env +trap clean_up EXIT + for mode in ${DRV_MODE}; do setup_ns $mode do_tests $mode diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh index a3a1eaee26ea..392d28cc4e58 100755 --- a/tools/testing/selftests/bpf/test_xdp_veth.sh +++ b/tools/testing/selftests/bpf/test_xdp_veth.sh @@ -22,6 +22,9 @@ ksft_skip=4 TESTNAME=xdp_veth BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts) BPF_DIR=$BPF_FS/test_$TESTNAME +readonly NS1="ns1-$(mktemp -u XXXXXX)" +readonly NS2="ns2-$(mktemp -u XXXXXX)" +readonly NS3="ns3-$(mktemp -u XXXXXX)" _cleanup() { @@ -29,9 +32,9 @@ _cleanup() ip link del veth1 2> /dev/null ip link del veth2 2> /dev/null ip link del veth3 2> /dev/null - ip netns del ns1 2> /dev/null - ip netns del ns2 2> /dev/null - ip netns del ns3 2> /dev/null + ip netns del ${NS1} 2> /dev/null + ip netns del ${NS2} 2> /dev/null + ip netns del ${NS3} 2> /dev/null rm -rf $BPF_DIR 2> /dev/null } @@ -77,24 +80,24 @@ set -e trap cleanup_skip EXIT -ip netns add ns1 -ip netns add ns2 -ip netns add ns3 +ip netns add ${NS1} +ip netns add ${NS2} +ip netns add ${NS3} -ip link add veth1 index 111 type veth peer name veth11 netns ns1 -ip link add veth2 index 122 type veth peer name veth22 netns ns2 -ip link add veth3 index 133 type veth peer name veth33 netns ns3 +ip link add veth1 index 111 type veth peer name veth11 netns ${NS1} +ip link add veth2 index 122 type veth peer name veth22 netns ${NS2} +ip link add veth3 index 133 type veth peer name veth33 netns ${NS3} ip link set veth1 up ip link set veth2 up ip link set veth3 up -ip -n ns1 addr add 10.1.1.11/24 dev veth11 -ip -n ns3 addr add 10.1.1.33/24 dev veth33 +ip -n ${NS1} addr add 10.1.1.11/24 dev veth11 +ip -n ${NS3} addr add 10.1.1.33/24 dev veth33 -ip -n ns1 link set dev veth11 up -ip -n ns2 link set dev veth22 up -ip -n ns3 link set dev veth33 up +ip -n ${NS1} link set dev veth11 up +ip -n ${NS2} link set dev veth22 up +ip -n ${NS3} link set dev veth33 up mkdir $BPF_DIR bpftool prog loadall \ @@ -107,12 
+110,12 @@ ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0 ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1 ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2 -ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp -ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp -ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp +ip -n ${NS1} link set dev veth11 xdp obj xdp_dummy.o sec xdp +ip -n ${NS2} link set dev veth22 xdp obj xdp_tx.o sec xdp +ip -n ${NS3} link set dev veth33 xdp obj xdp_dummy.o sec xdp trap cleanup EXIT -ip netns exec ns1 ping -c 1 -W 1 10.1.1.33 +ip netns exec ${NS1} ping -c 1 -W 1 10.1.1.33 exit 0 diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh index 0cbc7604a2f8..810c407e0286 100755 --- a/tools/testing/selftests/bpf/test_xdp_vlan.sh +++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh @@ -4,6 +4,8 @@ # Kselftest framework requirement - SKIP code is 4. readonly KSFT_SKIP=4 +readonly NS1="ns1-$(mktemp -u XXXXXX)" +readonly NS2="ns2-$(mktemp -u XXXXXX)" # Allow wrapper scripts to name test if [ -z "$TESTNAME" ]; then @@ -49,15 +51,15 @@ cleanup() if [ -n "$INTERACTIVE" ]; then echo "Namespace setup still active explore with:" - echo " ip netns exec ns1 bash" - echo " ip netns exec ns2 bash" + echo " ip netns exec ${NS1} bash" + echo " ip netns exec ${NS2} bash" exit $status fi set +e ip link del veth1 2> /dev/null - ip netns del ns1 2> /dev/null - ip netns del ns2 2> /dev/null + ip netns del ${NS1} 2> /dev/null + ip netns del ${NS2} 2> /dev/null } # Using external program "getopt" to get --long-options @@ -126,8 +128,8 @@ fi # Interactive mode likely require us to cleanup netns if [ -n "$INTERACTIVE" ]; then ip link del veth1 2> /dev/null - ip netns del ns1 2> /dev/null - ip netns del ns2 2> /dev/null + ip netns del ${NS1} 2> /dev/null + ip netns del ${NS2} 2> /dev/null fi # Exit on failure @@ -144,8 +146,8 @@ if [ -n "$VERBOSE" ]; then fi # Create two namespaces -ip netns add ns1 -ip netns add ns2 +ip netns add ${NS1} +ip netns add ${NS2} # Run cleanup if failing or on kill trap cleanup 0 2 3 6 9 @@ -154,44 +156,44 @@ trap cleanup 0 2 3 6 9 ip link add veth1 type veth peer name veth2 # Move veth1 and veth2 into the respective namespaces -ip link set veth1 netns ns1 -ip link set veth2 netns ns2 +ip link set veth1 netns ${NS1} +ip link set veth2 netns ${NS2} # NOTICE: XDP require VLAN header inside packet payload # - Thus, disable VLAN offloading driver features # - For veth REMEMBER TX side VLAN-offload # # Disable rx-vlan-offload (mostly needed on ns1) -ip netns exec ns1 ethtool -K veth1 rxvlan off -ip netns exec ns2 ethtool -K veth2 rxvlan off +ip netns exec ${NS1} ethtool -K veth1 rxvlan off +ip netns exec ${NS2} ethtool -K veth2 rxvlan off # # Disable tx-vlan-offload (mostly needed on ns2) -ip netns exec ns2 ethtool -K veth2 txvlan off -ip netns exec ns1 ethtool -K veth1 txvlan off +ip netns exec ${NS2} ethtool -K veth2 txvlan off +ip netns exec ${NS1} ethtool -K veth1 txvlan off export IPADDR1=100.64.41.1 export IPADDR2=100.64.41.2 # In ns1/veth1 add IP-addr on plain net_device -ip netns exec ns1 ip addr add ${IPADDR1}/24 dev veth1 -ip netns exec ns1 ip link set veth1 up +ip netns exec ${NS1} ip addr add ${IPADDR1}/24 dev veth1 +ip netns exec ${NS1} ip link set veth1 up # In ns2/veth2 create VLAN device export VLAN=4011 export DEVNS2=veth2 -ip netns exec ns2 ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN -ip netns exec ns2 ip addr add ${IPADDR2}/24 
dev $DEVNS2.$VLAN -ip netns exec ns2 ip link set $DEVNS2 up -ip netns exec ns2 ip link set $DEVNS2.$VLAN up +ip netns exec ${NS2} ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN +ip netns exec ${NS2} ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN +ip netns exec ${NS2} ip link set $DEVNS2 up +ip netns exec ${NS2} ip link set $DEVNS2.$VLAN up # Bringup lo in netns (to avoids confusing people using --interactive) -ip netns exec ns1 ip link set lo up -ip netns exec ns2 ip link set lo up +ip netns exec ${NS1} ip link set lo up +ip netns exec ${NS2} ip link set lo up # At this point, the hosts cannot reach each-other, # because ns2 are using VLAN tags on the packets. -ip netns exec ns2 sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"' +ip netns exec ${NS2} sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"' # Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags @@ -202,19 +204,19 @@ export FILE=test_xdp_vlan.o # First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change" export XDP_PROG=xdp_vlan_change -ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG +ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG # In ns1: egress use TC to add back VLAN tag 4011 # (del cmd) # tc qdisc del dev $DEVNS1 clsact 2> /dev/null # -ip netns exec ns1 tc qdisc add dev $DEVNS1 clsact -ip netns exec ns1 tc filter add dev $DEVNS1 egress \ +ip netns exec ${NS1} tc qdisc add dev $DEVNS1 clsact +ip netns exec ${NS1} tc filter add dev $DEVNS1 egress \ prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push # Now the namespaces can reach each-other, test with ping: -ip netns exec ns2 ping -i 0.2 -W 2 -c 2 $IPADDR1 -ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2 +ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1 +ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2 # Second test: Replace xdp prog, that fully remove vlan header # @@ -223,9 +225,9 @@ ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2 # ETH_P_8021Q indication, and this cause overwriting of our changes. 
# export XDP_PROG=xdp_vlan_remove_outer2 -ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE off -ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG +ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE off +ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG # Now the namespaces should still be able reach each-other, test with ping: -ip netns exec ns2 ping -i 0.2 -W 2 -c 2 $IPADDR1 -ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2 +ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1 +ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2 diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 7b7f918eda77..ca6abae9b09c 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -138,6 +138,29 @@ void read_trace_pipe(void) } } +ssize_t get_uprobe_offset(const void *addr) +{ + size_t start, end, base; + char buf[256]; + bool found = false; + FILE *f; + + f = fopen("/proc/self/maps", "r"); + if (!f) + return -errno; + + while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) { + if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) { + found = true; + break; + } + } + + fclose(f); + + if (!found) + return -ESRCH; + #if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2 #define OP_RT_RA_MASK 0xffff0000UL @@ -145,10 +168,6 @@ void read_trace_pipe(void) #define ADDIS_R2_R12 0x3c4c0000UL #define ADDI_R2_R2 0x38420000UL -ssize_t get_uprobe_offset(const void *addr, ssize_t base) -{ - u32 *insn = (u32 *)(uintptr_t)addr; - /* * A PPC64 ABIv2 function may have a local and a global entry * point. We need to use the local entry point when patching @@ -165,43 +184,16 @@ ssize_t get_uprobe_offset(const void *addr, ssize_t base) * lis r2,XXXX * addi r2,r2,XXXX */ - if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) || - ((*insn & OP_RT_RA_MASK) == LIS_R2)) && - ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2)) - return (ssize_t)(insn + 2) - base; - else - return (uintptr_t)addr - base; -} - -#else + { + const u32 *insn = (const u32 *)(uintptr_t)addr; -ssize_t get_uprobe_offset(const void *addr, ssize_t base) -{ - return (uintptr_t)addr - base; -} - -#endif - -ssize_t get_base_addr(void) -{ - size_t start, offset; - char buf[256]; - FILE *f; - - f = fopen("/proc/self/maps", "r"); - if (!f) - return -errno; - - while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n", - &start, buf, &offset) == 3) { - if (strcmp(buf, "r-xp") == 0) { - fclose(f); - return start - offset; - } + if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) || + ((*insn & OP_RT_RA_MASK) == LIS_R2)) && + ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2)) + return (uintptr_t)(insn + 2) - start + base; } - - fclose(f); - return -EINVAL; +#endif + return (uintptr_t)addr - start + base; } ssize_t get_rel_offset(uintptr_t addr) diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index d907b445524d..238a9c98cde2 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -18,8 +18,7 @@ int kallsyms_find(const char *sym, unsigned long long *addr); void read_trace_pipe(void); -ssize_t get_uprobe_offset(const void *addr, ssize_t base); -ssize_t get_base_addr(void); +ssize_t get_uprobe_offset(const void *addr); ssize_t get_rel_offset(uintptr_t addr); #endif diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index 
ce13ece08d51..8c224eac93df 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -121,7 +121,25 @@ .result = ACCEPT, }, { - "sk_fullsock(skb->sk): sk->dst_port [narrow load]", + "sk_fullsock(skb->sk): sk->dst_port [word load] (backward compatibility)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [half load]", .insns = { BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), @@ -139,7 +157,64 @@ .result = ACCEPT, }, { - "sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]", + "sk_fullsock(skb->sk): sk->dst_port [half load] (invalid)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid sock access", +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [byte load]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port)), + BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [byte load] (invalid)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid sock access", +}, +{ + "sk_fullsock(skb->sk): past sk->dst_port [half load] (invalid)", .insns = { BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), @@ -149,7 +224,7 @@ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, dst_port)), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, diff --git 
a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c index 51c8224b4ccc..aaedbf4955c3 100644 --- a/tools/testing/selftests/bpf/xdp_redirect_multi.c +++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c @@ -32,12 +32,12 @@ static void int_exit(int sig) int i; for (i = 0; ifaces[i] > 0; i++) { - if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); + if (bpf_xdp_query_id(ifaces[i], xdp_flags, &prog_id)) { + printf("bpf_xdp_query_id failed\n"); exit(1); } if (prog_id) - bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags); + bpf_xdp_detach(ifaces[i], xdp_flags, NULL); } exit(0); @@ -210,7 +210,7 @@ int main(int argc, char **argv) } /* bind prog_fd to each interface */ - ret = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags); + ret = bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL); if (ret) { printf("Set xdp fd failed on %d\n", ifindex); goto err_out; diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c index baa870a759a2..c567856fd1bc 100644 --- a/tools/testing/selftests/bpf/xdping.c +++ b/tools/testing/selftests/bpf/xdping.c @@ -29,7 +29,7 @@ static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static void cleanup(int sig) { - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + bpf_xdp_detach(ifindex, xdp_flags, NULL); if (sig) exit(1); } @@ -203,7 +203,7 @@ int main(int argc, char **argv) printf("XDP setup disrupts network connectivity, hit Ctrl+C to quit\n"); - if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { + if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { fprintf(stderr, "Link set xdp fd failed for %s\n", ifname); goto done; } diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index ffa5502ad95e..5f8296d29e77 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -266,22 +266,24 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size } static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, - struct ifobject *ifobject, u32 qid) + struct ifobject *ifobject, bool shared) { - struct xsk_socket_config cfg; + struct xsk_socket_config cfg = {}; struct xsk_ring_cons *rxr; struct xsk_ring_prod *txr; xsk->umem = umem; cfg.rx_size = xsk->rxqsize; cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; - cfg.libbpf_flags = 0; + cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; cfg.xdp_flags = ifobject->xdp_flags; cfg.bind_flags = ifobject->bind_flags; + if (shared) + cfg.bind_flags |= XDP_SHARED_UMEM; txr = ifobject->tx_on ? &xsk->tx : NULL; rxr = ifobject->rx_on ? &xsk->rx : NULL; - return xsk_socket__create(&xsk->xsk, ifobject->ifname, qid, umem->umem, rxr, txr, &cfg); + return xsk_socket__create(&xsk->xsk, ifobject->ifname, 0, umem->umem, rxr, txr, &cfg); } static struct option long_options[] = { @@ -387,7 +389,6 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, for (i = 0; i < MAX_INTERFACES; i++) { struct ifobject *ifobj = i ? 
ifobj_rx : ifobj_tx; - ifobj->umem = &ifobj->umem_arr[0]; ifobj->xsk = &ifobj->xsk_arr[0]; ifobj->use_poll = false; ifobj->pacing_on = true; @@ -401,11 +402,12 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, ifobj->tx_on = false; } + memset(ifobj->umem, 0, sizeof(*ifobj->umem)); + ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS; + ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; + for (j = 0; j < MAX_SOCKETS; j++) { - memset(&ifobj->umem_arr[j], 0, sizeof(ifobj->umem_arr[j])); memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); - ifobj->umem_arr[j].num_frames = DEFAULT_UMEM_BUFFERS; - ifobj->umem_arr[j].frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; } } @@ -950,7 +952,10 @@ static void tx_stats_validate(struct ifobject *ifobject) static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) { + u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; + int ret, ifindex; + void *bufs; u32 i; ifobject->ns_fd = switch_namespace(ifobject->nsname); @@ -958,23 +963,20 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (ifobject->umem->unaligned_mode) mmap_flags |= MAP_HUGETLB; - for (i = 0; i < test->nb_sockets; i++) { - u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; - u32 ctr = 0; - void *bufs; - int ret; + bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); + if (bufs == MAP_FAILED) + exit_with_error(errno); - bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); - if (bufs == MAP_FAILED) - exit_with_error(errno); + ret = xsk_configure_umem(ifobject->umem, bufs, umem_sz); + if (ret) + exit_with_error(-ret); - ret = xsk_configure_umem(&ifobject->umem_arr[i], bufs, umem_sz); - if (ret) - exit_with_error(-ret); + for (i = 0; i < test->nb_sockets; i++) { + u32 ctr = 0; while (ctr++ < SOCK_RECONF_CTR) { - ret = xsk_configure_socket(&ifobject->xsk_arr[i], &ifobject->umem_arr[i], - ifobject, i); + ret = xsk_configure_socket(&ifobject->xsk_arr[i], ifobject->umem, + ifobject, !!i); if (!ret) break; @@ -985,8 +987,22 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) } } - ifobject->umem = &ifobject->umem_arr[0]; ifobject->xsk = &ifobject->xsk_arr[0]; + + if (!ifobject->rx_on) + return; + + ifindex = if_nametoindex(ifobject->ifname); + if (!ifindex) + exit_with_error(errno); + + ret = xsk_setup_xdp_prog(ifindex, &ifobject->xsk_map_fd); + if (ret) + exit_with_error(-ret); + + ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd); + if (ret) + exit_with_error(-ret); } static void testapp_cleanup_xsk_res(struct ifobject *ifobj) @@ -1142,14 +1158,16 @@ static void testapp_bidi(struct test_spec *test) static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx) { + int ret; + xsk_socket__delete(ifobj_tx->xsk->xsk); - xsk_umem__delete(ifobj_tx->umem->umem); xsk_socket__delete(ifobj_rx->xsk->xsk); - xsk_umem__delete(ifobj_rx->umem->umem); - ifobj_tx->umem = &ifobj_tx->umem_arr[1]; ifobj_tx->xsk = &ifobj_tx->xsk_arr[1]; - ifobj_rx->umem = &ifobj_rx->umem_arr[1]; ifobj_rx->xsk = &ifobj_rx->xsk_arr[1]; + + ret = xsk_socket__update_xskmap(ifobj_rx->xsk->xsk, ifobj_rx->xsk_map_fd); + if (ret) + exit_with_error(-ret); } static void testapp_bpf_res(struct test_spec *test) @@ -1408,13 +1426,13 @@ static struct ifobject *ifobject_create(void) if 
(!ifobj->xsk_arr) goto out_xsk_arr; - ifobj->umem_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->umem_arr)); - if (!ifobj->umem_arr) - goto out_umem_arr; + ifobj->umem = calloc(1, sizeof(*ifobj->umem)); + if (!ifobj->umem) + goto out_umem; return ifobj; -out_umem_arr: +out_umem: free(ifobj->xsk_arr); out_xsk_arr: free(ifobj); @@ -1423,7 +1441,7 @@ out_xsk_arr: static void ifobject_delete(struct ifobject *ifobj) { - free(ifobj->umem_arr); + free(ifobj->umem); free(ifobj->xsk_arr); free(ifobj); } diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 2f705f44b748..62a3e6388632 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -125,10 +125,10 @@ struct ifobject { struct xsk_socket_info *xsk; struct xsk_socket_info *xsk_arr; struct xsk_umem_info *umem; - struct xsk_umem_info *umem_arr; thread_func_t func_ptr; struct pkt_stream *pkt_stream; int ns_fd; + int xsk_map_fd; u32 dst_ip; u32 src_ip; u32 xdp_flags; diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 3b0489910422..4f70baad867d 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -114,10 +114,25 @@ fib_rule6_test_match_n_redirect() log_test $? 0 "rule6 del by pref: $description" } +fib_rule6_test_reject() +{ + local match="$1" + local rc + + $IP -6 rule add $match table $RTABLE 2>/dev/null + rc=$? + log_test $rc 2 "rule6 check: $match" + + if [ $rc -eq 0 ]; then + $IP -6 rule del $match table $RTABLE + fi +} + fib_rule6_test() { local getmatch local match + local cnt # setup the fib rule redirect route $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink @@ -128,8 +143,21 @@ fib_rule6_test() match="from $SRC_IP6 iif $DEV" fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table" + # Reject dsfield (tos) options which have ECN bits set + for cnt in $(seq 1 3); do + match="dsfield $cnt" + fib_rule6_test_reject "$match" + done + + # Don't take ECN bits into account when matching on dsfield match="tos 0x10" - fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table" + for cnt in "0x10" "0x11" "0x12" "0x13"; do + # Using option 'tos' instead of 'dsfield' as old iproute2 + # versions don't support 'dsfield' in ip rule show. + getmatch="tos $cnt" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getmatch redirect to table" + done match="fwmark 0x64" getmatch="mark 0x64" @@ -187,10 +215,25 @@ fib_rule4_test_match_n_redirect() log_test $? 0 "rule4 del by pref: $description" } +fib_rule4_test_reject() +{ + local match="$1" + local rc + + $IP rule add $match table $RTABLE 2>/dev/null + rc=$? 
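 # A note on the expected status: iproute2 exits with 2 when the kernel
 # refuses the rule, which is why log_test checks for 2 below. Sketch of
 # the behaviour being asserted (illustrative table number):
 #   ip rule add dsfield 0x01 table 100   # ECN ECT(1) bit set -> rejected
 #   ip rule add dsfield 0x10 table 100   # DSCP only, ECN bits clear -> accepted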
+ log_test $rc 2 "rule4 check: $match" + + if [ $rc -eq 0 ]; then + $IP rule del $match table $RTABLE + fi +} + fib_rule4_test() { local getmatch local match + local cnt # setup the fib rule redirect route $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink @@ -206,8 +249,21 @@ fib_rule4_test() fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table" ip netns exec testns sysctl -qw net.ipv4.ip_forward=0 + # Reject dsfield (tos) options which have ECN bits set + for cnt in $(seq 1 3); do + match="dsfield $cnt" + fib_rule4_test_reject "$match" + done + + # Don't take ECN bits into account when matching on dsfield match="tos 0x10" - fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table" + for cnt in "0x10" "0x11" "0x12" "0x13"; do + # Using option 'tos' instead of 'dsfield' as old iproute2 + # versions don't support 'dsfield' in ip rule show. + getmatch="tos $cnt" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getmatch redirect to table" + done match="fwmark 0x64" getmatch="mark 0x64" diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 996af1ae3d3d..bb73235976b3 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1447,6 +1447,81 @@ ipv4_local_rt_cache() log_test $? 0 "Cached route removed from VRF port device" } +ipv4_rt_dsfield() +{ + echo + echo "IPv4 route with dsfield tests" + + run_cmd "$IP route flush 172.16.102.0/24" + + # New routes should reject dsfield options that interfere with ECN + run_cmd "$IP route add 172.16.102.0/24 dsfield 0x01 via 172.16.101.2" + log_test $? 2 "Reject route with dsfield 0x01" + + run_cmd "$IP route add 172.16.102.0/24 dsfield 0x02 via 172.16.101.2" + log_test $? 2 "Reject route with dsfield 0x02" + + run_cmd "$IP route add 172.16.102.0/24 dsfield 0x03 via 172.16.101.2" + log_test $? 2 "Reject route with dsfield 0x03" + + # A generic route that doesn't take DSCP into account + run_cmd "$IP route add 172.16.102.0/24 via 172.16.101.2" + + # A more specific route for DSCP 0x10 + run_cmd "$IP route add 172.16.102.0/24 dsfield 0x10 via 172.16.103.2" + + # DSCP 0x10 should match the specific route, no matter the ECN bits + $IP route get fibmatch 172.16.102.1 dsfield 0x10 | \ + grep -q "via 172.16.103.2" + log_test $? 0 "IPv4 route with DSCP and ECN:Not-ECT" + + $IP route get fibmatch 172.16.102.1 dsfield 0x11 | \ + grep -q "via 172.16.103.2" + log_test $? 0 "IPv4 route with DSCP and ECN:ECT(1)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x12 | \ + grep -q "via 172.16.103.2" + log_test $? 0 "IPv4 route with DSCP and ECN:ECT(0)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x13 | \ + grep -q "via 172.16.103.2" + log_test $? 0 "IPv4 route with DSCP and ECN:CE" + + # Unknown DSCP should match the generic route, no matter the ECN bits + $IP route get fibmatch 172.16.102.1 dsfield 0x14 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with unknown DSCP and ECN:Not-ECT" + + $IP route get fibmatch 172.16.102.1 dsfield 0x15 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(1)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x16 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(0)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x17 | \ + grep -q "via 172.16.101.2" + log_test $? 
0 "IPv4 route with unknown DSCP and ECN:CE" + + # Null DSCP should match the generic route, no matter the ECN bits + $IP route get fibmatch 172.16.102.1 dsfield 0x00 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with no DSCP and ECN:Not-ECT" + + $IP route get fibmatch 172.16.102.1 dsfield 0x01 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(1)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x02 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(0)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x03 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with no DSCP and ECN:CE" +} + ipv4_route_test() { route_setup @@ -1454,6 +1529,7 @@ ipv4_route_test() ipv4_rt_add ipv4_rt_replace ipv4_local_rt_cache + ipv4_rt_dsfield route_cleanup } diff --git a/tools/testing/selftests/net/forwarding/pedit_ip.sh b/tools/testing/selftests/net/forwarding/pedit_ip.sh new file mode 100755 index 000000000000..d14efb2d23b2 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/pedit_ip.sh @@ -0,0 +1,201 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on +# egress of $swp2, the traffic is acted upon by a pedit action. An ingress +# filter installed on $h2 verifies that the packet looks like expected. +# +# +----------------------+ +----------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +----|-----------------+ +----------------|-----+ +# | | +# +----|----------------------------------------------------------------|-----+ +# | SW | | | +# | +-|----------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | +--------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + test_ip4_src + test_ip4_dst + test_ip6_src + test_ip6_dst +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add name br1 up type bridge vlan_filtering 1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h2mac=$(mac_get $h2) + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:1::2 +} + +do_test_pedit_ip() +{ + local pedit_locus=$1; shift + local pedit_action=$1; shift + local match_prot=$1; shift + local match_flower=$1; shift + local mz_flags=$1; shift + + tc filter add $pedit_locus handle 101 pref 1 \ + 
flower action pedit ex munge $pedit_action + tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \ + flower skip_hw $match_flower action pass + + RET=0 + + $MZ $mz_flags $h1 -c 10 -d 20msec -p 100 -a own -b $h2mac -q -t ip + + local pkts + pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \ + tc_rule_handle_stats_get "dev $h2 ingress" 101) + check_err $? "Expected to get 10 packets, but got $pkts." + + pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101) + ((pkts >= 10)) + check_err $? "Expected to get 10 packets on pedit rule, but got $pkts." + + log_test "$pedit_locus pedit $pedit_action" + + tc filter del dev $h2 ingress pref 1 + tc filter del $pedit_locus pref 1 +} + +do_test_pedit_ip6() +{ + local locus=$1; shift + local pedit_addr=$1; shift + local flower_addr=$1; shift + + do_test_pedit_ip "$locus" "$pedit_addr set 2001:db8:2::1" ipv6 \ + "$flower_addr 2001:db8:2::1" \ + "-6 -A 2001:db8:1::1 -B 2001:db8:1::2" +} + +do_test_pedit_ip4() +{ + local locus=$1; shift + local pedit_addr=$1; shift + local flower_addr=$1; shift + + do_test_pedit_ip "$locus" "$pedit_addr set 198.51.100.1" ip \ + "$flower_addr 198.51.100.1" \ + "-A 192.0.2.1 -B 192.0.2.2" +} + +test_ip4_src() +{ + do_test_pedit_ip4 "dev $swp1 ingress" "ip src" src_ip + do_test_pedit_ip4 "dev $swp2 egress" "ip src" src_ip +} + +test_ip4_dst() +{ + do_test_pedit_ip4 "dev $swp1 ingress" "ip dst" dst_ip + do_test_pedit_ip4 "dev $swp2 egress" "ip dst" dst_ip +} + +test_ip6_src() +{ + do_test_pedit_ip6 "dev $swp1 ingress" "ip6 src" src_ip + do_test_pedit_ip6 "dev $swp2 egress" "ip6 src" src_ip +} + +test_ip6_dst() +{ + do_test_pedit_ip6 "dev $swp1 ingress" "ip6 dst" dst_ip + do_test_pedit_ip6 "dev $swp2 egress" "ip6 dst" dst_ip +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index bd106c7ec232..4a565fb84137 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -15,6 +15,7 @@ timeout_test=$((timeout_poll * 2 + 1)) mptcp_connect="" capture=0 checksum=0 +ip_mptcp=0 do_all_tests=1 TEST_COUNT=0 @@ -239,6 +240,16 @@ is_v6() [ -z "${1##*:*}" ] } +is_addr() +{ + [ -z "${1##*[.:]*}" ] +} + +is_number() +{ + [[ $1 == ?(-)+([0-9]) ]] +} + # $1: ns, $2: port wait_local_port_listen() { @@ -278,6 +289,109 @@ wait_rm_addr() done } +pm_nl_set_limits() +{ + local ns=$1 + local addrs=$2 + local subflows=$3 + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp limits set add_addr_accepted $addrs subflows $subflows + else + ip netns exec $ns ./pm_nl_ctl limits $addrs $subflows + fi +} + +pm_nl_add_endpoint() +{ + local ns=$1 + local addr=$2 + local flags + local port + local dev + local id + local nr=2 + + for p in $@ + do + if [ $p = "flags" ]; then + eval _flags=\$"$nr" + [ ! -z $_flags ]; flags="flags $_flags" + fi + if [ $p = "dev" ]; then + eval _dev=\$"$nr" + [ ! -z $_dev ]; dev="dev $_dev" + fi + if [ $p = "id" ]; then + eval _id=\$"$nr" + [ ! -z $_id ]; id="id $_id" + fi + if [ $p = "port" ]; then + eval _port=\$"$nr" + [ ! 
-z $_port ]; port="port $_port" + fi + + let nr+=1 + done + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp endpoint add $addr ${_flags//","/" "} $dev $id $port + else + ip netns exec $ns ./pm_nl_ctl add $addr $flags $dev $id $port + fi +} + +pm_nl_del_endpoint() +{ + local ns=$1 + local id=$2 + local addr=$3 + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp endpoint delete id $id $addr + else + ip netns exec $ns ./pm_nl_ctl del $id $addr + fi +} + +pm_nl_flush_endpoint() +{ + local ns=$1 + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp endpoint flush + else + ip netns exec $ns ./pm_nl_ctl flush + fi +} + +pm_nl_show_endpoints() +{ + local ns=$1 + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp endpoint show + else + ip netns exec $ns ./pm_nl_ctl dump + fi +} + +pm_nl_change_endpoint() +{ + local ns=$1 + local flags=$2 + local id=$3 + local addr=$4 + local port="" + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp endpoint change id $id ${flags//","/" "} + else + if [ $5 -ne 0 ]; then port="port $5"; fi + ip netns exec $ns ./pm_nl_ctl set $addr flags $flags $port + fi +} + do_transfer() { listener_ns="$1" @@ -378,31 +492,36 @@ do_transfer() else addr="10.0.$counter.1" fi - ip netns exec $ns1 ./pm_nl_ctl add $addr flags signal + pm_nl_add_endpoint $ns1 $addr flags signal let counter+=1 let add_nr_ns1-=1 done elif [ $addr_nr_ns1 -lt 0 ]; then let rm_nr_ns1=-addr_nr_ns1 if [ $rm_nr_ns1 -lt 8 ]; then - counter=1 - pos=1 - dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`) - if [ ${#dump[@]} -gt 0 ]; then - while [ $counter -le $rm_nr_ns1 ] - do - id=${dump[$pos]} - rm_addr=$(rm_addr_count ${connector_ns}) - ip netns exec ${listener_ns} ./pm_nl_ctl del $id - wait_rm_addr ${connector_ns} ${rm_addr} - let counter+=1 - let pos+=5 + counter=0 + pm_nl_show_endpoints ${listener_ns} | while read line; do + local arr=($line) + local nr=0 + + for i in ${arr[@]}; do + if [ $i = "id" ]; then + if [ $counter -eq $rm_nr_ns1 ]; then + break + fi + id=${arr[$nr+1]} + rm_addr=$(rm_addr_count ${connector_ns}) + pm_nl_del_endpoint ${listener_ns} $id + wait_rm_addr ${connector_ns} ${rm_addr} + let counter+=1 + fi + let nr+=1 done - fi + done elif [ $rm_nr_ns1 -eq 8 ]; then - ip netns exec ${listener_ns} ./pm_nl_ctl flush + pm_nl_flush_endpoint ${listener_ns} elif [ $rm_nr_ns1 -eq 9 ]; then - ip netns exec ${listener_ns} ./pm_nl_ctl del 0 ${connect_addr} + pm_nl_del_endpoint ${listener_ns} 0 ${connect_addr} fi fi @@ -426,30 +545,36 @@ do_transfer() else addr="10.0.$counter.2" fi - ip netns exec $ns2 ./pm_nl_ctl add $addr flags $flags + pm_nl_add_endpoint $ns2 $addr flags $flags let counter+=1 let add_nr_ns2-=1 done elif [ $addr_nr_ns2 -lt 0 ]; then let rm_nr_ns2=-addr_nr_ns2 if [ $rm_nr_ns2 -lt 8 ]; then - counter=1 - pos=1 - dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`) - if [ ${#dump[@]} -gt 0 ]; then - while [ $counter -le $rm_nr_ns2 ] - do - # rm_addr are serialized, allow the previous one to complete - id=${dump[$pos]} - rm_addr=$(rm_addr_count ${listener_ns}) - ip netns exec ${connector_ns} ./pm_nl_ctl del $id - wait_rm_addr ${listener_ns} ${rm_addr} - let counter+=1 - let pos+=5 + counter=0 + pm_nl_show_endpoints ${connector_ns} | while read line; do + local arr=($line) + local nr=0 + + for i in ${arr[@]}; do + if [ $i = "id" ]; then + if [ $counter -eq $rm_nr_ns2 ]; then + break + fi + # rm_addr are serialized, allow the previous one to + # complete + id=${arr[$nr+1]} + rm_addr=$(rm_addr_count ${listener_ns}) + pm_nl_del_endpoint ${connector_ns} $id + wait_rm_addr ${listener_ns} 
${rm_addr} + let counter+=1 + fi + let nr+=1 done - fi + done elif [ $rm_nr_ns2 -eq 8 ]; then - ip netns exec ${connector_ns} ./pm_nl_ctl flush + pm_nl_flush_endpoint ${connector_ns} elif [ $rm_nr_ns2 -eq 9 ]; then local addr if is_v6 "${connect_addr}"; then @@ -457,18 +582,34 @@ do_transfer() else addr="10.0.1.2" fi - ip netns exec ${connector_ns} ./pm_nl_ctl del 0 $addr + pm_nl_del_endpoint ${connector_ns} 0 $addr fi fi if [ ! -z $sflags ]; then sleep 1 for netns in "$ns1" "$ns2"; do - dump=(`ip netns exec $netns ./pm_nl_ctl dump`) - if [ ${#dump[@]} -gt 0 ]; then - addr=${dump[${#dump[@]} - 1]} - ip netns exec $netns ./pm_nl_ctl set $addr flags $sflags - fi + pm_nl_show_endpoints $netns | while read line; do + local arr=($line) + local addr + local port=0 + local id + + for i in ${arr[@]}; do + if is_addr $i; then + addr=$i + elif is_number $i; then + # The minimum expected port number is 10000 + if [ $i -gt 10000 ]; then + port=$i + # The maximum id number is 255 + elif [ $i -lt 255 ]; then + id=$i + fi + fi + done + pm_nl_change_endpoint $netns $sflags $id $addr $port + done done fi @@ -977,51 +1118,51 @@ subflows_tests() # subflow limited by client reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 0 - ip netns exec $ns2 ./pm_nl_ctl limits 0 0 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 0 + pm_nl_set_limits $ns2 0 0 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow, limited by client" 0 0 0 # subflow limited by server reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 0 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 0 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow, limited by server" 1 1 0 # subflow reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow" 1 1 1 # multiple subflows reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows" 2 2 2 # multiple subflows limited by server reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows, limited by server" 2 2 1 # single subflow, dev reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow dev ns2eth3 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow dev ns2eth3 run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow, dev" 1 1 1 } @@ -1031,28 
+1172,28 @@ subflows_error_tests() # If a single subflow is configured, and matches the MPC src # address, no additional subflow should be created reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr "no MPC reuse with single endpoint" 0 0 0 # multiple subflows, with subflow creation error reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr "multi subflows, with failing subflow" 1 1 1 # multiple subflows, with subflow timeout on MPJ reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j DROP run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr "multi subflows, with subflow timeout" 1 1 1 @@ -1061,9 +1202,9 @@ subflows_error_tests() # closed subflow (due to reset) is not reused if additional # subflows are added later reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow & @@ -1073,7 +1214,7 @@ subflows_error_tests() # mpj subflow will be in TW after the reset wait_for_tw $ns2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow wait # additional subflow could be created only if the PM select @@ -1085,16 +1226,16 @@ signal_address_tests() { # add_address, unused reset - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "unused signal address" 0 0 0 chk_add_nr 1 1 # accept and use add_addr reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address" 1 1 1 chk_add_nr 1 1 @@ -1104,59 +1245,59 @@ signal_address_tests() # belong to different subnets or one of the listed local address could be # used for 'add_addr' subflow reset - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + 
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and signal" 2 2 2 chk_add_nr 1 1 # accept and use add_addr with additional subflows reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows and signal" 3 3 3 chk_add_nr 1 1 # signal addresses reset - ip netns exec $ns1 ./pm_nl_ctl limits 3 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_set_limits $ns2 3 3 run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal addresses" 3 3 3 chk_add_nr 3 3 # signal invalid addresses reset - ip netns exec $ns1 ./pm_nl_ctl limits 3 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_set_limits $ns2 3 3 run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal invalid addresses" 1 1 1 chk_add_nr 3 3 # signal addresses race test reset - ip netns exec $ns1 ./pm_nl_ctl limits 4 4 - ip netns exec $ns2 ./pm_nl_ctl limits 4 4 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags signal - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags signal - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal + pm_nl_set_limits $ns1 4 4 + pm_nl_set_limits $ns2 4 4 + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_add_endpoint $ns2 10.0.1.2 flags signal + pm_nl_add_endpoint $ns2 10.0.2.2 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags signal + pm_nl_add_endpoint $ns2 10.0.4.2 flags signal run_tests $ns1 $ns2 10.0.1.1 # the server will not signal the address terminating @@ -1176,11 +1317,11 @@ link_failure_tests() # active backup and link switch-over. # Let's set some arbitrary (low) virtual link limits. 
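 # Sketch of what the new wrappers below expand to under -i (ip_mptcp=1),
 # assuming an MPTCP-aware iproute2; the legacy path keeps calling ./pm_nl_ctl:
 #   ip -n "$ns1" mptcp limits set add_addr_accepted 0 subflows 3
 #   ip -n "$ns1" mptcp endpoint add 10.0.2.1 dev ns1eth2 signal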
init_shapers - ip netns exec $ns1 ./pm_nl_ctl limits 0 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow run_tests $ns1 $ns2 10.0.1.1 1 chk_join_nr "multiple flows, signal, link failure" 3 3 3 chk_add_nr 1 1 @@ -1190,11 +1331,11 @@ link_failure_tests() # for bidirectional transfer reset init_shapers - ip netns exec $ns1 ./pm_nl_ctl limits 0 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow run_tests $ns1 $ns2 10.0.1.1 2 chk_join_nr "multi flows, signal, bidi, link fail" 3 3 3 chk_add_nr 1 1 @@ -1204,11 +1345,11 @@ link_failure_tests() # will never be used reset init_shapers - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 2 + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 2 export FAILING_LINKS="1" - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup run_tests $ns1 $ns2 10.0.1.1 1 chk_join_nr "backup subflow unused, link failure" 2 2 2 chk_add_nr 1 1 @@ -1218,10 +1359,10 @@ link_failure_tests() # the traffic reset init_shapers - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup export FAILING_LINKS="1 2" run_tests $ns1 $ns2 10.0.1.1 1 chk_join_nr "backup flow used, multi links fail" 2 2 2 @@ -1233,10 +1374,10 @@ link_failure_tests() # for bidirectional transfer reset init_shapers - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup run_tests $ns1 $ns2 10.0.1.1 2 chk_join_nr "backup flow used, bidi, link failure" 2 2 2 chk_add_nr 1 1 @@ -1248,38 +1389,38 @@ add_addr_timeout_tests() { # add_addr timeout reset_with_add_addr_timeout - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits 
$ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1 chk_add_nr 4 0 # add_addr timeout IPv6 reset_with_add_addr_timeout 6 - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1 chk_add_nr 4 0 # signal addresses timeout reset_with_add_addr_timeout - ip netns exec $ns1 ./pm_nl_ctl limits 2 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + pm_nl_set_limits $ns1 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_set_limits $ns2 2 2 run_tests $ns1 $ns2 10.0.1.1 0 0 0 least chk_join_nr "signal addresses, ADD_ADDR timeout" 2 2 2 chk_add_nr 8 0 # signal invalid addresses timeout reset_with_add_addr_timeout - ip netns exec $ns1 ./pm_nl_ctl limits 2 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + pm_nl_set_limits $ns1 2 2 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_set_limits $ns2 2 2 run_tests $ns1 $ns2 10.0.1.1 0 0 0 least chk_join_nr "invalid address, ADD_ADDR timeout" 1 1 1 chk_add_nr 8 0 @@ -1289,28 +1430,28 @@ remove_tests() { # single subflow, remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow chk_join_nr "remove single subflow" 1 1 1 chk_rm_nr 1 1 # multiple subflows, remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 0 -2 slow chk_join_nr "remove multiple subflows" 2 2 2 chk_rm_nr 2 2 # single address, remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow chk_join_nr "remove single address" 1 1 1 chk_add_nr 1 1 @@ -1318,10 +1459,10 @@ remove_tests() # subflow and signal, remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow chk_join_nr "remove subflow and signal" 2 2 2 chk_add_nr 1 1 @@ -1329,11 +1470,11 @@ remove_tests() # subflows and signal, 
remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 slow chk_join_nr "remove subflows and signal" 3 3 3 chk_add_nr 1 1 @@ -1341,11 +1482,11 @@ remove_tests() # addresses remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 3 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_set_limits $ns2 3 3 run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow chk_join_nr "remove addresses" 3 3 3 chk_add_nr 3 3 @@ -1353,11 +1494,11 @@ remove_tests() # invalid addresses remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 3 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_set_limits $ns2 3 3 run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow chk_join_nr "remove invalid addresses" 1 1 1 chk_add_nr 3 3 @@ -1365,11 +1506,11 @@ remove_tests() # subflows and signal, flush reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow chk_join_nr "flush subflows and signal" 3 3 3 chk_add_nr 1 1 @@ -1377,22 +1518,22 @@ remove_tests() # subflows flush reset - ip netns exec $ns1 ./pm_nl_ctl limits 3 3 - ip netns exec $ns2 ./pm_nl_ctl limits 3 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow id 150 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + pm_nl_set_limits $ns1 3 3 + pm_nl_set_limits $ns2 3 3 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow chk_join_nr "flush subflows" 3 3 3 chk_rm_nr 3 3 # addresses flush reset - ip netns exec $ns1 ./pm_nl_ctl limits 3 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 + pm_nl_add_endpoint $ns1 
10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_set_limits $ns2 3 3 run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow chk_join_nr "flush addresses" 3 3 3 chk_add_nr 3 3 @@ -1400,11 +1541,11 @@ remove_tests() # invalid addresses flush reset - ip netns exec $ns1 ./pm_nl_ctl limits 3 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_set_limits $ns2 3 3 run_tests $ns1 $ns2 10.0.1.1 0 -8 0 slow chk_join_nr "flush invalid addresses" 1 1 1 chk_add_nr 3 3 @@ -1412,18 +1553,18 @@ remove_tests() # remove id 0 subflow reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 0 -9 slow chk_join_nr "remove id 0 subflow" 1 1 1 chk_rm_nr 1 1 # remove id 0 address reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 10.0.1.1 0 -9 0 slow chk_join_nr "remove id 0 address" 1 1 1 chk_add_nr 1 1 @@ -1434,37 +1575,37 @@ add_tests() { # add single subflow reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow chk_join_nr "add single subflow" 1 1 1 # add signal address reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow chk_join_nr "add signal address" 1 1 1 chk_add_nr 1 1 # add multiple subflows reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow chk_join_nr "add multiple subflows" 2 2 2 # add multiple subflows IPv6 reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 run_tests $ns1 $ns2 dead:beef:1::1 0 0 2 slow chk_join_nr "add multiple subflows IPv6" 2 2 2 # add multiple addresses IPv6 reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 run_tests $ns1 $ns2 dead:beef:1::1 0 2 0 slow chk_join_nr "add multiple addresses IPv6" 2 2 2 chk_add_nr 2 2 @@ -1474,33 +1615,33 @@ ipv6_tests() { # subflow IPv6 reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow chk_join_nr "single subflow IPv6" 1 1 1 # add_address, unused IPv6 reset - ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal + pm_nl_add_endpoint $ns1 
dead:beef:2::1 flags signal run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow chk_join_nr "unused signal address IPv6" 0 0 0 chk_add_nr 1 1 # signal address IPv6 reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow chk_join_nr "single address IPv6" 1 1 1 chk_add_nr 1 1 # single address IPv6, remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow chk_join_nr "remove single address IPv6" 1 1 1 chk_add_nr 1 1 @@ -1508,10 +1649,10 @@ ipv6_tests() # subflow and signal IPv6, remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow chk_join_nr "remove subflow and signal IPv6" 2 2 2 chk_add_nr 1 1 @@ -1522,76 +1663,76 @@ v4mapped_tests() { # subflow IPv4-mapped to IPv4-mapped reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow run_tests $ns1 $ns2 "::ffff:10.0.1.1" chk_join_nr "single subflow IPv4-mapped" 1 1 1 # signal address IPv4-mapped with IPv4-mapped sk reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal run_tests $ns1 $ns2 "::ffff:10.0.1.1" chk_join_nr "signal address IPv4-mapped" 1 1 1 chk_add_nr 1 1 # subflow v4-map-v6 reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 "::ffff:10.0.1.1" chk_join_nr "single subflow v4-map-v6" 1 1 1 # signal address v4-map-v6 reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 "::ffff:10.0.1.1" chk_join_nr "signal address v4-map-v6" 1 1 1 chk_add_nr 1 1 # subflow v6-map-v4 reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow v6-map-v4" 1 1 1 # signal address v6-map-v4 reset - ip 
netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address v6-map-v4" 1 1 1 chk_add_nr 1 1 # no subflow IPv6 to v4 address reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "no JOIN with diff families v4-v6" 0 0 0 # no subflow IPv6 to v4 address even if v6 has a valid v4 at the end reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 dead:beef:2::10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "no JOIN with diff families v4-v6-2" 0 0 0 # no subflow IPv4 to v6 address, no need to slow down too then reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr "no JOIN with diff families v6-v4" 0 0 0 } @@ -1600,50 +1741,60 @@ backup_tests() { # single subflow, backup reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,backup + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup chk_join_nr "single subflow, backup" 1 1 1 chk_prio_nr 0 1 # single address, backup reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup chk_join_nr "single address, backup" 1 1 1 chk_add_nr 1 1 chk_prio_nr 1 0 + + # single address with port, backup + reset + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns2 1 1 + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + chk_join_nr "single address with port, backup" 1 1 1 + chk_add_nr 1 1 + chk_prio_nr 1 0 } add_addr_ports_tests() { # signal address with port reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address with port" 1 1 1 chk_add_nr 1 1 1 # subflow and signal with port reset - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint 
$ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and signal with port" 2 2 2 chk_add_nr 1 1 1 # single address with port, remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow chk_join_nr "remove single address with port" 1 1 1 chk_add_nr 1 1 1 @@ -1651,10 +1802,10 @@ add_addr_ports_tests() # subflow and signal with port, remove reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 - ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow chk_join_nr "remove subflow and signal with port" 2 2 2 chk_add_nr 1 1 1 @@ -1662,11 +1813,11 @@ add_addr_ports_tests() # subflows and signal with port, flush reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 -8 -2 slow chk_join_nr "flush subflows and signal with port" 3 3 3 chk_add_nr 1 1 @@ -1674,20 +1825,20 @@ add_addr_ports_tests() # multiple addresses with port reset - ip netns exec $ns1 ./pm_nl_ctl limits 2 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal port 10100 - ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + pm_nl_set_limits $ns1 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10100 + pm_nl_set_limits $ns2 2 2 run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple addresses with port" 2 2 2 chk_add_nr 2 2 2 # multiple addresses with ports reset - ip netns exec $ns1 ./pm_nl_ctl limits 2 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal port 10101 - ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + pm_nl_set_limits $ns1 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10101 + pm_nl_set_limits $ns2 2 2 run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple addresses with ports" 2 2 2 chk_add_nr 2 2 2 @@ -1697,56 +1848,56 @@ syncookies_tests() { # single subflow, syncookies reset_with_cookies - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow with syn cookies" 1 1 1 # multiple subflows with syn cookies reset_with_cookies - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl 
add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows with syn cookies" 2 2 2 # multiple subflows limited by server reset_with_cookies - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflows limited by server w cookies" 2 1 1 # test signal address with cookies reset_with_cookies - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address with syn cookies" 1 1 1 chk_add_nr 1 1 # test cookie with subflow and signal reset_with_cookies - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl limits 0 2 - ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and signal w cookies" 2 2 2 chk_add_nr 1 1 # accept and use add_addr with additional subflows reset_with_cookies - ip netns exec $ns1 ./pm_nl_ctl limits 0 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflows and signal w. 
cookies" 3 3 3 chk_add_nr 1 1 @@ -1756,29 +1907,29 @@ checksum_tests() { # checksum test 0 0 reset_with_checksum 0 0 - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 run_tests $ns1 $ns2 10.0.1.1 chk_csum_nr "checksum test 0 0" # checksum test 1 1 reset_with_checksum 1 1 - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 run_tests $ns1 $ns2 10.0.1.1 chk_csum_nr "checksum test 1 1" # checksum test 0 1 reset_with_checksum 0 1 - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 run_tests $ns1 $ns2 10.0.1.1 chk_csum_nr "checksum test 0 1" # checksum test 1 0 reset_with_checksum 1 0 - ip netns exec $ns1 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 run_tests $ns1 $ns2 10.0.1.1 chk_csum_nr "checksum test 1 0" } @@ -1787,26 +1938,26 @@ deny_join_id0_tests() { # subflow allow join id0 ns1 reset_with_allow_join_id0 1 0 - ip netns exec $ns1 ./pm_nl_ctl limits 1 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow allow join id0 ns1" 1 1 1 # subflow allow join id0 ns2 reset_with_allow_join_id0 0 1 - ip netns exec $ns1 ./pm_nl_ctl limits 1 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow allow join id0 ns2" 0 0 0 # signal address allow join id0 ns1 # ADD_ADDRs are not affected by allow_join_id0 value. reset_with_allow_join_id0 1 0 - ip netns exec $ns1 ./pm_nl_ctl limits 1 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address allow join id0 ns1" 1 1 1 chk_add_nr 1 1 @@ -1814,28 +1965,28 @@ deny_join_id0_tests() # signal address allow join id0 ns2 # ADD_ADDRs are not affected by allow_join_id0 value. 
reset_with_allow_join_id0 0 1 - ip netns exec $ns1 ./pm_nl_ctl limits 1 1 - ip netns exec $ns2 ./pm_nl_ctl limits 1 1 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address allow join id0 ns2" 1 1 1 chk_add_nr 1 1 # subflow and address allow join id0 ns1 reset_with_allow_join_id0 1 0 - ip netns exec $ns1 ./pm_nl_ctl limits 2 2 - ip netns exec $ns2 ./pm_nl_ctl limits 2 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 2 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and address allow join id0 1" 2 2 2 # subflow and address allow join id0 ns2 reset_with_allow_join_id0 0 1 - ip netns exec $ns1 ./pm_nl_ctl limits 2 2 - ip netns exec $ns2 ./pm_nl_ctl limits 2 2 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + pm_nl_set_limits $ns1 2 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and address allow join id0 2" 1 1 1 } @@ -1846,10 +1997,10 @@ fullmesh_tests() # 2 fullmesh addrs in ns2, added before the connection, # 1 non-fullmesh addr in ns1, added during the connection. reset - ip netns exec $ns1 ./pm_nl_ctl limits 0 4 - ip netns exec $ns2 ./pm_nl_ctl limits 1 4 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,fullmesh - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,fullmesh + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 1 4 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,fullmesh + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,fullmesh run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow chk_join_nr "fullmesh test 2x1" 4 4 4 chk_add_nr 1 1 @@ -1858,9 +2009,9 @@ fullmesh_tests() # 1 non-fullmesh addr in ns1, added before the connection, # 1 fullmesh addr in ns2, added during the connection. reset - ip netns exec $ns1 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_set_limits $ns1 1 3 + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow chk_join_nr "fullmesh test 1x1" 3 3 3 chk_add_nr 1 1 @@ -1869,9 +2020,9 @@ fullmesh_tests() # 1 non-fullmesh addr in ns1, added before the connection, # 2 fullmesh addrs in ns2, added during the connection. reset - ip netns exec $ns1 ./pm_nl_ctl limits 2 5 - ip netns exec $ns2 ./pm_nl_ctl limits 1 5 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + pm_nl_set_limits $ns1 2 5 + pm_nl_set_limits $ns2 1 5 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow chk_join_nr "fullmesh test 1x2" 5 5 5 chk_add_nr 1 1 @@ -1881,36 +2032,36 @@ fullmesh_tests() # 2 fullmesh addrs in ns2, added during the connection, # limit max_subflows to 4. 
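 # The flag-flipping cases further down go through pm_nl_change_endpoint; a
 # sketch of the two back ends it selects between (endpoint id 1 assumed):
 #   ip -n "$ns" mptcp endpoint change id 1 fullmesh
 #   ip netns exec "$ns" ./pm_nl_ctl set 10.0.2.1 flags fullmesh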
@@ -1881,36 +2032,36 @@ fullmesh_tests()
 	# 2 fullmesh addrs in ns2, added during the connection,
 	# limit max_subflows to 4.
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 2 4
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 4
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	pm_nl_set_limits $ns1 2 4
+	pm_nl_set_limits $ns2 1 4
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
 	chk_join_nr "fullmesh test 1x2, limited" 4 4 4
 	chk_add_nr 1 1
 
 	# set fullmesh flag
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+	pm_nl_set_limits $ns1 4 4
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
+	pm_nl_set_limits $ns2 4 4
 	run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow fullmesh
 	chk_join_nr "set fullmesh flag test" 2 2 2
 	chk_rm_nr 0 1
 
 	# set nofullmesh flag
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow,fullmesh
-	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+	pm_nl_set_limits $ns1 4 4
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh
+	pm_nl_set_limits $ns2 4 4
 	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow nofullmesh
 	chk_join_nr "set nofullmesh flag test" 2 2 2
 	chk_rm_nr 0 1
 
 	# set backup,fullmesh flags
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+	pm_nl_set_limits $ns1 4 4
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
+	pm_nl_set_limits $ns2 4 4
 	run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow backup,fullmesh
 	chk_join_nr "set backup,fullmesh flags test" 2 2 2
 	chk_prio_nr 0 1
@@ -1918,9 +2069,9 @@ fullmesh_tests()
 
 	# set nobackup,nofullmesh flags
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,backup,fullmesh
+	pm_nl_set_limits $ns1 4 4
+	pm_nl_set_limits $ns2 4 4
+	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh
 	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup,nofullmesh
 	chk_join_nr "set nobackup,nofullmesh flags test" 2 2 2
 	chk_prio_nr 0 1
@@ -1966,6 +2117,7 @@ usage()
 	echo " -m fullmesh_tests"
 	echo " -c capture pcap files"
 	echo " -C enable data checksum"
+	echo " -i use ip mptcp"
 	echo " -h help"
 }
 
@@ -1987,9 +2139,12 @@ for arg in "$@"; do
 	if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"C"[0-9a-zA-Z]*$ ]]; then
 		checksum=1
 	fi
+	if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"i"[0-9a-zA-Z]*$ ]]; then
+		ip_mptcp=1
+	fi
 
-	# exception for the capture/checksum options, the rest means: a part of the tests
-	if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ]; then
+	# exception for the capture/checksum/ip_mptcp options, the rest means: a part of the tests
+	if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ] && [ "${arg}" != "-i" ]; then
 		do_all_tests=0
 	fi
 done
@@ -1999,7 +2154,7 @@ if [ $do_all_tests -eq 1 ]; then
 	exit $ret
 fi
 
-while getopts 'fesltra64bpkdmchCS' opt; do
+while getopts 'fesltra64bpkdmchCSi' opt; do
 	case $opt in
 		f)
 			subflows_tests
@@ -2050,6 +2205,8 @@ while getopts 'fesltra64bpkdmchCS' opt; do
 			;;
 		C)
 			;;
+		i)
+			;;
 		h | *)
 			usage
 			;;
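The `-i` option follows the same two-stage parsing as `-c` and `-C`: the regex pre-scan sets `ip_mptcp=1` even when the letter is bundled with test selectors, while the empty `i)` case only keeps getopts from falling through to `usage`. Some invocations this supports (paths assume the script's own directory):

	./mptcp_join.sh -i       # all tests, driven through "ip mptcp"
	./mptcp_join.sh -i -C    # all tests, ip mptcp plus data checksums
	./mptcp_join.sh -im      # fullmesh_tests only, still via ip mptcp

Note that only a bare `-c`, `-C` or `-i` argument preserves the run-everything default; bundling the modifier with a test selector, as in `-im`, clears `do_all_tests` and runs just the selected group.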
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index cbacf9f6538b..89839d1ff9d8 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -164,4 +164,22 @@ id 253 flags 10.0.0.5
 id 254 flags 10.0.0.2
 id 255 flags 10.0.0.3" "wrap-around ids"
 
+ip netns exec $ns1 ./pm_nl_ctl flush
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags subflow
+ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags backup
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,backup 10.0.1.1" "set flags (backup)"
+ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags nobackup
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow 10.0.1.1" " (nobackup)"
+ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,fullmesh 10.0.1.1" " (fullmesh)"
+ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow 10.0.1.1" " (nofullmesh)"
+ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)"
+
 exit $ret
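The new checks above exercise both lookup forms that set_flags() accepts after the pm_nl_ctl.c change below: matching the endpoint by address, and matching it by id. Pulled out of the test flow, the two spellings are:

	ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags backup   # look up by address
	ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh     # look up by id

The `no`-prefixed spellings (`nobackup`, `nofullmesh`) clear the corresponding flag again, which is what the paired dump checks verify.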
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 152b84e44d69..22a5ec1e128e 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -28,7 +28,7 @@ static void syntax(char *argv[])
 	fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
 	fprintf(stderr, "\tdel <id> [<ip>]\n");
 	fprintf(stderr, "\tget <id>\n");
-	fprintf(stderr, "\tset <ip> [flags backup|nobackup|fullmesh|nofullmesh]\n");
+	fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>]\n");
 	fprintf(stderr, "\tflush\n");
 	fprintf(stderr, "\tdump\n");
 	fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
@@ -657,8 +657,10 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
 	u_int32_t flags = 0;
 	u_int16_t family;
 	int nest_start;
+	int use_id = 0;
+	u_int8_t id;
 	int off = 0;
-	int arg;
+	int arg = 2;
 
 	memset(data, 0, sizeof(data));
 	nh = (void *)data;
@@ -674,29 +676,45 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
 	nest->rta_len = RTA_LENGTH(0);
 	off += NLMSG_ALIGN(nest->rta_len);
 
-	/* addr data */
-	rta = (void *)(data + off);
-	if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) {
-		family = AF_INET;
-		rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
-		rta->rta_len = RTA_LENGTH(4);
-	} else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) {
-		family = AF_INET6;
-		rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
-		rta->rta_len = RTA_LENGTH(16);
+	if (!strcmp(argv[arg], "id")) {
+		if (++arg >= argc)
+			error(1, 0, " missing id value");
+
+		use_id = 1;
+		id = atoi(argv[arg]);
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+		rta->rta_len = RTA_LENGTH(1);
+		memcpy(RTA_DATA(rta), &id, 1);
+		off += NLMSG_ALIGN(rta->rta_len);
 	} else {
-		error(1, errno, "can't parse ip %s", argv[2]);
+		/* addr data */
+		rta = (void *)(data + off);
+		if (inet_pton(AF_INET, argv[arg], RTA_DATA(rta))) {
+			family = AF_INET;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+			rta->rta_len = RTA_LENGTH(4);
+		} else if (inet_pton(AF_INET6, argv[arg], RTA_DATA(rta))) {
+			family = AF_INET6;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+			rta->rta_len = RTA_LENGTH(16);
+		} else {
+			error(1, errno, "can't parse ip %s", argv[arg]);
+		}
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/* family */
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &family, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
 	}
-	off += NLMSG_ALIGN(rta->rta_len);
 
-	/* family */
-	rta = (void *)(data + off);
-	rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
-	rta->rta_len = RTA_LENGTH(2);
-	memcpy(RTA_DATA(rta), &family, 2);
-	off += NLMSG_ALIGN(rta->rta_len);
+	if (++arg >= argc)
+		error(1, 0, " missing flags keyword");
 
-	for (arg = 3; arg < argc; arg++) {
+	for (; arg < argc; arg++) {
 		if (!strcmp(argv[arg], "flags")) {
 			char *tok, *str;
 
@@ -721,6 +739,21 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
 			rta->rta_len = RTA_LENGTH(4);
 			memcpy(RTA_DATA(rta), &flags, 4);
 			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "port")) {
+			u_int16_t port;
+
+			if (use_id)
+				error(1, 0, " port can't be used with id");
+
+			if (++arg >= argc)
+				error(1, 0, " missing port value");
+
+			port = atoi(argv[arg]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+			rta->rta_len = RTA_LENGTH(2);
+			memcpy(RTA_DATA(rta), &port, 2);
+			off += NLMSG_ALIGN(rta->rta_len);
 		} else {
 			error(1, 0, "unknown keyword %s", argv[arg]);
 		}
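Taken together, the parser now accepts three shapes of the `set` command; illustrative invocations (the address, id and port values are arbitrary):

	./pm_nl_ctl set 10.0.2.1 flags backup port 10100   # by address, with optional port
	./pm_nl_ctl set id 1 flags nobackup                # by id
	./pm_nl_ctl set id 1 flags backup port 10100       # rejected: "port can't be used with id"

When matching by id, the endpoint is identified by a one-byte MPTCP_PM_ADDR_ATTR_ID attribute and the family attribute is skipped entirely; the `use_id` check then vetoes the port keyword, presumably because MPTCP_PM_ADDR_ATTR_PORT is only meaningful together with an address.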