-rw-r--r--  Documentation/bpf/bpf_iterators.rst | 117
-rw-r--r--  Documentation/bpf/kfuncs.rst | 17
-rw-r--r--  Documentation/netlink/specs/rt-link.yaml | 68
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c | 242
-rw-r--r--  arch/riscv/net/bpf_jit.h | 15
-rw-r--r--  arch/riscv/net/bpf_jit_comp64.c | 332
-rw-r--r--  arch/riscv/net/bpf_jit_core.c | 3
-rw-r--r--  arch/s390/include/asm/nospec-branch.h | 4
-rw-r--r--  arch/s390/net/bpf_jit_comp.c | 138
-rw-r--r--  drivers/bluetooth/btnxpuart.c | 2
-rw-r--r--  drivers/bluetooth/hci_qca.c | 14
-rw-r--r--  drivers/dma-buf/dma-buf.c | 98
-rw-r--r--  drivers/net/can/kvaser_pciefd.c | 3
-rw-r--r--  drivers/net/dsa/b53/b53_common.c | 58
-rw-r--r--  drivers/net/ethernet/airoha/airoha_eth.c | 2
-rw-r--r--  drivers/net/ethernet/airoha/airoha_eth.h | 1
-rw-r--r--  drivers/net/ethernet/airoha/airoha_ppe.c | 54
-rw-r--r--  drivers/net/ethernet/airoha/airoha_regs.h | 10
-rw-r--r--  drivers/net/ethernet/google/gve/gve_main.c | 2
-rw-r--r--  drivers/net/ethernet/google/gve/gve_tx_dqo.c | 3
-rw-r--r--  drivers/net/ethernet/huawei/hinic3/hinic3_main.c | 2
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_main.c | 47
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_sched.c | 181
-rw-r--r--  drivers/net/ethernet/intel/idpf/idpf_lib.c | 18
-rw-r--r--  drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c | 9
-rw-r--r--  drivers/net/ethernet/intel/idpf/idpf_txrx.c | 45
-rw-r--r--  drivers/net/ethernet/intel/idpf/idpf_txrx.h | 8
-rw-r--r--  drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 2
-rw-r--r--  drivers/net/ethernet/intel/idpf/idpf_virtchnl.h | 1
-rw-r--r--  drivers/net/ethernet/mediatek/mtk_star_emac.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_clock.c | 2
-rw-r--r--  drivers/net/ethernet/microchip/lan966x/lan966x_main.c | 1
-rw-r--r--  drivers/net/ethernet/microchip/lan966x/lan966x_main.h | 1
-rw-r--r--  drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c | 1
-rw-r--r--  drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c | 21
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_est.c | 5
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 11
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 2
-rw-r--r--  drivers/net/ethernet/ti/icssg/icssg_stats.c | 8
-rw-r--r--  drivers/net/hyperv/netvsc_bpf.c | 2
-rw-r--r--  drivers/net/hyperv/netvsc_drv.c | 4
-rw-r--r--  drivers/net/ovpn/io.c | 8
-rw-r--r--  drivers/net/ovpn/netlink.c | 16
-rw-r--r--  drivers/net/ovpn/peer.c | 4
-rw-r--r--  drivers/net/ovpn/socket.c | 68
-rw-r--r--  drivers/net/ovpn/socket.h | 4
-rw-r--r--  drivers/net/ovpn/tcp.c | 73
-rw-r--r--  drivers/net/ovpn/tcp.h | 3
-rw-r--r--  drivers/net/ovpn/udp.c | 46
-rw-r--r--  drivers/net/ovpn/udp.h | 4
-rw-r--r--  drivers/net/usb/aqc111.c | 8
-rw-r--r--  drivers/net/usb/ch9200.c | 7
-rw-r--r--  drivers/net/vmxnet3/vmxnet3_drv.c | 26
-rw-r--r--  drivers/net/wwan/mhi_wwan_mbim.c | 9
-rw-r--r--  drivers/net/wwan/t7xx/t7xx_netdev.c | 11
-rw-r--r--  include/asm-generic/vmlinux.lds.h | 3
-rw-r--r--  include/linux/bpf-cgroup.h | 8
-rw-r--r--  include/linux/bpf.h | 20
-rw-r--r--  include/linux/bpf_verifier.h | 24
-rw-r--r--  include/linux/dma-buf.h | 4
-rw-r--r--  include/net/checksum.h | 2
-rw-r--r--  include/uapi/linux/bpf.h | 21
-rw-r--r--  kernel/bpf/Makefile | 3
-rw-r--r--  kernel/bpf/bpf_struct_ops.c | 2
-rw-r--r--  kernel/bpf/btf.c | 45
-rw-r--r--  kernel/bpf/cgroup.c | 32
-rw-r--r--  kernel/bpf/core.c | 29
-rw-r--r--  kernel/bpf/dmabuf_iter.c | 150
-rw-r--r--  kernel/bpf/hashtab.c | 148
-rw-r--r--  kernel/bpf/helpers.c | 133
-rw-r--r--  kernel/bpf/syscall.c | 10
-rw-r--r--  kernel/bpf/sysfs_btf.c | 32
-rw-r--r--  kernel/bpf/verifier.c | 636
-rw-r--r--  kernel/sched/ext.c | 15
-rw-r--r--  kernel/trace/bpf_trace.c | 321
-rw-r--r--  kernel/trace/trace_uprobe.c | 2
-rw-r--r--  net/bluetooth/l2cap_core.c | 3
-rw-r--r--  net/bluetooth/mgmt.c | 3
-rw-r--r--  net/bpf/test_run.c | 8
-rw-r--r--  net/core/dev.c | 1
-rw-r--r--  net/core/devmem.h | 3
-rw-r--r--  net/core/filter.c | 19
-rw-r--r--  net/core/page_pool.c | 27
-rw-r--r--  net/core/skmsg.c | 56
-rw-r--r--  net/core/sock.c | 8
-rw-r--r--  net/core/utils.c | 4
-rw-r--r--  net/dsa/tag_brcm.c | 2
-rw-r--r--  net/ipv4/udp_offload.c | 5
-rw-r--r--  net/ipv6/ila/ila_common.c | 6
-rw-r--r--  net/netfilter/nf_nat_core.c | 12
-rw-r--r--  net/netfilter/nft_set_pipapo_avx2.c | 21
-rw-r--r--  net/rxrpc/insecure.c | 5
-rw-r--r--  net/tipc/crypto.c | 6
-rw-r--r--  net/tls/tls_sw.c | 15
-rw-r--r--  scripts/Makefile.btf | 2
-rwxr-xr-x  scripts/bpf_doc.py | 119
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-prog.rst | 10
-rw-r--r--  tools/bpf/bpftool/bash-completion/bpftool | 4
-rw-r--r--  tools/bpf/bpftool/cgroup.c | 14
-rw-r--r--  tools/bpf/bpftool/link.c | 3
-rw-r--r--  tools/bpf/bpftool/prog.c | 12
-rw-r--r--  tools/include/uapi/linux/bpf.h | 21
-rw-r--r--  tools/lib/bpf/bpf_core_read.h | 6
-rw-r--r--  tools/lib/bpf/bpf_helpers.h | 8
-rw-r--r--  tools/lib/bpf/btf.c | 226
-rw-r--r--  tools/lib/bpf/libbpf.c | 87
-rw-r--r--  tools/lib/bpf/libbpf.h | 6
-rw-r--r--  tools/lib/bpf/libbpf.map | 4
-rw-r--r--  tools/lib/bpf/libbpf_internal.h | 9
-rw-r--r--  tools/lib/bpf/linker.c | 6
-rw-r--r--  tools/lib/bpf/nlattr.c | 15
-rw-r--r--  tools/testing/kunit/configs/all_tests.config | 1
-rw-r--r--  tools/testing/selftests/Makefile | 2
-rw-r--r--  tools/testing/selftests/bpf/DENYLIST | 1
-rw-r--r--  tools/testing/selftests/bpf/DENYLIST.aarch64 | 2
-rw-r--r--  tools/testing/selftests/bpf/Makefile | 16
-rw-r--r--  tools/testing/selftests/bpf/bench.c | 4
-rw-r--r--  tools/testing/selftests/bpf/benchs/bench_htab_mem.c | 3
-rw-r--r--  tools/testing/selftests/bpf/benchs/bench_sockmap.c | 598
-rw-r--r--  tools/testing/selftests/bpf/bpf_experimental.h | 5
-rw-r--r--  tools/testing/selftests/bpf/config | 3
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c | 14
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/attach_probe.c | 84
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 6
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c | 101
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf_split.c | 58
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf_sysfs.c | 81
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c | 285
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/dynptr.c | 13
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c | 192
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/fill_link_info.c | 18
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/linked_list.c | 6
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/rbtree.c | 6
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sk_assign.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/socket_helpers.h | 84
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h | 25
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c | 297
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sockmap_listen.c | 457
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sockmap_redir.c | 465
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/tc_redirect.c | 11
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/test_btf_ext.c | 64
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/test_veristat.c | 5
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/verifier.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/xdp_metadata.c | 22
-rw-r--r--  tools/testing/selftests/bpf/progs/bench_sockmap_prog.c | 65
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h (renamed from tools/testing/selftests/bpf/bpf_arena_spin_lock.h) | 15
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_misc.h | 5
-rw-r--r--  tools/testing/selftests/bpf/progs/dmabuf_iter.c | 101
-rw-r--r--  tools/testing/selftests/bpf/progs/dynptr_success.c | 230
-rw-r--r--  tools/testing/selftests/bpf/progs/fd_htab_lookup.c | 25
-rw-r--r--  tools/testing/selftests/bpf/progs/iters.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/linked_list_peek.c | 113
-rw-r--r--  tools/testing/selftests/bpf/progs/prepare.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/rbtree_fail.c | 29
-rw-r--r--  tools/testing/selftests/bpf/progs/rbtree_search.c | 206
-rw-r--r--  tools/testing/selftests/bpf/progs/set_global_vars.c | 41
-rw-r--r--  tools/testing/selftests/bpf/progs/test_btf_ext.c | 22
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sockmap_ktls.c | 36
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sockmap_redir.c | 68
-rw-r--r--  tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_bpf_trap.c | 71
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c | 12
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_load_acquire.c | 48
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_precision.c | 58
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_store_release.c | 39
-rw-r--r--  tools/testing/selftests/bpf/progs/xdp_metadata.c | 13
-rw-r--r--  tools/testing/selftests/bpf/test_kmods/bpf_testmod.c | 8
-rw-r--r--  tools/testing/selftests/bpf/test_loader.c | 14
-rw-r--r--  tools/testing/selftests/bpf/test_verifier.c | 8
-rw-r--r--  tools/testing/selftests/bpf/veristat.c | 101
-rwxr-xr-x  tools/testing/selftests/net/netfilter/nft_concat_range.sh | 102
-rwxr-xr-x  tools/testing/selftests/net/netfilter/nft_nat.sh | 81
-rw-r--r--  tools/testing/selftests/net/ovpn/ovpn-cli.c | 1
-rwxr-xr-x  tools/testing/selftests/net/ovpn/test-large-mtu.sh | 9
176 files changed, 6655 insertions, 2056 deletions
diff --git a/Documentation/bpf/bpf_iterators.rst b/Documentation/bpf/bpf_iterators.rst
index 7f514cb6b052..189e3ec1c6c8 100644
--- a/Documentation/bpf/bpf_iterators.rst
+++ b/Documentation/bpf/bpf_iterators.rst
@@ -2,10 +2,117 @@
BPF Iterators
=============
+--------
+Overview
+--------
+
+BPF supports two separate entities collectively known as "BPF iterators": the
+BPF iterator *program type* and *open-coded* BPF iterators. The former is
+a stand-alone BPF program type which, when attached and activated by the user,
+is called once for each entity (task_struct, cgroup, etc.) that is being
+iterated. The latter is a set of BPF-side APIs implementing iterator
+functionality, available across multiple BPF program types. Open-coded
+iterators provide functionality similar to BPF iterator programs, but give
+more flexibility and control to all other BPF program types. BPF iterator
+programs, on the other hand, can be used to implement anonymous or BPF
+FS-mounted special files whose contents are generated by the attached BPF
+iterator program, backed by seq_file functionality. Both are useful depending
+on specific needs.
+
+When adding a new BPF iterator program, it is expected that similar
+functionality will be added as an open-coded iterator for maximum flexibility.
+It is also expected that iteration logic and code will be maximally shared and
+reused between the two iterator API surfaces.
-----------
-Motivation
-----------
+------------------------
+Open-coded BPF Iterators
+------------------------
+
+Open-coded BPF iterators are implemented as tightly-coupled trios of kfuncs
+(constructor, next element fetch, destructor) plus an iterator-specific type
+describing the on-the-stack iterator state, which the BPF verifier guarantees
+cannot be tampered with outside of the corresponding
+constructor/destructor/next APIs.
+
+Each kind of open-coded BPF iterator has its own associated
+struct bpf_iter_<type>, where <type> denotes a specific type of iterator.
+bpf_iter_<type> state needs to live on the BPF program stack, so make sure
+it's small enough to fit there. For performance reasons it's best to avoid
+dynamic memory allocation for iterator state and to size the state struct big
+enough to fit everything necessary. But if necessary, dynamic memory
+allocation is a way to bypass BPF stack limitations. Note that the state
+struct size is part of the iterator's user-visible API, so changing it will
+break backwards compatibility; be deliberate about designing it.
+
+All kfuncs (constructor, next, destructor) have to be named consistently as
+bpf_iter_<type>_{new,next,destroy}(), respectively. <type> represents the
+iterator type, and the iterator state should be represented by a matching
+`struct bpf_iter_<type>` state type. Also, all iter kfuncs should have
+a pointer to this `struct bpf_iter_<type>` as the very first argument.
+
+Additionally:
+ - Constructor, i.e., `bpf_iter_<type>_new()`, can have an arbitrary number
+ of extra arguments. Return type is not enforced either.
+ - Next method, i.e., `bpf_iter_<type>_next()`, has to return a pointer
+ type and should have exactly one argument: `struct bpf_iter_<type> *`
+ (const/volatile/restrict and typedefs are ignored).
+ - Destructor, i.e., `bpf_iter_<type>_destroy()`, should return void and
+ should have exactly one argument, similar to the next method.
+ - `struct bpf_iter_<type>` size is enforced to be positive and
+ a multiple of 8 bytes (to fit stack slots correctly).
+
+Such strictness and consistency allows building generic helpers that abstract
+away important but boilerplate details, so that open-coded iterators can be
+used effectively and ergonomically (see libbpf's bpf_for_each() macro). This
+is enforced by the kernel at kfunc registration time.
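
As a usage sketch on the BPF-program side (editorial, not part of the patch;
it uses the existing bpf_iter_num numbers iterator, and bpf_for() is libbpf's
convenience macro built on the same trio)::

    struct bpf_iter_num it;
    int *v, i;

    bpf_iter_num_new(&it, 0, 5);          /* iterate over [0, 5) */
    while ((v = bpf_iter_num_next(&it)))
            bpf_printk("value: %d", *v);
    bpf_iter_num_destroy(&it);            /* always paired with new() */

    bpf_for(i, 0, 5)                      /* equivalent, via the macro */
            bpf_printk("value: %d", i);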
+
+Constructor/next/destructor implementation contract is as follows:
+ - constructor, `bpf_iter_<type>_new()`, always initializes iterator state on
+ the stack. If any of the input arguments are invalid, constructor should
+ make sure to still initialize it such that subsequent next() calls will
+ return NULL. I.e., on error, *return error and construct empty iterator*.
+ Constructor kfunc is marked with KF_ITER_NEW flag.
+
+ - next method, `bpf_iter_<type>_next()`, accepts a pointer to the iterator
+ state and produces an element. Next method should always return a pointer.
+ The contract with the BPF verifier is that the next method *guarantees*
+ that it will eventually return NULL when elements are exhausted. Once NULL
+ is returned, subsequent next calls *should keep returning NULL*. Next
+ method is marked with KF_ITER_NEXT (and should also have KF_RET_NULL, as
+ a NULL-returning kfunc, of course).
+
+ - destructor, `bpf_iter_<type>_destroy()`, is always called exactly once,
+ even if the constructor failed or next returned nothing. Destructor frees
+ up any resources and marks the stack space used by `struct bpf_iter_<type>`
+ as usable for something else. Destructor is marked with KF_ITER_DESTROY.
+
+Any open-coded BPF iterator implementation has to provide at least these three
+methods. It is enforced that for any given iterator type only the applicable
+constructor/destructor/next are callable; i.e., the verifier ensures you can't
+pass a number iterator's state into, say, a cgroup iterator's next method.
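
For a sense of the kernel-side shape, here is an editorial sketch of
a hypothetical `foo` iterator's declarations and registration (the `foo` names
are illustrative; the KF_ITER_* flags and BTF_KFUNCS_* macros are the real
registration machinery)::

    /* opaque on-stack state; its size is part of the iterator's ABI */
    struct bpf_iter_foo {
            __u64 __opaque[2];
    } __attribute__((aligned(8)));

    __bpf_kfunc int bpf_iter_foo_new(struct bpf_iter_foo *it);
    __bpf_kfunc struct foo *bpf_iter_foo_next(struct bpf_iter_foo *it);
    __bpf_kfunc void bpf_iter_foo_destroy(struct bpf_iter_foo *it);

    BTF_KFUNCS_START(foo_iter_kfuncs)
    BTF_ID_FLAGS(func, bpf_iter_foo_new, KF_ITER_NEW)
    BTF_ID_FLAGS(func, bpf_iter_foo_next, KF_ITER_NEXT | KF_RET_NULL)
    BTF_ID_FLAGS(func, bpf_iter_foo_destroy, KF_ITER_DESTROY)
    BTF_KFUNCS_END(foo_iter_kfuncs)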
+
+From a 10,000-foot BPF verification point of view, next methods are the points
+where a verification state forks, conceptually similar to what the verifier
+does when validating conditional jumps. The verifier branches at each
+`call bpf_iter_<type>_next` instruction and simulates two outcomes: NULL
+(iteration is done) and non-NULL (a new element is returned). NULL is
+simulated first and is supposed to reach exit without looping. After that the
+non-NULL case is validated and it either reaches exit (for trivial examples
+with no real loop), or reaches another `call bpf_iter_<type>_next` instruction
+with a state equivalent to an already (partially) validated one. State
+equivalence at that point means we technically are going to be looping forever
+without "breaking out" of the established "state envelope" (i.e., subsequent
+iterations don't add any new knowledge or constraints to the verifier state,
+so running 1, 2, 10, or a million of them doesn't matter). But taking into
+account the contract stating that the iterator's next method *has to* return
+NULL eventually, we can conclude that the loop body is safe and will
+eventually terminate. Given we validated the logic outside of the loop (the
+NULL case), and concluded that the loop body is safe (though potentially
+looping many times), the verifier can claim safety of the overall program.
+
+------------------------
+BPF Iterators Motivation
+------------------------
There are a few existing ways to dump kernel data into user space. The most
popular one is the ``/proc`` system. For example, ``cat /proc/net/tcp6`` dumps
@@ -323,8 +430,8 @@ Now, in the userspace program, pass the pointer of struct to the
::
- link = bpf_program__attach_iter(prog, &opts); iter_fd =
- bpf_iter_create(bpf_link__fd(link));
+ link = bpf_program__attach_iter(prog, &opts);
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
If both *tid* and *pid* are zero, an iterator created from this struct
``bpf_iter_attach_opts`` will include every opened file of every task in the
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index a8f5782bd833..ae468b781d31 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -160,6 +160,23 @@ Or::
...
}
+2.2.6 __prog Annotation
+---------------------------
+This annotation is used to indicate that the argument needs to be fixed up to
+the bpf_prog_aux of the calling BPF program. Any value passed into this
+argument is ignored and rewritten by the verifier.
+
+An example is given below::
+
+ __bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
+ int (callback_fn)(void *map, int *key, void *value),
+ unsigned int flags,
+ void *aux__prog)
+ {
+ struct bpf_prog_aux *aux = aux__prog;
+ ...
+ }
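
As a hedged usage note: on the BPF-program side the __prog argument is not
supplied by the caller; convenience wrappers pass NULL and let the verifier
fix it up, e.g. (sketch modeled on the wrapper in selftests'
bpf_experimental.h)::

    /* the trailing aux__prog argument is ignored and rewritten */
    #define bpf_wq_set_callback(wq, cb, flags) \
            bpf_wq_set_callback_impl(wq, cb, flags, NULL)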
+
.. _BPF_kfunc_nodef:
2.3 Using an existing kernel function
diff --git a/Documentation/netlink/specs/rt-link.yaml b/Documentation/netlink/specs/rt-link.yaml
index 5ec3d35b7a38..b41b31eebcae 100644
--- a/Documentation/netlink/specs/rt-link.yaml
+++ b/Documentation/netlink/specs/rt-link.yaml
@@ -1685,15 +1685,19 @@ attribute-sets:
-
name: iflags
type: u16
+ byte-order: big-endian
-
name: oflags
type: u16
+ byte-order: big-endian
-
name: ikey
type: u32
+ byte-order: big-endian
-
name: okey
type: u32
+ byte-order: big-endian
-
name: local
type: binary
@@ -1713,10 +1717,11 @@ attribute-sets:
type: u8
-
name: encap-limit
- type: u32
+ type: u8
-
name: flowinfo
type: u32
+ byte-order: big-endian
-
name: flags
type: u32
@@ -1729,9 +1734,11 @@ attribute-sets:
-
name: encap-sport
type: u16
+ byte-order: big-endian
-
name: encap-dport
type: u16
+ byte-order: big-endian
-
name: collect-metadata
type: flag
@@ -1754,6 +1761,54 @@ attribute-sets:
name: erspan-hwid
type: u16
-
+ name: linkinfo-gre6-attrs
+ subset-of: linkinfo-gre-attrs
+ attributes:
+ -
+ name: link
+ -
+ name: iflags
+ -
+ name: oflags
+ -
+ name: ikey
+ -
+ name: okey
+ -
+ name: local
+ display-hint: ipv6
+ -
+ name: remote
+ display-hint: ipv6
+ -
+ name: ttl
+ -
+ name: encap-limit
+ -
+ name: flowinfo
+ -
+ name: flags
+ -
+ name: encap-type
+ -
+ name: encap-flags
+ -
+ name: encap-sport
+ -
+ name: encap-dport
+ -
+ name: collect-metadata
+ -
+ name: fwmark
+ -
+ name: erspan-index
+ -
+ name: erspan-ver
+ -
+ name: erspan-dir
+ -
+ name: erspan-hwid
+ -
name: linkinfo-vti-attrs
name-prefix: ifla-vti-
header: linux/if_tunnel.h
@@ -1764,9 +1819,11 @@ attribute-sets:
-
name: ikey
type: u32
+ byte-order: big-endian
-
name: okey
type: u32
+ byte-order: big-endian
-
name: local
type: binary
@@ -1816,6 +1873,7 @@ attribute-sets:
-
name: port
type: u16
+ byte-order: big-endian
-
name: collect-metadata
type: flag
@@ -1835,6 +1893,7 @@ attribute-sets:
-
name: label
type: u32
+ byte-order: big-endian
-
name: ttl-inherit
type: u8
@@ -1875,9 +1934,11 @@ attribute-sets:
-
name: flowinfo
type: u32
+ byte-order: big-endian
-
name: flags
type: u16
+ byte-order: big-endian
-
name: proto
type: u8
@@ -1907,9 +1968,11 @@ attribute-sets:
-
name: encap-sport
type: u16
+ byte-order: big-endian
-
name: encap-dport
type: u16
+ byte-order: big-endian
-
name: collect-metadata
type: flag
@@ -2225,6 +2288,9 @@ sub-messages:
value: gretap
attribute-set: linkinfo-gre-attrs
-
+ value: ip6gre
+ attribute-set: linkinfo-gre6-attrs
+ -
value: geneve
attribute-set: linkinfo-geneve-attrs
-
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 634d78422adb..da8b89dd2910 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2113,7 +2113,7 @@ bool bpf_jit_supports_subprog_tailcalls(void)
}
static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
- int args_off, int retval_off, int run_ctx_off,
+ int bargs_off, int retval_off, int run_ctx_off,
bool save_ret)
{
__le32 *branch;
@@ -2155,7 +2155,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
branch = ctx->image + ctx->idx;
emit(A64_NOP, ctx);
- emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
+ emit(A64_ADD_I(1, A64_R(0), A64_SP, bargs_off), ctx);
if (!p->jited)
emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);
@@ -2180,7 +2180,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
}
static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
- int args_off, int retval_off, int run_ctx_off,
+ int bargs_off, int retval_off, int run_ctx_off,
__le32 **branches)
{
int i;
@@ -2190,7 +2190,7 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
*/
emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
for (i = 0; i < tl->nr_links; i++) {
- invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off,
+ invoke_bpf_prog(ctx, tl->links[i], bargs_off, retval_off,
run_ctx_off, true);
/* if (*(u64 *)(sp + retval_off) != 0)
* goto do_fexit;
@@ -2204,23 +2204,125 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
}
}
-static void save_args(struct jit_ctx *ctx, int args_off, int nregs)
+struct arg_aux {
+ /* how many args are passed through registers, the rest of the args are
+ * passed through stack
+ */
+ int args_in_regs;
+ /* how many registers are used to pass arguments */
+ int regs_for_args;
+ /* how much stack is used for additional args passed to the bpf
+ * program that did not fit in the original function's registers
+ */
+ int bstack_for_args;
+ /* how much stack is used for additional args passed to the
+ * original function when called from trampoline (this one needs
+ * arguments to be properly aligned)
+ */
+ int ostack_for_args;
+};
+
+static int calc_arg_aux(const struct btf_func_model *m,
+ struct arg_aux *a)
{
- int i;
+ int stack_slots, nregs, slots, i;
+
+ /* verifier ensures m->nr_args <= MAX_BPF_FUNC_ARGS */
+ for (i = 0, nregs = 0; i < m->nr_args; i++) {
+ slots = (m->arg_size[i] + 7) / 8;
+ if (nregs + slots <= 8) /* passed through register ? */
+ nregs += slots;
+ else
+ break;
+ }
+
+ a->args_in_regs = i;
+ a->regs_for_args = nregs;
+ a->ostack_for_args = 0;
+ a->bstack_for_args = 0;
+
+ /* the rest of the arguments are passed through the stack */
+ for (; i < m->nr_args; i++) {
+ /* We cannot know for sure the exact alignment needs of
+ * structs passed on the stack, so deny those
+ */
+ if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
+ return -ENOTSUPP;
+ stack_slots = (m->arg_size[i] + 7) / 8;
+ a->bstack_for_args += stack_slots * 8;
+ a->ostack_for_args = a->ostack_for_args + stack_slots * 8;
+ }
+
+ return 0;
+}
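
To make the register/stack split concrete, a worked example (editorial
sketch, not part of the patch): for a traced function taking nine u64
arguments on arm64, calc_arg_aux() would yield:

    /*
     * args 0..7 fill the eight 64-bit argument registers x0-x7:
     *   args_in_regs    = 8   (first 8 args arrive in registers)
     *   regs_for_args   = 8   (8 register slots used)
     * the ninth u64 arrives on the caller's stack:
     *   bstack_for_args = 8   (one 8-byte slot copied for the BPF progs)
     *   ostack_for_args = 8   (one 8-byte slot rebuilt for the original call)
     */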
- for (i = 0; i < nregs; i++) {
- emit(A64_STR64I(i, A64_SP, args_off), ctx);
- args_off += 8;
+static void clear_garbage(struct jit_ctx *ctx, int reg, int effective_bytes)
+{
+ if (effective_bytes) {
+ int garbage_bits = 64 - 8 * effective_bytes;
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ /* garbage bits are at the right end */
+ emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
+ emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
+#else
+ /* garbage bits are at the left end */
+ emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
+ emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
+#endif
}
}
-static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
+static void save_args(struct jit_ctx *ctx, int bargs_off, int oargs_off,
+ const struct btf_func_model *m,
+ const struct arg_aux *a,
+ bool for_call_origin)
{
int i;
+ int reg;
+ int doff;
+ int soff;
+ int slots;
+ u8 tmp = bpf2a64[TMP_REG_1];
+
+ /* store arguments to the stack for the bpf program, or restore
+ * arguments from stack for the original function
+ */
+ for (reg = 0; reg < a->regs_for_args; reg++) {
+ emit(for_call_origin ?
+ A64_LDR64I(reg, A64_SP, bargs_off) :
+ A64_STR64I(reg, A64_SP, bargs_off),
+ ctx);
+ bargs_off += 8;
+ }
+
+ soff = 32; /* on stack arguments start from FP + 32 */
+ doff = (for_call_origin ? oargs_off : bargs_off);
+
+ /* save on stack arguments */
+ for (i = a->args_in_regs; i < m->nr_args; i++) {
+ slots = (m->arg_size[i] + 7) / 8;
+ /* verifier ensures arg_size <= 16, so slots equals 1 or 2 */
+ while (slots-- > 0) {
+ emit(A64_LDR64I(tmp, A64_FP, soff), ctx);
+ /* if there is unused space in the last slot, clear
+ * the garbage contained in the space.
+ */
+ if (slots == 0 && !for_call_origin)
+ clear_garbage(ctx, tmp, m->arg_size[i] % 8);
+ emit(A64_STR64I(tmp, A64_SP, doff), ctx);
+ soff += 8;
+ doff += 8;
+ }
+ }
+}
- for (i = 0; i < nregs; i++) {
- emit(A64_LDR64I(i, A64_SP, args_off), ctx);
- args_off += 8;
+static void restore_args(struct jit_ctx *ctx, int bargs_off, int nregs)
+{
+ int reg;
+
+ for (reg = 0; reg < nregs; reg++) {
+ emit(A64_LDR64I(reg, A64_SP, bargs_off), ctx);
+ bargs_off += 8;
}
}
@@ -2243,17 +2345,21 @@ static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links)
*/
static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
struct bpf_tramp_links *tlinks, void *func_addr,
- int nregs, u32 flags)
+ const struct btf_func_model *m,
+ const struct arg_aux *a,
+ u32 flags)
{
int i;
int stack_size;
int retaddr_off;
int regs_off;
int retval_off;
- int args_off;
- int nregs_off;
+ int bargs_off;
+ int nfuncargs_off;
int ip_off;
int run_ctx_off;
+ int oargs_off;
+ int nfuncargs;
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -2262,31 +2368,38 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
bool is_struct_ops = is_struct_ops_tramp(fentry);
/* trampoline stack layout:
- * [ parent ip ]
- * [ FP ]
- * SP + retaddr_off [ self ip ]
- * [ FP ]
+ * [ parent ip ]
+ * [ FP ]
+ * SP + retaddr_off [ self ip ]
+ * [ FP ]
*
- * [ padding ] align SP to multiples of 16
+ * [ padding ] align SP to multiples of 16
*
- * [ x20 ] callee saved reg x20
- * SP + regs_off [ x19 ] callee saved reg x19
+ * [ x20 ] callee saved reg x20
+ * SP + regs_off [ x19 ] callee saved reg x19
*
- * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or
- * BPF_TRAMP_F_RET_FENTRY_RET
+ * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or
+ * BPF_TRAMP_F_RET_FENTRY_RET
+ * [ arg reg N ]
+ * [ ... ]
+ * SP + bargs_off [ arg reg 1 ] for bpf
*
- * [ arg reg N ]
- * [ ... ]
- * SP + args_off [ arg reg 1 ]
+ * SP + nfuncargs_off [ arg regs count ]
*
- * SP + nregs_off [ arg regs count ]
+ * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
*
- * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
+ * SP + run_ctx_off [ bpf_tramp_run_ctx ]
*
- * SP + run_ctx_off [ bpf_tramp_run_ctx ]
+ * [ stack arg N ]
+ * [ ... ]
+ * SP + oargs_off [ stack arg 1 ] for original func
*/
stack_size = 0;
+ oargs_off = stack_size;
+ if (flags & BPF_TRAMP_F_CALL_ORIG)
+ stack_size += a->ostack_for_args;
+
run_ctx_off = stack_size;
/* room for bpf_tramp_run_ctx */
stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
@@ -2296,13 +2409,14 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
if (flags & BPF_TRAMP_F_IP_ARG)
stack_size += 8;
- nregs_off = stack_size;
+ nfuncargs_off = stack_size;
/* room for args count */
stack_size += 8;
- args_off = stack_size;
+ bargs_off = stack_size;
/* room for args */
- stack_size += nregs * 8;
+ nfuncargs = a->regs_for_args + a->bstack_for_args / 8;
+ stack_size += 8 * nfuncargs;
/* room for return value */
retval_off = stack_size;
@@ -2349,11 +2463,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
}
/* save arg regs count*/
- emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx);
- emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx);
+ emit(A64_MOVZ(1, A64_R(10), nfuncargs, 0), ctx);
+ emit(A64_STR64I(A64_R(10), A64_SP, nfuncargs_off), ctx);
- /* save arg regs */
- save_args(ctx, args_off, nregs);
+ /* save args for bpf */
+ save_args(ctx, bargs_off, oargs_off, m, a, false);
/* save callee saved registers */
emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
@@ -2369,7 +2483,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
}
for (i = 0; i < fentry->nr_links; i++)
- invoke_bpf_prog(ctx, fentry->links[i], args_off,
+ invoke_bpf_prog(ctx, fentry->links[i], bargs_off,
retval_off, run_ctx_off,
flags & BPF_TRAMP_F_RET_FENTRY_RET);
@@ -2379,12 +2493,13 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
if (!branches)
return -ENOMEM;
- invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
+ invoke_bpf_mod_ret(ctx, fmod_ret, bargs_off, retval_off,
run_ctx_off, branches);
}
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- restore_args(ctx, args_off, nregs);
+ /* save args for original func */
+ save_args(ctx, bargs_off, oargs_off, m, a, true);
/* call original func */
emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
@@ -2403,7 +2518,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
}
for (i = 0; i < fexit->nr_links; i++)
- invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
+ invoke_bpf_prog(ctx, fexit->links[i], bargs_off, retval_off,
run_ctx_off, false);
if (flags & BPF_TRAMP_F_CALL_ORIG) {
@@ -2417,7 +2532,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
}
if (flags & BPF_TRAMP_F_RESTORE_REGS)
- restore_args(ctx, args_off, nregs);
+ restore_args(ctx, bargs_off, a->regs_for_args);
/* restore callee saved register x19 and x20 */
emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
@@ -2454,21 +2569,6 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
return ctx->idx;
}
-static int btf_func_model_nregs(const struct btf_func_model *m)
-{
- int nregs = m->nr_args;
- int i;
-
- /* extra registers needed for struct argument */
- for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
- /* The arg_size is at most 16 bytes, enforced by the verifier. */
- if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
- nregs += (m->arg_size[i] + 7) / 8 - 1;
- }
-
- return nregs;
-}
-
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks, void *func_addr)
{
@@ -2477,14 +2577,14 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
.idx = 0,
};
struct bpf_tramp_image im;
- int nregs, ret;
+ struct arg_aux aaux;
+ int ret;
- nregs = btf_func_model_nregs(m);
- /* the first 8 registers are used for arguments */
- if (nregs > 8)
- return -ENOTSUPP;
+ ret = calc_arg_aux(m, &aaux);
+ if (ret < 0)
+ return ret;
- ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags);
+ ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, m, &aaux, flags);
if (ret < 0)
return ret;
@@ -2511,9 +2611,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
u32 flags, struct bpf_tramp_links *tlinks,
void *func_addr)
{
- int ret, nregs;
- void *image, *tmp;
u32 size = ro_image_end - ro_image;
+ struct arg_aux aaux;
+ void *image, *tmp;
+ int ret;
/* image doesn't need to be in module memory range, so we can
* use kvmalloc.
@@ -2529,13 +2630,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
.write = true,
};
- nregs = btf_func_model_nregs(m);
- /* the first 8 registers are used for arguments */
- if (nregs > 8)
- return -ENOTSUPP;
jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
- ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
+ ret = calc_arg_aux(m, &aaux);
+ if (ret)
+ goto out;
+ ret = prepare_trampoline(&ctx, im, tlinks, func_addr, m, &aaux, flags);
if (ret > 0 && validate_code(&ctx) < 0) {
ret = -EINVAL;
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index 1d1c78d4cff1..e7b032dfd17f 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -608,6 +608,21 @@ static inline u32 rv_fence(u8 pred, u8 succ)
return rv_i_insn(imm11_0, 0, 0, 0, 0xf);
}
+static inline void emit_fence_r_rw(struct rv_jit_context *ctx)
+{
+ emit(rv_fence(0x2, 0x3), ctx);
+}
+
+static inline void emit_fence_rw_w(struct rv_jit_context *ctx)
+{
+ emit(rv_fence(0x3, 0x1), ctx);
+}
+
+static inline void emit_fence_rw_rw(struct rv_jit_context *ctx)
+{
+ emit(rv_fence(0x3, 0x3), ctx);
+}
+
static inline u32 rv_nop(void)
{
return rv_i_insn(0, 0, 0, 0, 0x13);
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index ca60db75199d..10e01ff06312 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -473,11 +473,212 @@ static inline void emit_kcfi(u32 hash, struct rv_jit_context *ctx)
emit(hash, ctx);
}
-static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
- struct rv_jit_context *ctx)
+static int emit_load_8(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+{
+ int insns_start;
+
+ if (is_12b_int(off)) {
+ insns_start = ctx->ninsns;
+ if (sign_ext)
+ emit(rv_lb(rd, off, rs), ctx);
+ else
+ emit(rv_lbu(rd, off, rs), ctx);
+ return ctx->ninsns - insns_start;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ insns_start = ctx->ninsns;
+ if (sign_ext)
+ emit(rv_lb(rd, 0, RV_REG_T1), ctx);
+ else
+ emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
+ return ctx->ninsns - insns_start;
+}
+
+static int emit_load_16(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+{
+ int insns_start;
+
+ if (is_12b_int(off)) {
+ insns_start = ctx->ninsns;
+ if (sign_ext)
+ emit(rv_lh(rd, off, rs), ctx);
+ else
+ emit(rv_lhu(rd, off, rs), ctx);
+ return ctx->ninsns - insns_start;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ insns_start = ctx->ninsns;
+ if (sign_ext)
+ emit(rv_lh(rd, 0, RV_REG_T1), ctx);
+ else
+ emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
+ return ctx->ninsns - insns_start;
+}
+
+static int emit_load_32(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+{
+ int insns_start;
+
+ if (is_12b_int(off)) {
+ insns_start = ctx->ninsns;
+ if (sign_ext)
+ emit(rv_lw(rd, off, rs), ctx);
+ else
+ emit(rv_lwu(rd, off, rs), ctx);
+ return ctx->ninsns - insns_start;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ insns_start = ctx->ninsns;
+ if (sign_ext)
+ emit(rv_lw(rd, 0, RV_REG_T1), ctx);
+ else
+ emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
+ return ctx->ninsns - insns_start;
+}
+
+static int emit_load_64(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+{
+ int insns_start;
+
+ if (is_12b_int(off)) {
+ insns_start = ctx->ninsns;
+ emit_ld(rd, off, rs, ctx);
+ return ctx->ninsns - insns_start;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ insns_start = ctx->ninsns;
+ emit_ld(rd, 0, RV_REG_T1, ctx);
+ return ctx->ninsns - insns_start;
+}
+
+static void emit_store_8(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+{
+ if (is_12b_int(off)) {
+ emit(rv_sb(rd, off, rs), ctx);
+ return;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+ emit(rv_sb(RV_REG_T1, 0, rs), ctx);
+}
+
+static void emit_store_16(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+{
+ if (is_12b_int(off)) {
+ emit(rv_sh(rd, off, rs), ctx);
+ return;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+ emit(rv_sh(RV_REG_T1, 0, rs), ctx);
+}
+
+static void emit_store_32(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+{
+ if (is_12b_int(off)) {
+ emit_sw(rd, off, rs, ctx);
+ return;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+ emit_sw(RV_REG_T1, 0, rs, ctx);
+}
+
+static void emit_store_64(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+{
+ if (is_12b_int(off)) {
+ emit_sd(rd, off, rs, ctx);
+ return;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+ emit_sd(RV_REG_T1, 0, rs, ctx);
+}
+
+static int emit_atomic_ld_st(u8 rd, u8 rs, const struct bpf_insn *insn,
+ struct rv_jit_context *ctx)
+{
+ u8 code = insn->code;
+ s32 imm = insn->imm;
+ s16 off = insn->off;
+
+ switch (imm) {
+ /* dst_reg = load_acquire(src_reg + off16) */
+ case BPF_LOAD_ACQ:
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ emit_load_8(false, rd, off, rs, ctx);
+ break;
+ case BPF_H:
+ emit_load_16(false, rd, off, rs, ctx);
+ break;
+ case BPF_W:
+ emit_load_32(false, rd, off, rs, ctx);
+ break;
+ case BPF_DW:
+ emit_load_64(false, rd, off, rs, ctx);
+ break;
+ }
+ emit_fence_r_rw(ctx);
+
+ /* If our next insn is a redundant zext, return 1 to tell
+ * build_body() to skip it.
+ */
+ if (BPF_SIZE(code) != BPF_DW && insn_is_zext(&insn[1]))
+ return 1;
+ break;
+ /* store_release(dst_reg + off16, src_reg) */
+ case BPF_STORE_REL:
+ emit_fence_rw_w(ctx);
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ emit_store_8(rd, off, rs, ctx);
+ break;
+ case BPF_H:
+ emit_store_16(rd, off, rs, ctx);
+ break;
+ case BPF_W:
+ emit_store_32(rd, off, rs, ctx);
+ break;
+ case BPF_DW:
+ emit_store_64(rd, off, rs, ctx);
+ break;
+ }
+ break;
+ default:
+ pr_err_once("bpf-jit: invalid atomic load/store opcode %02x\n", imm);
+ return -EINVAL;
+ }
+
+ return 0;
+}
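
The fence placement above follows the usual RISC-V mapping for acquire/release
semantics on plain loads and stores; as an editorial sketch, the emitted
sequences look roughly like:

    /* dst = load_acquire(src + off) */
    lw    rd, off(rs)        /* plain load ... */
    fence r, rw              /* ... not reordered past later accesses */

    /* store_release(dst + off, src) */
    fence rw, w              /* earlier accesses complete first ... */
    sw    rs, off(rd)        /* ... then the plain store */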
+
+static int emit_atomic_rmw(u8 rd, u8 rs, const struct bpf_insn *insn,
+ struct rv_jit_context *ctx)
{
- u8 r0;
+ u8 r0, code = insn->code;
+ s16 off = insn->off;
+ s32 imm = insn->imm;
int jmp_offset;
+ bool is64;
+
+ if (BPF_SIZE(code) != BPF_W && BPF_SIZE(code) != BPF_DW) {
+ pr_err_once("bpf-jit: 1- and 2-byte RMW atomics are not supported\n");
+ return -EINVAL;
+ }
+ is64 = BPF_SIZE(code) == BPF_DW;
if (off) {
if (is_12b_int(off)) {
@@ -554,9 +755,14 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
rv_sc_w(RV_REG_T3, rs, rd, 0, 1), ctx);
jmp_offset = ninsns_rvoff(-6);
emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx);
- emit(rv_fence(0x3, 0x3), ctx);
+ emit_fence_rw_rw(ctx);
break;
+ default:
+ pr_err_once("bpf-jit: invalid atomic RMW opcode %02x\n", imm);
+ return -EINVAL;
}
+
+ return 0;
}
#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
@@ -1650,8 +1856,8 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
{
- int insn_len, insns_start;
bool sign_ext;
+ int insn_len;
sign_ext = BPF_MODE(insn->code) == BPF_MEMSX ||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
@@ -1663,78 +1869,16 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
switch (BPF_SIZE(code)) {
case BPF_B:
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lb(rd, off, rs), ctx);
- else
- emit(rv_lbu(rd, off, rs), ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lb(rd, 0, RV_REG_T1), ctx);
- else
- emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
- insn_len = ctx->ninsns - insns_start;
+ insn_len = emit_load_8(sign_ext, rd, off, rs, ctx);
break;
case BPF_H:
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lh(rd, off, rs), ctx);
- else
- emit(rv_lhu(rd, off, rs), ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lh(rd, 0, RV_REG_T1), ctx);
- else
- emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
- insn_len = ctx->ninsns - insns_start;
+ insn_len = emit_load_16(sign_ext, rd, off, rs, ctx);
break;
case BPF_W:
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lw(rd, off, rs), ctx);
- else
- emit(rv_lwu(rd, off, rs), ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lw(rd, 0, RV_REG_T1), ctx);
- else
- emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
- insn_len = ctx->ninsns - insns_start;
+ insn_len = emit_load_32(sign_ext, rd, off, rs, ctx);
break;
case BPF_DW:
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- emit_ld(rd, off, rs, ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
- insns_start = ctx->ninsns;
- emit_ld(rd, 0, RV_REG_T1, ctx);
- insn_len = ctx->ninsns - insns_start;
+ insn_len = emit_load_64(sign_ext, rd, off, rs, ctx);
break;
}
@@ -1879,49 +2023,27 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
/* STX: *(size *)(dst + off) = src */
case BPF_STX | BPF_MEM | BPF_B:
- if (is_12b_int(off)) {
- emit(rv_sb(rd, off, rs), ctx);
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- emit(rv_sb(RV_REG_T1, 0, rs), ctx);
+ emit_store_8(rd, off, rs, ctx);
break;
case BPF_STX | BPF_MEM | BPF_H:
- if (is_12b_int(off)) {
- emit(rv_sh(rd, off, rs), ctx);
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- emit(rv_sh(RV_REG_T1, 0, rs), ctx);
+ emit_store_16(rd, off, rs, ctx);
break;
case BPF_STX | BPF_MEM | BPF_W:
- if (is_12b_int(off)) {
- emit_sw(rd, off, rs, ctx);
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- emit_sw(RV_REG_T1, 0, rs, ctx);
+ emit_store_32(rd, off, rs, ctx);
break;
case BPF_STX | BPF_MEM | BPF_DW:
- if (is_12b_int(off)) {
- emit_sd(rd, off, rs, ctx);
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- emit_sd(RV_REG_T1, 0, rs, ctx);
+ emit_store_64(rd, off, rs, ctx);
break;
+ case BPF_STX | BPF_ATOMIC | BPF_B:
+ case BPF_STX | BPF_ATOMIC | BPF_H:
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
- emit_atomic(rd, rs, off, imm,
- BPF_SIZE(code) == BPF_DW, ctx);
+ if (bpf_atomic_is_load_store(insn))
+ ret = emit_atomic_ld_st(rd, rs, insn, ctx);
+ else
+ ret = emit_atomic_rmw(rd, rs, insn, ctx);
+ if (ret)
+ return ret;
break;
case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index f8cd2f70a7fb..f6ca5cfa6b2f 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -26,9 +26,8 @@ static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset)
int ret;
ret = bpf_jit_emit_insn(insn, ctx, extra_pass);
- /* BPF_LD | BPF_IMM | BPF_DW: skip the next instruction. */
if (ret > 0)
- i++;
+ i++; /* skip the next instruction */
if (offset)
offset[i] = ctx->ninsns;
if (ret < 0)
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
index 192835a3e24d..c7c96282f011 100644
--- a/arch/s390/include/asm/nospec-branch.h
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -26,8 +26,6 @@ static inline bool nospec_uses_trampoline(void)
return __is_defined(CC_USING_EXPOLINE) && !nospec_disable;
}
-#ifdef CONFIG_EXPOLINE_EXTERN
-
void __s390_indirect_jump_r1(void);
void __s390_indirect_jump_r2(void);
void __s390_indirect_jump_r3(void);
@@ -44,8 +42,6 @@ void __s390_indirect_jump_r13(void);
void __s390_indirect_jump_r14(void);
void __s390_indirect_jump_r15(void);
-#endif
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_EXPOLINE_H */
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 0776dfde2dba..c7f8313ba449 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -48,8 +48,6 @@ struct bpf_jit {
int lit64; /* Current position in 64-bit literal pool */
int base_ip; /* Base address for literal pool */
int exit_ip; /* Address of exit */
- int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */
- int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
int tail_call_start; /* Tail call start offset */
int excnt; /* Number of exception table entries */
int prologue_plt_ret; /* Return address for prologue hotpatch PLT */
@@ -127,6 +125,18 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
jit->seen_regs |= (1 << r1);
}
+static s32 off_to_pcrel(struct bpf_jit *jit, u32 off)
+{
+ return off - jit->prg;
+}
+
+static s64 ptr_to_pcrel(struct bpf_jit *jit, const void *ptr)
+{
+ if (jit->prg_buf)
+ return (const u8 *)ptr - ((const u8 *)jit->prg_buf + jit->prg);
+ return 0;
+}
+
#define REG_SET_SEEN(b1) \
({ \
reg_set_seen(jit, b1); \
@@ -201,7 +211,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT4_PCREL_RIC(op, mask, target) \
({ \
- int __rel = ((target) - jit->prg) / 2; \
+ int __rel = off_to_pcrel(jit, target) / 2; \
_EMIT4((op) | (mask) << 20 | (__rel & 0xffff)); \
})
@@ -239,7 +249,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target) \
({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
+ unsigned int rel = off_to_pcrel(jit, target) / 2; \
_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), \
(op2) | (mask) << 12); \
REG_SET_SEEN(b1); \
@@ -248,7 +258,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target) \
({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
+ unsigned int rel = off_to_pcrel(jit, target) / 2; \
_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 | \
(rel & 0xffff), (op2) | ((imm) & 0xff) << 8); \
REG_SET_SEEN(b1); \
@@ -257,29 +267,41 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \
({ \
- int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2; \
+ int rel = off_to_pcrel(jit, addrs[(i) + (off) + 1]) / 2;\
_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
+static void emit6_pcrel_ril(struct bpf_jit *jit, u32 op, s64 pcrel)
+{
+ u32 pc32dbl = (s32)(pcrel / 2);
+
+ _EMIT6(op | pc32dbl >> 16, pc32dbl & 0xffff);
+}
+
+static void emit6_pcrel_rilb(struct bpf_jit *jit, u32 op, u8 b, s64 pcrel)
+{
+ emit6_pcrel_ril(jit, op | reg_high(b) << 16, pcrel);
+ REG_SET_SEEN(b);
+}
+
#define EMIT6_PCREL_RILB(op, b, target) \
-({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
- _EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
- REG_SET_SEEN(b); \
-})
+ emit6_pcrel_rilb(jit, op, b, off_to_pcrel(jit, target))
-#define EMIT6_PCREL_RIL(op, target) \
-({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
- _EMIT6((op) | rel >> 16, rel & 0xffff); \
-})
+#define EMIT6_PCREL_RILB_PTR(op, b, target_ptr) \
+ emit6_pcrel_rilb(jit, op, b, ptr_to_pcrel(jit, target_ptr))
+
+static void emit6_pcrel_rilc(struct bpf_jit *jit, u32 op, u8 mask, s64 pcrel)
+{
+ emit6_pcrel_ril(jit, op | mask << 20, pcrel);
+}
#define EMIT6_PCREL_RILC(op, mask, target) \
-({ \
- EMIT6_PCREL_RIL((op) | (mask) << 20, (target)); \
-})
+ emit6_pcrel_rilc(jit, op, mask, off_to_pcrel(jit, target))
+
+#define EMIT6_PCREL_RILC_PTR(op, mask, target_ptr) \
+ emit6_pcrel_rilc(jit, op, mask, ptr_to_pcrel(jit, target_ptr))
#define _EMIT6_IMM(op, imm) \
({ \
@@ -503,7 +525,7 @@ static void bpf_skip(struct bpf_jit *jit, int size)
{
if (size >= 6 && !is_valid_rel(size)) {
/* brcl 0xf,size */
- EMIT6_PCREL_RIL(0xc0f4000000, size);
+ EMIT6_PCREL_RILC(0xc0040000, 0xf, size);
size -= 6;
} else if (size >= 4 && is_valid_rel(size)) {
/* brc 0xf,size */
@@ -605,43 +627,30 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
}
/* Setup stack and backchain */
if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- /* lgr %w1,%r15 (backchain) */
- EMIT4(0xb9040000, REG_W1, REG_15);
+ /* lgr %w1,%r15 (backchain) */
+ EMIT4(0xb9040000, REG_W1, REG_15);
/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
/* aghi %r15,-STK_OFF */
EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- /* stg %w1,152(%r15) (backchain) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
- REG_15, 152);
+ /* stg %w1,152(%r15) (backchain) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+ REG_15, 152);
}
}
/*
- * Emit an expoline for a jump that follows
+ * Jump using a register either directly or via an expoline thunk
*/
-static void emit_expoline(struct bpf_jit *jit)
-{
- /* exrl %r0,.+10 */
- EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
- /* j . */
- EMIT4_PCREL(0xa7f40000, 0);
-}
-
-/*
- * Emit __s390_indirect_jump_r1 thunk if necessary
- */
-static void emit_r1_thunk(struct bpf_jit *jit)
-{
- if (nospec_uses_trampoline()) {
- jit->r1_thunk_ip = jit->prg;
- emit_expoline(jit);
- /* br %r1 */
- _EMIT2(0x07f1);
- }
-}
+#define EMIT_JUMP_REG(reg) do { \
+ if (nospec_uses_trampoline()) \
+ /* brcl 0xf,__s390_indirect_jump_rN */ \
+ EMIT6_PCREL_RILC_PTR(0xc0040000, 0x0f, \
+ __s390_indirect_jump_r ## reg); \
+ else \
+ /* br %rN */ \
+ _EMIT2(0x07f0 | reg); \
+} while (0)
/*
* Call r1 either directly or via __s390_indirect_jump_r1 thunk
@@ -650,7 +659,8 @@ static void call_r1(struct bpf_jit *jit)
{
if (nospec_uses_trampoline())
/* brasl %r14,__s390_indirect_jump_r1 */
- EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
+ __s390_indirect_jump_r1);
else
/* basr %r14,%r1 */
EMIT2(0x0d00, REG_14, REG_1);
@@ -666,16 +676,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
EMIT4(0xb9040000, REG_2, BPF_REG_0);
/* Restore registers */
save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
- if (nospec_uses_trampoline()) {
- jit->r14_thunk_ip = jit->prg;
- /* Generate __s390_indirect_jump_r14 thunk */
- emit_expoline(jit);
- }
- /* br %r14 */
- _EMIT2(0x07fe);
-
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- emit_r1_thunk(jit);
+ EMIT_JUMP_REG(14);
jit->prg = ALIGN(jit->prg, 8);
jit->prologue_plt = jit->prg;
@@ -1877,7 +1878,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/* aghi %r1,tail_call_start */
EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start);
/* brcl 0xf,__s390_indirect_jump_r1 */
- EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip);
+ EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf,
+ __s390_indirect_jump_r1);
} else {
/* bc 0xf,tail_call_start(%r1) */
_EMIT4(0x47f01000 + jit->tail_call_start);
@@ -2585,9 +2587,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
if (nr_stack_args > MAX_NR_STACK_ARGS)
return -ENOTSUPP;
- /* Return to %r14, since func_addr and %r0 are not available. */
- if ((!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK)) ||
- (flags & BPF_TRAMP_F_INDIRECT))
+ /* Return to %r14 in the struct_ops case. */
+ if (flags & BPF_TRAMP_F_INDIRECT)
flags |= BPF_TRAMP_F_SKIP_FRAME;
/*
@@ -2847,17 +2848,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
0xf000 | tjit->tccnt_off);
/* aghi %r15,stack_size */
EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
- /* Emit an expoline for the following indirect jump. */
- if (nospec_uses_trampoline())
- emit_expoline(jit);
if (flags & BPF_TRAMP_F_SKIP_FRAME)
- /* br %r14 */
- _EMIT2(0x07fe);
+ EMIT_JUMP_REG(14);
else
- /* br %r1 */
- _EMIT2(0x07f1);
-
- emit_r1_thunk(jit);
+ EMIT_JUMP_REG(1);
return 0;
}
diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c
index b34623a69b8a..6b13feed06df 100644
--- a/drivers/bluetooth/btnxpuart.c
+++ b/drivers/bluetooth/btnxpuart.c
@@ -533,6 +533,8 @@ static int ps_setup(struct hci_dev *hdev)
ps_host_wakeup_irq_handler,
IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
dev_name(&serdev->dev), nxpdev);
+ if (ret)
+ bt_dev_info(hdev, "error setting wakeup IRQ handler, ignoring\n");
disable_irq(psdata->irq_handler);
device_init_wakeup(&serdev->dev, true);
}
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index e00590ba24fd..a2dc39c005f4 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -2415,14 +2415,14 @@ static int qca_serdev_probe(struct serdev_device *serdev)
qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable",
GPIOD_OUT_LOW);
- if (IS_ERR(qcadev->bt_en) &&
- (data->soc_type == QCA_WCN6750 ||
- data->soc_type == QCA_WCN6855)) {
- dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n");
- return PTR_ERR(qcadev->bt_en);
- }
+ if (IS_ERR(qcadev->bt_en))
+ return dev_err_probe(&serdev->dev,
+ PTR_ERR(qcadev->bt_en),
+ "failed to acquire BT_EN gpio\n");
- if (!qcadev->bt_en)
+ if (!qcadev->bt_en &&
+ (data->soc_type == QCA_WCN6750 ||
+ data->soc_type == QCA_WCN6855))
power_ctrl_enabled = false;
qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl",
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 0c48d41dd5eb..890ecac04dac 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -19,7 +19,9 @@
#include <linux/anon_inodes.h>
#include <linux/export.h>
#include <linux/debugfs.h>
+#include <linux/list.h>
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/seq_file.h>
#include <linux/sync_file.h>
#include <linux/poll.h>
@@ -35,35 +37,91 @@
static inline int is_dma_buf_file(struct file *);
-#if IS_ENABLED(CONFIG_DEBUG_FS)
-static DEFINE_MUTEX(debugfs_list_mutex);
-static LIST_HEAD(debugfs_list);
+static DEFINE_MUTEX(dmabuf_list_mutex);
+static LIST_HEAD(dmabuf_list);
-static void __dma_buf_debugfs_list_add(struct dma_buf *dmabuf)
+static void __dma_buf_list_add(struct dma_buf *dmabuf)
{
- mutex_lock(&debugfs_list_mutex);
- list_add(&dmabuf->list_node, &debugfs_list);
- mutex_unlock(&debugfs_list_mutex);
+ mutex_lock(&dmabuf_list_mutex);
+ list_add(&dmabuf->list_node, &dmabuf_list);
+ mutex_unlock(&dmabuf_list_mutex);
}
-static void __dma_buf_debugfs_list_del(struct dma_buf *dmabuf)
+static void __dma_buf_list_del(struct dma_buf *dmabuf)
{
if (!dmabuf)
return;
- mutex_lock(&debugfs_list_mutex);
+ mutex_lock(&dmabuf_list_mutex);
list_del(&dmabuf->list_node);
- mutex_unlock(&debugfs_list_mutex);
+ mutex_unlock(&dmabuf_list_mutex);
}
-#else
-static void __dma_buf_debugfs_list_add(struct dma_buf *dmabuf)
+
+/**
+ * dma_buf_iter_begin - begin iteration through global list of all DMA buffers
+ *
+ * Returns the first buffer in the global list of DMA-bufs that's not in the
+ * process of being destroyed. Increments that buffer's reference count to
+ * prevent buffer destruction. Callers must release the reference, either by
+ * continuing iteration with dma_buf_iter_next(), or with dma_buf_put().
+ *
+ * Return:
+ * * First buffer from global list, with refcount elevated
+ * * NULL if no active buffers are present
+ */
+struct dma_buf *dma_buf_iter_begin(void)
{
+ struct dma_buf *ret = NULL, *dmabuf;
+
+ /*
+ * The list mutex does not protect a dmabuf's refcount, so it can be
+ * zeroed while we are iterating. We cannot call get_dma_buf() since the
+ * caller may not already own a reference to the buffer.
+ */
+ mutex_lock(&dmabuf_list_mutex);
+ list_for_each_entry(dmabuf, &dmabuf_list, list_node) {
+ if (file_ref_get(&dmabuf->file->f_ref)) {
+ ret = dmabuf;
+ break;
+ }
+ }
+ mutex_unlock(&dmabuf_list_mutex);
+ return ret;
}
-static void __dma_buf_debugfs_list_del(struct dma_buf *dmabuf)
+/**
+ * dma_buf_iter_next - continue iteration through global list of all DMA buffers
+ * @dmabuf: [in] pointer to dma_buf
+ *
+ * Decrements the reference count on the provided buffer. Returns the next
+ * buffer from the remainder of the global list of DMA-bufs with its reference
+ * count incremented. Callers must release the reference, either by continuing
+ * iteration with dma_buf_iter_next(), or with dma_buf_put().
+ *
+ * Return:
+ * * Next buffer from global list, with refcount elevated
+ * * NULL if no additional active buffers are present
+ */
+struct dma_buf *dma_buf_iter_next(struct dma_buf *dmabuf)
{
+ struct dma_buf *ret = NULL;
+
+ /*
+ * The list mutex does not protect a dmabuf's refcount, so it can be
+ * zeroed while we are iterating. We cannot call get_dma_buf() since the
+ * caller may not already own a reference to the buffer.
+ */
+ mutex_lock(&dmabuf_list_mutex);
+ dma_buf_put(dmabuf);
+ list_for_each_entry_continue(dmabuf, &dmabuf_list, list_node) {
+ if (file_ref_get(&dmabuf->file->f_ref)) {
+ ret = dmabuf;
+ break;
+ }
+ }
+ mutex_unlock(&dmabuf_list_mutex);
+ return ret;
}
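
As a usage sketch (editorial, following the semantics documented above),
a caller walks the list while holding exactly one elevated reference at
a time:

	struct dma_buf *d;

	for (d = dma_buf_iter_begin(); d; d = dma_buf_iter_next(d)) {
		/* inspect *d; iteration carries the reference forward */
	}
	/* if breaking out early, drop the reference: dma_buf_put(d); */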
-#endif
static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen)
{
@@ -115,7 +173,7 @@ static int dma_buf_file_release(struct inode *inode, struct file *file)
if (!is_dma_buf_file(file))
return -EINVAL;
- __dma_buf_debugfs_list_del(file->private_data);
+ __dma_buf_list_del(file->private_data);
return 0;
}
@@ -685,7 +743,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
file->f_path.dentry->d_fsdata = dmabuf;
dmabuf->file = file;
- __dma_buf_debugfs_list_add(dmabuf);
+ __dma_buf_list_add(dmabuf);
return dmabuf;
@@ -1563,7 +1621,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
size_t size = 0;
int ret;
- ret = mutex_lock_interruptible(&debugfs_list_mutex);
+ ret = mutex_lock_interruptible(&dmabuf_list_mutex);
if (ret)
return ret;
@@ -1572,7 +1630,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
seq_printf(s, "%-8s\t%-8s\t%-8s\t%-8s\texp_name\t%-8s\tname\n",
"size", "flags", "mode", "count", "ino");
- list_for_each_entry(buf_obj, &debugfs_list, list_node) {
+ list_for_each_entry(buf_obj, &dmabuf_list, list_node) {
ret = dma_resv_lock_interruptible(buf_obj->resv, NULL);
if (ret)
@@ -1609,11 +1667,11 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
seq_printf(s, "\nTotal %d objects, %zu bytes\n", count, size);
- mutex_unlock(&debugfs_list_mutex);
+ mutex_unlock(&dmabuf_list_mutex);
return 0;
error_unlock:
- mutex_unlock(&debugfs_list_mutex);
+ mutex_unlock(&dmabuf_list_mutex);
return ret;
}
diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c
index 7d3066691d5d..52301511ed1b 100644
--- a/drivers/net/can/kvaser_pciefd.c
+++ b/drivers/net/can/kvaser_pciefd.c
@@ -966,7 +966,7 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie)
u32 status, tx_nr_packets_max;
netdev = alloc_candev(sizeof(struct kvaser_pciefd_can),
- KVASER_PCIEFD_CAN_TX_MAX_COUNT);
+ roundup_pow_of_two(KVASER_PCIEFD_CAN_TX_MAX_COUNT));
if (!netdev)
return -ENOMEM;
@@ -995,7 +995,6 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie)
can->tx_max_count = min(KVASER_PCIEFD_CAN_TX_MAX_COUNT, tx_nr_packets_max - 1);
can->can.clock.freq = pcie->freq;
- can->can.echo_skb_max = roundup_pow_of_two(can->tx_max_count);
spin_lock_init(&can->lock);
can->can.bittiming_const = &kvaser_pciefd_bittiming_const;
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 132683ed3abe..862bdccb7439 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -22,6 +22,7 @@
#include <linux/gpio.h>
#include <linux/kernel.h>
#include <linux/math.h>
+#include <linux/minmax.h>
#include <linux/module.h>
#include <linux/platform_data/b53.h>
#include <linux/phy.h>
@@ -1322,41 +1323,17 @@ static void b53_adjust_63xx_rgmii(struct dsa_switch *ds, int port,
phy_interface_t interface)
{
struct b53_device *dev = ds->priv;
- u8 rgmii_ctrl = 0, off;
-
- if (port == dev->imp_port)
- off = B53_RGMII_CTRL_IMP;
- else
- off = B53_RGMII_CTRL_P(port);
+ u8 rgmii_ctrl = 0;
- b53_read8(dev, B53_CTRL_PAGE, off, &rgmii_ctrl);
-
- switch (interface) {
- case PHY_INTERFACE_MODE_RGMII_ID:
- rgmii_ctrl |= (RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC);
- break;
- case PHY_INTERFACE_MODE_RGMII_RXID:
- rgmii_ctrl &= ~(RGMII_CTRL_DLL_TXC);
- rgmii_ctrl |= RGMII_CTRL_DLL_RXC;
- break;
- case PHY_INTERFACE_MODE_RGMII_TXID:
- rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC);
- rgmii_ctrl |= RGMII_CTRL_DLL_TXC;
- break;
- case PHY_INTERFACE_MODE_RGMII:
- default:
- rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC);
- break;
- }
+ b53_read8(dev, B53_CTRL_PAGE, B53_RGMII_CTRL_P(port), &rgmii_ctrl);
+ rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC);
- if (port != dev->imp_port) {
- if (is63268(dev))
- rgmii_ctrl |= RGMII_CTRL_MII_OVERRIDE;
+ if (is63268(dev))
+ rgmii_ctrl |= RGMII_CTRL_MII_OVERRIDE;
- rgmii_ctrl |= RGMII_CTRL_ENABLE_GMII;
- }
+ rgmii_ctrl |= RGMII_CTRL_ENABLE_GMII;
- b53_write8(dev, B53_CTRL_PAGE, off, rgmii_ctrl);
+ b53_write8(dev, B53_CTRL_PAGE, B53_RGMII_CTRL_P(port), rgmii_ctrl);
dev_dbg(ds->dev, "Configured port %d for %s\n", port,
phy_modes(interface));
@@ -1377,8 +1354,7 @@ static void b53_adjust_531x5_rgmii(struct dsa_switch *ds, int port,
* tx_clk aligned timing (restoring to reset defaults)
*/
b53_read8(dev, B53_CTRL_PAGE, off, &rgmii_ctrl);
- rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC |
- RGMII_CTRL_TIMING_SEL);
+ rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC);
/* PHY_INTERFACE_MODE_RGMII_TXID means TX internal delay, make
* sure that we enable the port TX clock internal delay to
@@ -1398,7 +1374,10 @@ static void b53_adjust_531x5_rgmii(struct dsa_switch *ds, int port,
rgmii_ctrl |= RGMII_CTRL_DLL_TXC;
if (interface == PHY_INTERFACE_MODE_RGMII)
rgmii_ctrl |= RGMII_CTRL_DLL_TXC | RGMII_CTRL_DLL_RXC;
- rgmii_ctrl |= RGMII_CTRL_TIMING_SEL;
+
+ if (dev->chip_id != BCM53115_DEVICE_ID)
+ rgmii_ctrl |= RGMII_CTRL_TIMING_SEL;
+
b53_write8(dev, B53_CTRL_PAGE, off, rgmii_ctrl);
dev_info(ds->dev, "Configured port %d for %s\n", port,
@@ -1462,6 +1441,10 @@ static void b53_phylink_get_caps(struct dsa_switch *ds, int port,
__set_bit(PHY_INTERFACE_MODE_MII, config->supported_interfaces);
__set_bit(PHY_INTERFACE_MODE_REVMII, config->supported_interfaces);
+	/* the four BCM63xx RGMII ports support all RGMII modes */
+ if (is63xx(dev) && in_range(port, B53_63XX_RGMII0, 4))
+ phy_interface_set_rgmii(config->supported_interfaces);
+
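/*
 * Illustrative note, not part of the patch: in_range(port, B53_63XX_RGMII0, 4)
 * from <linux/minmax.h> (included above) is true for the four consecutive
 * RGMII ports B53_63XX_RGMII0 .. B53_63XX_RGMII0 + 3, replacing the
 * open-ended "port >= B53_63XX_RGMII0" comparison used further down.
 */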
config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
MAC_10 | MAC_100;
@@ -1501,7 +1484,7 @@ static void b53_phylink_mac_config(struct phylink_config *config,
struct b53_device *dev = ds->priv;
int port = dp->index;
- if (is63xx(dev) && port >= B53_63XX_RGMII0)
+ if (is63xx(dev) && in_range(port, B53_63XX_RGMII0, 4))
b53_adjust_63xx_rgmii(ds, port, interface);
if (mode == MLO_AN_FIXED) {
@@ -2353,6 +2336,9 @@ int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy)
{
int ret;
+ if (!b53_support_eee(ds, port))
+ return 0;
+
ret = phy_init_eee(phy, false);
if (ret)
return 0;
@@ -2367,7 +2353,7 @@ bool b53_support_eee(struct dsa_switch *ds, int port)
{
struct b53_device *dev = ds->priv;
- return !is5325(dev) && !is5365(dev);
+ return !is5325(dev) && !is5365(dev) && !is63xx(dev);
}
EXPORT_SYMBOL(b53_support_eee);
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index d1d3b854361e..a7ec609d64de 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -84,6 +84,8 @@ static void airoha_set_macaddr(struct airoha_gdm_port *port, const u8 *addr)
val = (addr[3] << 16) | (addr[4] << 8) | addr[5];
airoha_fe_wr(eth, REG_FE_MAC_LMIN(reg), val);
airoha_fe_wr(eth, REG_FE_MAC_LMAX(reg), val);
+
+ airoha_ppe_init_upd_mem(port);
}
static void airoha_set_gdm_port_fwd_cfg(struct airoha_eth *eth, u32 addr,
diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h
index b815697302bf..a970b789cf23 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.h
+++ b/drivers/net/ethernet/airoha/airoha_eth.h
@@ -614,6 +614,7 @@ void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb,
int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data);
int airoha_ppe_init(struct airoha_eth *eth);
void airoha_ppe_deinit(struct airoha_eth *eth);
+void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port);
struct airoha_foe_entry *airoha_ppe_foe_get_entry(struct airoha_ppe *ppe,
u32 hash);
void airoha_ppe_foe_entry_get_stats(struct airoha_ppe *ppe, u32 hash,
diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c
index 12d32c92717a..9067d2fc7706 100644
--- a/drivers/net/ethernet/airoha/airoha_ppe.c
+++ b/drivers/net/ethernet/airoha/airoha_ppe.c
@@ -223,6 +223,7 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth,
int dsa_port = airoha_get_dsa_port(&dev);
struct airoha_foe_mac_info_common *l2;
u32 qdata, ports_pad, val;
+ u8 smac_id = 0xf;
memset(hwe, 0, sizeof(*hwe));
@@ -257,6 +258,8 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth,
*/
if (airhoa_is_lan_gdm_port(port))
val |= AIROHA_FOE_IB2_FAST_PATH;
+
+ smac_id = port->id;
}
if (is_multicast_ether_addr(data->eth.h_dest))
@@ -291,7 +294,7 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth,
hwe->ipv4.l2.src_mac_lo =
get_unaligned_be16(data->eth.h_source + 4);
} else {
- l2->src_mac_hi = FIELD_PREP(AIROHA_FOE_MAC_SMAC_ID, 0xf);
+ l2->src_mac_hi = FIELD_PREP(AIROHA_FOE_MAC_SMAC_ID, smac_id);
}
if (data->vlan.num) {
@@ -636,7 +639,6 @@ airoha_ppe_foe_commit_subflow_entry(struct airoha_ppe *ppe,
u32 mask = AIROHA_FOE_IB1_BIND_PACKET_TYPE | AIROHA_FOE_IB1_BIND_UDP;
struct airoha_foe_entry *hwe_p, hwe;
struct airoha_flow_table_entry *f;
- struct airoha_foe_mac_info *l2;
int type;
hwe_p = airoha_ppe_foe_get_entry(ppe, hash);
@@ -653,18 +655,25 @@ airoha_ppe_foe_commit_subflow_entry(struct airoha_ppe *ppe,
memcpy(&hwe, hwe_p, sizeof(*hwe_p));
hwe.ib1 = (hwe.ib1 & mask) | (e->data.ib1 & ~mask);
- l2 = &hwe.bridge.l2;
- memcpy(l2, &e->data.bridge.l2, sizeof(*l2));
type = FIELD_GET(AIROHA_FOE_IB1_BIND_PACKET_TYPE, hwe.ib1);
- if (type == PPE_PKT_TYPE_IPV4_HNAPT)
- memcpy(&hwe.ipv4.new_tuple, &hwe.ipv4.orig_tuple,
- sizeof(hwe.ipv4.new_tuple));
- else if (type >= PPE_PKT_TYPE_IPV6_ROUTE_3T &&
- l2->common.etype == ETH_P_IP)
- l2->common.etype = ETH_P_IPV6;
-
- hwe.bridge.ib2 = e->data.bridge.ib2;
+ if (type >= PPE_PKT_TYPE_IPV6_ROUTE_3T) {
+ memcpy(&hwe.ipv6.l2, &e->data.bridge.l2, sizeof(hwe.ipv6.l2));
+ hwe.ipv6.ib2 = e->data.bridge.ib2;
+		/* setting smac_id to 0xf instructs the hw to keep the
+		 * original source mac address
+ */
+ hwe.ipv6.l2.src_mac_hi = FIELD_PREP(AIROHA_FOE_MAC_SMAC_ID,
+ 0xf);
+ } else {
+ memcpy(&hwe.bridge.l2, &e->data.bridge.l2,
+ sizeof(hwe.bridge.l2));
+ hwe.bridge.ib2 = e->data.bridge.ib2;
+ if (type == PPE_PKT_TYPE_IPV4_HNAPT)
+ memcpy(&hwe.ipv4.new_tuple, &hwe.ipv4.orig_tuple,
+ sizeof(hwe.ipv4.new_tuple));
+ }
+
hwe.bridge.data = e->data.bridge.data;
airoha_ppe_foe_commit_entry(ppe, &hwe, hash);
@@ -1238,6 +1247,27 @@ void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb,
airoha_ppe_foe_insert_entry(ppe, skb, hash);
}
+void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port)
+{
+ struct airoha_eth *eth = port->qdma->eth;
+ struct net_device *dev = port->dev;
+ const u8 *addr = dev->dev_addr;
+ u32 val;
+
+ val = (addr[2] << 24) | (addr[3] << 16) | (addr[4] << 8) | addr[5];
+ airoha_fe_wr(eth, REG_UPDMEM_DATA(0), val);
+ airoha_fe_wr(eth, REG_UPDMEM_CTRL(0),
+ FIELD_PREP(PPE_UPDMEM_ADDR_MASK, port->id) |
+ PPE_UPDMEM_WR_MASK | PPE_UPDMEM_REQ_MASK);
+
+ val = (addr[0] << 8) | addr[1];
+ airoha_fe_wr(eth, REG_UPDMEM_DATA(0), val);
+ airoha_fe_wr(eth, REG_UPDMEM_CTRL(0),
+ FIELD_PREP(PPE_UPDMEM_ADDR_MASK, port->id) |
+ FIELD_PREP(PPE_UPDMEM_OFFSET_MASK, 1) |
+ PPE_UPDMEM_WR_MASK | PPE_UPDMEM_REQ_MASK);
+}
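/*
 * Illustrative example, not part of the patch: for a port with MAC address
 * 00:11:22:33:44:55, the first write above pushes 0x22334455 into
 * REG_UPDMEM_DATA and commits it at offset 0 of the port entry, while the
 * second pushes 0x0011 and commits it at offset 1 via PPE_UPDMEM_OFFSET_MASK.
 */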
+
int airoha_ppe_init(struct airoha_eth *eth)
{
struct airoha_ppe *ppe;
diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h
index d931530fc96f..04187eb40ec6 100644
--- a/drivers/net/ethernet/airoha/airoha_regs.h
+++ b/drivers/net/ethernet/airoha/airoha_regs.h
@@ -313,6 +313,16 @@
#define REG_PPE_RAM_BASE(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x320)
#define REG_PPE_RAM_ENTRY(_m, _n) (REG_PPE_RAM_BASE(_m) + ((_n) << 2))
+#define REG_UPDMEM_CTRL(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x370)
+#define PPE_UPDMEM_ACK_MASK BIT(31)
+#define PPE_UPDMEM_ADDR_MASK GENMASK(11, 8)
+#define PPE_UPDMEM_OFFSET_MASK GENMASK(7, 4)
+#define PPE_UPDMEM_SEL_MASK GENMASK(3, 2)
+#define PPE_UPDMEM_WR_MASK BIT(1)
+#define PPE_UPDMEM_REQ_MASK BIT(0)
+
+#define REG_UPDMEM_DATA(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x374)
+
#define REG_FE_GDM_TX_OK_PKT_CNT_H(_n) (GDM_BASE(_n) + 0x280)
#define REG_FE_GDM_TX_OK_BYTE_CNT_H(_n) (GDM_BASE(_n) + 0x284)
#define REG_FE_GDM_TX_ETH_PKT_CNT_H(_n) (GDM_BASE(_n) + 0x288)
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index e1ffbd561fac..7cd1eda0b449 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -2153,7 +2153,7 @@ void gve_handle_report_stats(struct gve_priv *priv)
};
stats[stats_idx++] = (struct stats) {
.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
- .value = cpu_to_be64(priv->rx[0].fill_cnt),
+ .value = cpu_to_be64(priv->rx[idx].fill_cnt),
.queue_id = cpu_to_be32(idx),
};
}
diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
index a27f1574a733..9d705d94b065 100644
--- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
@@ -764,6 +764,9 @@ static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx,
s16 completion_tag;
pkt = gve_alloc_pending_packet(tx);
+ if (!pkt)
+ return -ENOMEM;
+
pkt->skb = skb;
completion_tag = pkt - tx->dqo.pending_packets;
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_main.c b/drivers/net/ethernet/huawei/hinic3/hinic3_main.c
index 093aa6d775ff..497f2a36f35d 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_main.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_main.c
@@ -324,8 +324,6 @@ static __init int hinic3_nic_lld_init(void)
{
int err;
- pr_info("%s: %s\n", HINIC3_NIC_DRV_NAME, HINIC3_NIC_DRV_DESC);
-
err = hinic3_lld_init();
if (err)
return err;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 20d3baf955e3..d97d4b25b30d 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2741,6 +2741,27 @@ void ice_map_xdp_rings(struct ice_vsi *vsi)
}
/**
+ * ice_unmap_xdp_rings - Unmap XDP rings from interrupt vectors
+ * @vsi: the VSI with XDP rings being unmapped
+ */
+static void ice_unmap_xdp_rings(struct ice_vsi *vsi)
+{
+ int v_idx;
+
+ ice_for_each_q_vector(vsi, v_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+ struct ice_tx_ring *ring;
+
+ ice_for_each_tx_ring(ring, q_vector->tx)
+ if (!ring->tx_buf || !ice_ring_is_xdp(ring))
+ break;
+
+ /* restore the value of last node prior to XDP setup */
+ q_vector->tx.tx_ring = ring;
+ }
+}
+
+/**
* ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
* @vsi: VSI to bring up Tx rings used by XDP
* @prog: bpf program that will be assigned to VSI
@@ -2803,7 +2824,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
if (status) {
dev_err(dev, "Failed VSI LAN queue config for XDP, error: %d\n",
status);
- goto clear_xdp_rings;
+ goto unmap_xdp_rings;
}
/* assign the prog only when it's not already present on VSI;
@@ -2819,6 +2840,8 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
ice_vsi_assign_bpf_prog(vsi, prog);
return 0;
+unmap_xdp_rings:
+ ice_unmap_xdp_rings(vsi);
clear_xdp_rings:
ice_for_each_xdp_txq(vsi, i)
if (vsi->xdp_rings[i]) {
@@ -2835,6 +2858,8 @@ err_map_xdp:
mutex_unlock(&pf->avail_q_mutex);
devm_kfree(dev, vsi->xdp_rings);
+ vsi->xdp_rings = NULL;
+
return -ENOMEM;
}
@@ -2850,7 +2875,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type)
{
u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
struct ice_pf *pf = vsi->back;
- int i, v_idx;
+ int i;
/* q_vectors are freed in reset path so there's no point in detaching
* rings
@@ -2858,17 +2883,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type)
if (cfg_type == ICE_XDP_CFG_PART)
goto free_qmap;
- ice_for_each_q_vector(vsi, v_idx) {
- struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
- struct ice_tx_ring *ring;
-
- ice_for_each_tx_ring(ring, q_vector->tx)
- if (!ring->tx_buf || !ice_ring_is_xdp(ring))
- break;
-
- /* restore the value of last node prior to XDP setup */
- q_vector->tx.tx_ring = ring;
- }
+ ice_unmap_xdp_rings(vsi);
free_qmap:
mutex_lock(&pf->avail_q_mutex);
@@ -3013,11 +3028,14 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
xdp_ring_err = ice_vsi_determine_xdp_res(vsi);
if (xdp_ring_err) {
NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP");
+ goto resume_if;
} else {
xdp_ring_err = ice_prepare_xdp_rings(vsi, prog,
ICE_XDP_CFG_FULL);
- if (xdp_ring_err)
+ if (xdp_ring_err) {
NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
+ goto resume_if;
+ }
}
xdp_features_set_redirect_target(vsi->netdev, true);
/* reallocate Rx queues that are used for zero-copy */
@@ -3035,6 +3053,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed");
}
+resume_if:
if (if_running)
ret = ice_up(vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index 6ca13c5dcb14..d9d09296d1d4 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -85,6 +85,27 @@ ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
}
/**
+ * ice_sched_find_next_vsi_node - find the next node for a given VSI
+ * @vsi_node: VSI support node to start search with
+ *
+ * Return: Next VSI support node, or NULL.
+ *
+ * The function returns a pointer to the next node from the VSI layer
+ * assigned to the given VSI, or NULL if there is no such node.
+ */
+static struct ice_sched_node *
+ice_sched_find_next_vsi_node(struct ice_sched_node *vsi_node)
+{
+ unsigned int vsi_handle = vsi_node->vsi_handle;
+
+ while ((vsi_node = vsi_node->sibling) != NULL)
+ if (vsi_node->vsi_handle == vsi_handle)
+ break;
+
+ return vsi_node;
+}
+
+/**
* ice_aqc_send_sched_elem_cmd - send scheduling elements cmd
* @hw: pointer to the HW struct
* @cmd_opc: cmd opcode
@@ -1084,8 +1105,10 @@ ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
if (parent->num_children < max_child_nodes) {
new_num_nodes = max_child_nodes - parent->num_children;
} else {
- /* This parent is full, try the next sibling */
- parent = parent->sibling;
+ /* This parent is full,
+ * try the next available sibling.
+ */
+ parent = ice_sched_find_next_vsi_node(parent);
/* Don't modify the first node TEID memory if the
* first node was added already in the above call.
* Instead send some temp memory for all other
@@ -1528,12 +1551,23 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
/* get the first queue group node from VSI sub-tree */
qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer);
while (qgrp_node) {
+ struct ice_sched_node *next_vsi_node;
+
/* make sure the qgroup node is part of the VSI subtree */
if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
if (qgrp_node->num_children < max_children &&
qgrp_node->owner == owner)
break;
qgrp_node = qgrp_node->sibling;
+ if (qgrp_node)
+ continue;
+
+ next_vsi_node = ice_sched_find_next_vsi_node(vsi_node);
+ if (!next_vsi_node)
+ break;
+
+ vsi_node = next_vsi_node;
+ qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer);
}
/* Select the best queue group */
@@ -1604,16 +1638,16 @@ ice_sched_get_agg_node(struct ice_port_info *pi, struct ice_sched_node *tc_node,
/**
* ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
* @hw: pointer to the HW struct
- * @num_qs: number of queues
+ * @num_new_qs: number of new queues that will be added to the tree
* @num_nodes: num nodes array
*
* This function calculates the number of VSI child nodes based on the
* number of queues.
*/
static void
-ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes)
+ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_new_qs, u16 *num_nodes)
{
- u16 num = num_qs;
+ u16 num = num_new_qs;
u8 i, qgl, vsil;
qgl = ice_sched_get_qgrp_layer(hw);
@@ -1779,7 +1813,11 @@ ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_handle,
if (!parent)
return -EIO;
- if (i == vsil)
+		/* Do not modify the VSI handle for already existing VSI nodes
+		 * (if no new VSI node was added to the tree).
+ * Assign the VSI handle only to newly added VSI nodes.
+ */
+ if (i == vsil && num_added)
parent->vsi_handle = vsi_handle;
}
@@ -1813,6 +1851,41 @@ ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc)
}
/**
+ * ice_sched_recalc_vsi_support_nodes - recalculate VSI support nodes count
+ * @hw: pointer to the HW struct
+ * @vsi_node: pointer to the leftmost VSI node that needs to be extended
+ * @new_numqs: new number of queues that has to be handled by the VSI
+ * @new_num_nodes: pointer to nodes count table to modify the VSI layer entry
+ *
+ * This function recalculates the number of VSI support nodes that need to
+ * be added after adding more Tx queues for a given VSI.
+ * The number of new VSI support nodes that shall be added will be saved
+ * to the @new_num_nodes table for the VSI layer.
+ */
+static void
+ice_sched_recalc_vsi_support_nodes(struct ice_hw *hw,
+ struct ice_sched_node *vsi_node,
+ unsigned int new_numqs, u16 *new_num_nodes)
+{
+ u32 vsi_nodes_cnt = 1;
+ u32 max_queue_cnt = 1;
+ u32 qgl, vsil;
+
+ qgl = ice_sched_get_qgrp_layer(hw);
+ vsil = ice_sched_get_vsi_layer(hw);
+
+ for (u32 i = vsil; i <= qgl; i++)
+ max_queue_cnt *= hw->max_children[i];
+
+ while ((vsi_node = ice_sched_find_next_vsi_node(vsi_node)) != NULL)
+ vsi_nodes_cnt++;
+
+ if (new_numqs > (max_queue_cnt * vsi_nodes_cnt))
+ new_num_nodes[vsil] = DIV_ROUND_UP(new_numqs, max_queue_cnt) -
+ vsi_nodes_cnt;
+}
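/*
 * Illustrative example, not part of the patch, with assumed layer numbers:
 * if the VSI and queue group layers are adjacent (vsil = 2, qgl = 3) and
 * hw->max_children[] is 8 at both layers, then max_queue_cnt = 8 * 8 = 64
 * queues per VSI support node. With one existing support node
 * (vsi_nodes_cnt = 1) and new_numqs = 150:
 *
 *	new_num_nodes[vsil] = DIV_ROUND_UP(150, 64) - 1 = 3 - 1 = 2
 *
 * i.e. two additional VSI support nodes are requested at the VSI layer.
 */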
+
+/**
* ice_sched_update_vsi_child_nodes - update VSI child nodes
* @pi: port information structure
* @vsi_handle: software VSI handle
@@ -1863,15 +1936,25 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
return status;
}
- if (new_numqs)
- ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
- /* Keep the max number of queue configuration all the time. Update the
- * tree only if number of queues > previous number of queues. This may
+ ice_sched_recalc_vsi_support_nodes(hw, vsi_node,
+ new_numqs, new_num_nodes);
+ ice_sched_calc_vsi_child_nodes(hw, new_numqs - prev_numqs,
+ new_num_nodes);
+
+ /* Never decrease the number of queues in the tree. Update the tree
+ * only if number of queues > previous number of queues. This may
* leave some extra nodes in the tree if number of queues < previous
* number but that wouldn't harm anything. Removing those extra nodes
* may complicate the code if those nodes are part of SRL or
* individually rate limited.
+ * Also, add the required VSI support nodes if the existing ones cannot
+ * handle the requested new number of queues.
*/
+ status = ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node,
+ new_num_nodes);
+ if (status)
+ return status;
+
status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node,
new_num_nodes, owner);
if (status)
@@ -2013,6 +2096,58 @@ static bool ice_sched_is_leaf_node_present(struct ice_sched_node *node)
}
/**
+ * ice_sched_rm_vsi_subtree - remove all nodes assigned to a given VSI
+ * @pi: port information structure
+ * @vsi_node: pointer to the leftmost node of the VSI to be removed
+ * @owner: LAN or RDMA
+ * @tc: TC number
+ *
+ * Return: Zero in case of success, or -EBUSY if the VSI has leaf nodes in TC.
+ *
+ * This function removes all the VSI support nodes associated with a given VSI
+ * and its LAN or RDMA children nodes from the scheduler tree.
+ */
+static int
+ice_sched_rm_vsi_subtree(struct ice_port_info *pi,
+ struct ice_sched_node *vsi_node, u8 owner, u8 tc)
+{
+ u16 vsi_handle = vsi_node->vsi_handle;
+ bool all_vsi_nodes_removed = true;
+ int j = 0;
+
+ while (vsi_node) {
+ struct ice_sched_node *next_vsi_node;
+
+ if (ice_sched_is_leaf_node_present(vsi_node)) {
+ ice_debug(pi->hw, ICE_DBG_SCHED, "VSI has leaf nodes in TC %d\n", tc);
+ return -EBUSY;
+ }
+ while (j < vsi_node->num_children) {
+ if (vsi_node->children[j]->owner == owner)
+ ice_free_sched_node(pi, vsi_node->children[j]);
+ else
+ j++;
+ }
+
+ next_vsi_node = ice_sched_find_next_vsi_node(vsi_node);
+
+ /* remove the VSI if it has no children */
+ if (!vsi_node->num_children)
+ ice_free_sched_node(pi, vsi_node);
+ else
+ all_vsi_nodes_removed = false;
+
+ vsi_node = next_vsi_node;
+ }
+
+ /* clean up aggregator related VSI info if any */
+ if (all_vsi_nodes_removed)
+ ice_sched_rm_agg_vsi_info(pi, vsi_handle);
+
+ return 0;
+}
+
+/**
* ice_sched_rm_vsi_cfg - remove the VSI and its children nodes
* @pi: port information structure
* @vsi_handle: software VSI handle
@@ -2038,7 +2173,6 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
ice_for_each_traffic_class(i) {
struct ice_sched_node *vsi_node, *tc_node;
- u8 j = 0;
tc_node = ice_sched_get_tc_node(pi, i);
if (!tc_node)
@@ -2048,31 +2182,12 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
if (!vsi_node)
continue;
- if (ice_sched_is_leaf_node_present(vsi_node)) {
- ice_debug(pi->hw, ICE_DBG_SCHED, "VSI has leaf nodes in TC %d\n", i);
- status = -EBUSY;
+ status = ice_sched_rm_vsi_subtree(pi, vsi_node, owner, i);
+ if (status)
goto exit_sched_rm_vsi_cfg;
- }
- while (j < vsi_node->num_children) {
- if (vsi_node->children[j]->owner == owner) {
- ice_free_sched_node(pi, vsi_node->children[j]);
- /* reset the counter again since the num
- * children will be updated after node removal
- */
- j = 0;
- } else {
- j++;
- }
- }
- /* remove the VSI if it has no children */
- if (!vsi_node->num_children) {
- ice_free_sched_node(pi, vsi_node);
- vsi_ctx->sched.vsi_node[i] = NULL;
+ vsi_ctx->sched.vsi_node[i] = NULL;
- /* clean up aggregator related VSI info if any */
- ice_sched_rm_agg_vsi_info(pi, vsi_handle);
- }
if (owner == ICE_SCHED_NODE_OWNER_LAN)
vsi_ctx->sched.max_lanq[i] = 0;
else
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index bab12ecb2df5..4eb20ec2accb 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -1801,11 +1801,19 @@ void idpf_vc_event_task(struct work_struct *work)
if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags))
return;
- if (test_bit(IDPF_HR_FUNC_RESET, adapter->flags) ||
- test_bit(IDPF_HR_DRV_LOAD, adapter->flags)) {
- set_bit(IDPF_HR_RESET_IN_PROG, adapter->flags);
- idpf_init_hard_reset(adapter);
- }
+ if (test_bit(IDPF_HR_FUNC_RESET, adapter->flags))
+ goto func_reset;
+
+ if (test_bit(IDPF_HR_DRV_LOAD, adapter->flags))
+ goto drv_load;
+
+ return;
+
+func_reset:
+ idpf_vc_xn_shutdown(adapter->vcxn_mngr);
+drv_load:
+ set_bit(IDPF_HR_RESET_IN_PROG, adapter->flags);
+ idpf_init_hard_reset(adapter);
}
/**
diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
index 2e356dd10812..993c354aa27a 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
@@ -362,17 +362,18 @@ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
{
struct idpf_tx_offload_params offload = { };
struct idpf_tx_buf *first;
+ int csum, tso, needed;
unsigned int count;
__be16 protocol;
- int csum, tso;
count = idpf_tx_desc_count_required(tx_q, skb);
if (unlikely(!count))
return idpf_tx_drop_skb(tx_q, skb);
- if (idpf_tx_maybe_stop_common(tx_q,
- count + IDPF_TX_DESCS_PER_CACHE_LINE +
- IDPF_TX_DESCS_FOR_CTX)) {
+ needed = count + IDPF_TX_DESCS_PER_CACHE_LINE + IDPF_TX_DESCS_FOR_CTX;
+ if (!netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
+ IDPF_DESC_UNUSED(tx_q),
+ needed, needed)) {
idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
u64_stats_update_begin(&tx_q->stats_sync);
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 631679cdaa6f..5cf440e09d0a 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -2184,6 +2184,19 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag);
}
+/* Global conditions to tell whether the txq (and related resources)
+ * has room to allow the use of "size" descriptors.
+ */
+static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size)
+{
+ if (IDPF_DESC_UNUSED(tx_q) < size ||
+ IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) >
+ IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq) ||
+ IDPF_TX_BUF_RSV_LOW(tx_q))
+ return 0;
+ return 1;
+}
+
/**
* idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions
* @tx_q: the queue to be checked
@@ -2194,29 +2207,11 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q,
unsigned int descs_needed)
{
- if (idpf_tx_maybe_stop_common(tx_q, descs_needed))
- goto out;
-
- /* If there are too many outstanding completions expected on the
- * completion queue, stop the TX queue to give the device some time to
- * catch up
- */
- if (unlikely(IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) >
- IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq)))
- goto splitq_stop;
-
- /* Also check for available book keeping buffers; if we are low, stop
- * the queue to wait for more completions
- */
- if (unlikely(IDPF_TX_BUF_RSV_LOW(tx_q)))
- goto splitq_stop;
-
- return 0;
-
-splitq_stop:
- netif_stop_subqueue(tx_q->netdev, tx_q->idx);
+ if (netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
+ idpf_txq_has_room(tx_q, descs_needed),
+ 1, 1))
+ return 0;
-out:
u64_stats_update_begin(&tx_q->stats_sync);
u64_stats_inc(&tx_q->q_stats.q_busy);
u64_stats_update_end(&tx_q->stats_sync);
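/*
 * Illustrative note, not part of the patch, assuming the convention from
 * include/net/netdev_queues.h: netif_subqueue_maybe_stop() returns 1 when
 * the queue keeps running, -1 when it was stopped but immediately restarted,
 * and 0 only when it remains stopped. The early "return 0" above therefore
 * covers both non-stop cases, and q_busy is bumped only when the queue
 * actually stays stopped.
 */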
@@ -2242,12 +2237,6 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val,
nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
tx_q->next_to_use = val;
- if (idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED)) {
- u64_stats_update_begin(&tx_q->stats_sync);
- u64_stats_inc(&tx_q->q_stats.q_busy);
- u64_stats_update_end(&tx_q->stats_sync);
- }
-
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
index c779fe71df99..36a0f828a6f8 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -1049,12 +1049,4 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq,
u16 cleaned_count);
int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off);
-static inline bool idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q,
- u32 needed)
-{
- return !netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
- IDPF_DESC_UNUSED(tx_q),
- needed, needed);
-}
-
#endif /* !_IDPF_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
index 07a9f5ae34fd..24febaaa8fbb 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
@@ -347,7 +347,7 @@ static void idpf_vc_xn_init(struct idpf_vc_xn_manager *vcxn_mngr)
* All waiting threads will be woken-up and their transaction aborted. Further
* operations on that object will fail.
*/
-static void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr)
+void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr)
{
int i;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h
index 3522c1238ea2..77578206bada 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h
@@ -150,5 +150,6 @@ int idpf_send_get_stats_msg(struct idpf_vport *vport);
int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs);
int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get);
int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get);
+void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr);
#endif /* _IDPF_VIRTCHNL_H_ */
diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index b175119a6a7d..b83886a41121 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -1463,6 +1463,8 @@ static __maybe_unused int mtk_star_suspend(struct device *dev)
if (netif_running(ndev))
mtk_star_disable(ndev);
+ netif_device_detach(ndev);
+
clk_bulk_disable_unprepare(MTK_STAR_NCLKS, priv->clks);
return 0;
@@ -1487,6 +1489,8 @@ static __maybe_unused int mtk_star_resume(struct device *dev)
clk_bulk_disable_unprepare(MTK_STAR_NCLKS, priv->clks);
}
+ netif_device_attach(ndev);
+
return ret;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index cd754cd76bde..d73a2044dc26 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -249,7 +249,7 @@ static const struct ptp_clock_info mlx4_en_ptp_clock_info = {
static u32 freq_to_shift(u16 freq)
{
u32 freq_khz = freq * 1000;
- u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC;
+ u64 max_val_cycles = freq_khz * 1000ULL * MLX4_EN_WRAP_AROUND_SEC;
u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1);
/* calculate max possible multiplier in order to fit in 64bit */
u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded);
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 427bdc0e4908..7001584f1b7a 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -879,6 +879,7 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p,
lan966x_vlan_port_set_vlan_aware(port, 0);
lan966x_vlan_port_set_vid(port, HOST_PVID, false, false);
lan966x_vlan_port_apply(port);
+ lan966x_vlan_port_rew_host(port);
return 0;
}
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index 1f9df67f0504..4f75f0688369 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -497,6 +497,7 @@ void lan966x_vlan_port_apply(struct lan966x_port *port);
bool lan966x_vlan_cpu_member_cpu_vlan_mask(struct lan966x *lan966x, u16 vid);
void lan966x_vlan_port_set_vlan_aware(struct lan966x_port *port,
bool vlan_aware);
+void lan966x_vlan_port_rew_host(struct lan966x_port *port);
int lan966x_vlan_port_set_vid(struct lan966x_port *port,
u16 vid,
bool pvid,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
index 1c88120eb291..bcb4db76b75c 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
@@ -297,6 +297,7 @@ static void lan966x_port_bridge_leave(struct lan966x_port *port,
lan966x_vlan_port_set_vlan_aware(port, false);
lan966x_vlan_port_set_vid(port, HOST_PVID, false, false);
lan966x_vlan_port_apply(port);
+ lan966x_vlan_port_rew_host(port);
}
int lan966x_port_changeupper(struct net_device *dev,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c
index fa34a739c748..7da22520724c 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c
@@ -149,6 +149,27 @@ void lan966x_vlan_port_set_vlan_aware(struct lan966x_port *port,
port->vlan_aware = vlan_aware;
}
+/* When the interface is in host mode, it should not be vlan aware, but it
+ * should still insert all the tags it gets from the network stack. Those
+ * tags are not carried in the frame data but in the skb, and the IFH is
+ * already configured to pick them up. So all we need to do is update the
+ * rewriter to insert the vlan tag for every frame carrying a vlan tag
+ * different than 0.
+ */
+void lan966x_vlan_port_rew_host(struct lan966x_port *port)
+{
+ struct lan966x *lan966x = port->lan966x;
+ u32 val;
+
+	/* Tag all frames except when VID=0 */
+ val = REW_TAG_CFG_TAG_CFG_SET(2);
+
+ /* Update only some bits in the register */
+ lan_rmw(val,
+ REW_TAG_CFG_TAG_CFG,
+ lan966x, REW_TAG_CFG(port->chip_port));
+}
+
void lan966x_vlan_port_apply(struct lan966x_port *port)
{
struct lan966x *lan966x = port->lan966x;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c
index c9693f77e1f6..ac6f2e3a3fcd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c
@@ -32,6 +32,11 @@ static int est_configure(struct stmmac_priv *priv, struct stmmac_est *cfg,
int i, ret = 0;
u32 ctrl;
+ if (!ptp_rate) {
+ netdev_warn(priv->dev, "Invalid PTP rate");
+ return -EINVAL;
+ }
+
ret |= est_write(est_addr, EST_BTR_LOW, cfg->btr[0], false);
ret |= est_write(est_addr, EST_BTR_HIGH, cfg->btr[1], false);
ret |= est_write(est_addr, EST_TER, cfg->ter, false);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 085c09039af4..1369fa70bc58 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -805,6 +805,11 @@ int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags)
if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
return -EOPNOTSUPP;
+ if (!priv->plat->clk_ptp_rate) {
+ netdev_err(priv->dev, "Invalid PTP clock rate");
+ return -EINVAL;
+ }
+
stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags);
priv->systime_flags = systime_flags;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 43c869f64c39..b80c1efdb323 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -430,6 +430,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
struct device_node *np = pdev->dev.of_node;
struct plat_stmmacenet_data *plat;
struct stmmac_dma_cfg *dma_cfg;
+ static int bus_id = -ENODEV;
int phy_mode;
void *ret;
int rc;
@@ -465,8 +466,14 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
of_property_read_u32(np, "max-speed", &plat->max_speed);
plat->bus_id = of_alias_get_id(np, "ethernet");
- if (plat->bus_id < 0)
- plat->bus_id = 0;
+ if (plat->bus_id < 0) {
+ if (bus_id < 0)
+ bus_id = of_alias_get_highest_id("ethernet");
+ /* No ethernet alias found, init at -1 so first bus_id is 0 */
+ if (bus_id < 0)
+ bus_id = -1;
+ plat->bus_id = ++bus_id;
+ }
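/*
 * Illustrative example, not part of the patch: with aliases ethernet0 and
 * ethernet1 in the device tree, of_alias_get_highest_id() returns 1, so the
 * first MAC without an alias gets bus_id 2, the next one 3, and so on. With
 * no "ethernet" aliases at all, bus_id is initialized to -1 and the
 * instances get 0, 1, 2, ...
 */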
/* Default to phy auto-detection */
plat->phy_addr = -1;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 429b2d357813..3767ba495e78 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -317,7 +317,7 @@ void stmmac_ptp_register(struct stmmac_priv *priv)
/* Calculate the clock domain crossing (CDC) error if necessary */
priv->plat->cdc_error_adj = 0;
- if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate)
+ if (priv->plat->has_gmac4)
priv->plat->cdc_error_adj = (2 * NSEC_PER_SEC) / priv->plat->clk_ptp_rate;
/* Update the ptp clock parameters based on feature discovery, when
diff --git a/drivers/net/ethernet/ti/icssg/icssg_stats.c b/drivers/net/ethernet/ti/icssg/icssg_stats.c
index e8241e998aa9..7159baa0155c 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_stats.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_stats.c
@@ -28,6 +28,14 @@ void emac_update_hardware_stats(struct prueth_emac *emac)
spin_lock(&prueth->stats_lock);
for (i = 0; i < ARRAY_SIZE(icssg_all_miig_stats); i++) {
+		/* In MII mode the TX lines are swapped inside the ICSSG, so
+		 * read the Tx stats from slice1 for port0 and from slice0 for
+		 * port1 to get accurate per-port Tx stats
+		 */
+ if (emac->phy_if == PHY_INTERFACE_MODE_MII &&
+ icssg_all_miig_stats[i].offset >= ICSSG_TX_PACKET_OFFSET &&
+ icssg_all_miig_stats[i].offset <= ICSSG_TX_BYTE_OFFSET)
+ base = stats_base[slice ^ 1];
regmap_read(prueth->miig_rt,
base + icssg_all_miig_stats[i].offset,
&val);
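/*
 * Illustrative note, not part of the patch: stats_base[slice ^ 1] flips the
 * slice index, so in MII mode the Tx counters for port0 (slice 0) are read
 * from slice1's stats block and those for port1 (slice 1) from slice0's.
 */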
diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c
index e01c5997a551..1dd3755d9e6d 100644
--- a/drivers/net/hyperv/netvsc_bpf.c
+++ b/drivers/net/hyperv/netvsc_bpf.c
@@ -183,7 +183,7 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
xdp.command = XDP_SETUP_PROG;
xdp.prog = prog;
- ret = dev_xdp_propagate(vf_netdev, &xdp);
+ ret = netif_xdp_propagate(vf_netdev, &xdp);
if (ret && prog)
bpf_prog_put(prog);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 14a0d04e21ae..c41a025c66f0 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2462,8 +2462,6 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
- netvsc_vf_setxdp(vf_netdev, NULL);
-
reinit_completion(&net_device_ctx->vf_add);
netdev_rx_handler_unregister(vf_netdev);
netdev_upper_dev_unlink(vf_netdev, ndev);
@@ -2631,7 +2629,9 @@ static int netvsc_probe(struct hv_device *dev,
continue;
netvsc_prepare_bonding(vf_netdev);
+ netdev_lock_ops(vf_netdev);
netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
+ netdev_unlock_ops(vf_netdev);
__netvsc_vf_setup(net, vf_netdev);
break;
}
diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c
index 10d8afecec55..ebf1e849506b 100644
--- a/drivers/net/ovpn/io.c
+++ b/drivers/net/ovpn/io.c
@@ -134,7 +134,7 @@ void ovpn_decrypt_post(void *data, int ret)
rcu_read_lock();
sock = rcu_dereference(peer->sock);
- if (sock && sock->sock->sk->sk_protocol == IPPROTO_UDP)
+ if (sock && sock->sk->sk_protocol == IPPROTO_UDP)
/* check if this peer changed local or remote endpoint */
ovpn_peer_endpoints_update(peer, skb);
rcu_read_unlock();
@@ -270,12 +270,12 @@ void ovpn_encrypt_post(void *data, int ret)
if (unlikely(!sock))
goto err_unlock;
- switch (sock->sock->sk->sk_protocol) {
+ switch (sock->sk->sk_protocol) {
case IPPROTO_UDP:
- ovpn_udp_send_skb(peer, sock->sock, skb);
+ ovpn_udp_send_skb(peer, sock->sk, skb);
break;
case IPPROTO_TCP:
- ovpn_tcp_send_skb(peer, sock->sock, skb);
+ ovpn_tcp_send_skb(peer, sock->sk, skb);
break;
default:
/* no transport configured yet */
diff --git a/drivers/net/ovpn/netlink.c b/drivers/net/ovpn/netlink.c
index bea03913bfb1..a4ec53def46e 100644
--- a/drivers/net/ovpn/netlink.c
+++ b/drivers/net/ovpn/netlink.c
@@ -501,7 +501,7 @@ int ovpn_nl_peer_set_doit(struct sk_buff *skb, struct genl_info *info)
/* when using a TCP socket the remote IP is not expected */
rcu_read_lock();
sock = rcu_dereference(peer->sock);
- if (sock && sock->sock->sk->sk_protocol == IPPROTO_TCP &&
+ if (sock && sock->sk->sk_protocol == IPPROTO_TCP &&
(attrs[OVPN_A_PEER_REMOTE_IPV4] ||
attrs[OVPN_A_PEER_REMOTE_IPV6])) {
rcu_read_unlock();
@@ -559,14 +559,14 @@ static int ovpn_nl_send_peer(struct sk_buff *skb, const struct genl_info *info,
goto err_unlock;
}
- if (!net_eq(genl_info_net(info), sock_net(sock->sock->sk))) {
+ if (!net_eq(genl_info_net(info), sock_net(sock->sk))) {
id = peernet2id_alloc(genl_info_net(info),
- sock_net(sock->sock->sk),
+ sock_net(sock->sk),
GFP_ATOMIC);
if (nla_put_s32(skb, OVPN_A_PEER_SOCKET_NETNSID, id))
goto err_unlock;
}
- local_port = inet_sk(sock->sock->sk)->inet_sport;
+ local_port = inet_sk(sock->sk)->inet_sport;
rcu_read_unlock();
if (nla_put_u32(skb, OVPN_A_PEER_ID, peer->id))
@@ -1153,8 +1153,8 @@ int ovpn_nl_peer_del_notify(struct ovpn_peer *peer)
ret = -EINVAL;
goto err_unlock;
}
- genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sock->sk),
- msg, 0, OVPN_NLGRP_PEERS, GFP_ATOMIC);
+ genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sk), msg, 0,
+ OVPN_NLGRP_PEERS, GFP_ATOMIC);
rcu_read_unlock();
return 0;
@@ -1218,8 +1218,8 @@ int ovpn_nl_key_swap_notify(struct ovpn_peer *peer, u8 key_id)
ret = -EINVAL;
goto err_unlock;
}
- genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sock->sk),
- msg, 0, OVPN_NLGRP_PEERS, GFP_ATOMIC);
+ genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sk), msg, 0,
+ OVPN_NLGRP_PEERS, GFP_ATOMIC);
rcu_read_unlock();
return 0;
diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c
index a1fd27b9c038..4bfcab0c8652 100644
--- a/drivers/net/ovpn/peer.c
+++ b/drivers/net/ovpn/peer.c
@@ -1145,7 +1145,7 @@ static void ovpn_peer_release_p2p(struct ovpn_priv *ovpn, struct sock *sk,
if (sk) {
ovpn_sock = rcu_access_pointer(peer->sock);
- if (!ovpn_sock || ovpn_sock->sock->sk != sk) {
+ if (!ovpn_sock || ovpn_sock->sk != sk) {
spin_unlock_bh(&ovpn->lock);
ovpn_peer_put(peer);
return;
@@ -1175,7 +1175,7 @@ static void ovpn_peers_release_mp(struct ovpn_priv *ovpn, struct sock *sk,
if (sk) {
rcu_read_lock();
ovpn_sock = rcu_dereference(peer->sock);
- remove = ovpn_sock && ovpn_sock->sock->sk == sk;
+ remove = ovpn_sock && ovpn_sock->sk == sk;
rcu_read_unlock();
}
diff --git a/drivers/net/ovpn/socket.c b/drivers/net/ovpn/socket.c
index a83cbab72591..9750871ab65c 100644
--- a/drivers/net/ovpn/socket.c
+++ b/drivers/net/ovpn/socket.c
@@ -24,9 +24,9 @@ static void ovpn_socket_release_kref(struct kref *kref)
struct ovpn_socket *sock = container_of(kref, struct ovpn_socket,
refcount);
- if (sock->sock->sk->sk_protocol == IPPROTO_UDP)
+ if (sock->sk->sk_protocol == IPPROTO_UDP)
ovpn_udp_socket_detach(sock);
- else if (sock->sock->sk->sk_protocol == IPPROTO_TCP)
+ else if (sock->sk->sk_protocol == IPPROTO_TCP)
ovpn_tcp_socket_detach(sock);
}
@@ -75,14 +75,6 @@ void ovpn_socket_release(struct ovpn_peer *peer)
if (!sock)
return;
- /* sanity check: we should not end up here if the socket
- * was already closed
- */
- if (!sock->sock->sk) {
- DEBUG_NET_WARN_ON_ONCE(1);
- return;
- }
-
/* Drop the reference while holding the sock lock to avoid
* concurrent ovpn_socket_new call to mess up with a partially
* detached socket.
@@ -90,22 +82,24 @@ void ovpn_socket_release(struct ovpn_peer *peer)
* Holding the lock ensures that a socket with refcnt 0 is fully
* detached before it can be picked by a concurrent reader.
*/
- lock_sock(sock->sock->sk);
+ lock_sock(sock->sk);
released = ovpn_socket_put(peer, sock);
- release_sock(sock->sock->sk);
+ release_sock(sock->sk);
/* align all readers with sk_user_data being NULL */
synchronize_rcu();
/* following cleanup should happen with lock released */
if (released) {
- if (sock->sock->sk->sk_protocol == IPPROTO_UDP) {
+ if (sock->sk->sk_protocol == IPPROTO_UDP) {
netdev_put(sock->ovpn->dev, &sock->dev_tracker);
- } else if (sock->sock->sk->sk_protocol == IPPROTO_TCP) {
+ } else if (sock->sk->sk_protocol == IPPROTO_TCP) {
/* wait for TCP jobs to terminate */
ovpn_tcp_socket_wait_finish(sock);
ovpn_peer_put(sock->peer);
}
+ /* drop reference acquired in ovpn_socket_new() */
+ sock_put(sock->sk);
/* we can call plain kfree() because we already waited one RCU
* period due to synchronize_rcu()
*/
@@ -118,12 +112,14 @@ static bool ovpn_socket_hold(struct ovpn_socket *sock)
return kref_get_unless_zero(&sock->refcount);
}
-static int ovpn_socket_attach(struct ovpn_socket *sock, struct ovpn_peer *peer)
+static int ovpn_socket_attach(struct ovpn_socket *ovpn_sock,
+ struct socket *sock,
+ struct ovpn_peer *peer)
{
- if (sock->sock->sk->sk_protocol == IPPROTO_UDP)
- return ovpn_udp_socket_attach(sock, peer->ovpn);
- else if (sock->sock->sk->sk_protocol == IPPROTO_TCP)
- return ovpn_tcp_socket_attach(sock, peer);
+ if (sock->sk->sk_protocol == IPPROTO_UDP)
+ return ovpn_udp_socket_attach(ovpn_sock, sock, peer->ovpn);
+ else if (sock->sk->sk_protocol == IPPROTO_TCP)
+ return ovpn_tcp_socket_attach(ovpn_sock, peer);
return -EOPNOTSUPP;
}
@@ -138,14 +134,15 @@ static int ovpn_socket_attach(struct ovpn_socket *sock, struct ovpn_peer *peer)
struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
{
struct ovpn_socket *ovpn_sock;
+ struct sock *sk = sock->sk;
int ret;
- lock_sock(sock->sk);
+ lock_sock(sk);
/* a TCP socket can only be owned by a single peer, therefore there
* can't be any other user
*/
- if (sock->sk->sk_protocol == IPPROTO_TCP && sock->sk->sk_user_data) {
+ if (sk->sk_protocol == IPPROTO_TCP && sk->sk_user_data) {
ovpn_sock = ERR_PTR(-EBUSY);
goto sock_release;
}
@@ -153,8 +150,8 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
/* a UDP socket can be shared across multiple peers, but we must make
* sure it is not owned by something else
*/
- if (sock->sk->sk_protocol == IPPROTO_UDP) {
- u8 type = READ_ONCE(udp_sk(sock->sk)->encap_type);
+ if (sk->sk_protocol == IPPROTO_UDP) {
+ u8 type = READ_ONCE(udp_sk(sk)->encap_type);
/* socket owned by other encapsulation module */
if (type && type != UDP_ENCAP_OVPNINUDP) {
@@ -163,7 +160,7 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
}
rcu_read_lock();
- ovpn_sock = rcu_dereference_sk_user_data(sock->sk);
+ ovpn_sock = rcu_dereference_sk_user_data(sk);
if (ovpn_sock) {
/* socket owned by another ovpn instance, we can't use it */
if (ovpn_sock->ovpn != peer->ovpn) {
@@ -200,11 +197,22 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
goto sock_release;
}
- ovpn_sock->sock = sock;
+ ovpn_sock->sk = sk;
kref_init(&ovpn_sock->refcount);
- ret = ovpn_socket_attach(ovpn_sock, peer);
+	/* the newly created ovpn_socket holds a reference to sk,
+	 * therefore we increase its refcount.
+ *
+ * This ovpn_socket instance is referenced by all peers
+ * using the same socket.
+ *
+ * ovpn_socket_release() will take care of dropping the reference.
+ */
+ sock_hold(sk);
+
+ ret = ovpn_socket_attach(ovpn_sock, sock, peer);
if (ret < 0) {
+ sock_put(sk);
kfree(ovpn_sock);
ovpn_sock = ERR_PTR(ret);
goto sock_release;
@@ -213,11 +221,11 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
/* TCP sockets are per-peer, therefore they are linked to their unique
* peer
*/
- if (sock->sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_protocol == IPPROTO_TCP) {
INIT_WORK(&ovpn_sock->tcp_tx_work, ovpn_tcp_tx_work);
ovpn_sock->peer = peer;
ovpn_peer_hold(peer);
- } else if (sock->sk->sk_protocol == IPPROTO_UDP) {
+ } else if (sk->sk_protocol == IPPROTO_UDP) {
/* in UDP we only link the ovpn instance since the socket is
* shared among multiple peers
*/
@@ -226,8 +234,8 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
GFP_KERNEL);
}
- rcu_assign_sk_user_data(sock->sk, ovpn_sock);
+ rcu_assign_sk_user_data(sk, ovpn_sock);
sock_release:
- release_sock(sock->sk);
+ release_sock(sk);
return ovpn_sock;
}
diff --git a/drivers/net/ovpn/socket.h b/drivers/net/ovpn/socket.h
index 00d856b1a5d8..4afcec71040d 100644
--- a/drivers/net/ovpn/socket.h
+++ b/drivers/net/ovpn/socket.h
@@ -22,7 +22,7 @@ struct ovpn_peer;
* @ovpn: ovpn instance owning this socket (UDP only)
* @dev_tracker: reference tracker for associated dev (UDP only)
* @peer: unique peer transmitting over this socket (TCP only)
- * @sock: the low level sock object
+ * @sk: the low level sock object
* @refcount: amount of contexts currently referencing this object
* @work: member used to schedule release routine (it may block)
* @tcp_tx_work: work for deferring outgoing packet processing (TCP only)
@@ -36,7 +36,7 @@ struct ovpn_socket {
struct ovpn_peer *peer;
};
- struct socket *sock;
+ struct sock *sk;
struct kref refcount;
struct work_struct work;
struct work_struct tcp_tx_work;
diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c
index 7c42d84987ad..289f62c5d2c7 100644
--- a/drivers/net/ovpn/tcp.c
+++ b/drivers/net/ovpn/tcp.c
@@ -124,14 +124,18 @@ static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb)
* this peer, therefore ovpn_peer_hold() is not expected to fail
*/
if (WARN_ON(!ovpn_peer_hold(peer)))
- goto err;
+ goto err_nopeer;
ovpn_recv(peer, skb);
return;
err:
+	/* take a reference for deferred peer deletion; should never fail */
+ if (WARN_ON(!ovpn_peer_hold(peer)))
+ goto err_nopeer;
+ schedule_work(&peer->tcp.defer_del_work);
dev_dstats_rx_dropped(peer->ovpn->dev);
+err_nopeer:
kfree_skb(skb);
- ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);
}
static int ovpn_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
@@ -186,18 +190,18 @@ out:
void ovpn_tcp_socket_detach(struct ovpn_socket *ovpn_sock)
{
struct ovpn_peer *peer = ovpn_sock->peer;
- struct socket *sock = ovpn_sock->sock;
+ struct sock *sk = ovpn_sock->sk;
strp_stop(&peer->tcp.strp);
skb_queue_purge(&peer->tcp.user_queue);
/* restore CBs that were saved in ovpn_sock_set_tcp_cb() */
- sock->sk->sk_data_ready = peer->tcp.sk_cb.sk_data_ready;
- sock->sk->sk_write_space = peer->tcp.sk_cb.sk_write_space;
- sock->sk->sk_prot = peer->tcp.sk_cb.prot;
- sock->sk->sk_socket->ops = peer->tcp.sk_cb.ops;
+ sk->sk_data_ready = peer->tcp.sk_cb.sk_data_ready;
+ sk->sk_write_space = peer->tcp.sk_cb.sk_write_space;
+ sk->sk_prot = peer->tcp.sk_cb.prot;
+ sk->sk_socket->ops = peer->tcp.sk_cb.ops;
- rcu_assign_sk_user_data(sock->sk, NULL);
+ rcu_assign_sk_user_data(sk, NULL);
}
void ovpn_tcp_socket_wait_finish(struct ovpn_socket *sock)
@@ -283,10 +287,10 @@ void ovpn_tcp_tx_work(struct work_struct *work)
sock = container_of(work, struct ovpn_socket, tcp_tx_work);
- lock_sock(sock->sock->sk);
+ lock_sock(sock->sk);
if (sock->peer)
- ovpn_tcp_send_sock(sock->peer, sock->sock->sk);
- release_sock(sock->sock->sk);
+ ovpn_tcp_send_sock(sock->peer, sock->sk);
+ release_sock(sock->sk);
}
static void ovpn_tcp_send_sock_skb(struct ovpn_peer *peer, struct sock *sk,
@@ -307,15 +311,15 @@ static void ovpn_tcp_send_sock_skb(struct ovpn_peer *peer, struct sock *sk,
ovpn_tcp_send_sock(peer, sk);
}
-void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct socket *sock,
+void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct sock *sk,
struct sk_buff *skb)
{
u16 len = skb->len;
*(__be16 *)__skb_push(skb, sizeof(u16)) = htons(len);
- spin_lock_nested(&sock->sk->sk_lock.slock, OVPN_TCP_DEPTH_NESTING);
- if (sock_owned_by_user(sock->sk)) {
+ spin_lock_nested(&sk->sk_lock.slock, OVPN_TCP_DEPTH_NESTING);
+ if (sock_owned_by_user(sk)) {
if (skb_queue_len(&peer->tcp.out_queue) >=
READ_ONCE(net_hotdata.max_backlog)) {
dev_dstats_tx_dropped(peer->ovpn->dev);
@@ -324,10 +328,10 @@ void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct socket *sock,
}
__skb_queue_tail(&peer->tcp.out_queue, skb);
} else {
- ovpn_tcp_send_sock_skb(peer, sock->sk, skb);
+ ovpn_tcp_send_sock_skb(peer, sk, skb);
}
unlock:
- spin_unlock(&sock->sk->sk_lock.slock);
+ spin_unlock(&sk->sk_lock.slock);
}
static void ovpn_tcp_release(struct sock *sk)
@@ -474,7 +478,6 @@ static void ovpn_tcp_peer_del_work(struct work_struct *work)
int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock,
struct ovpn_peer *peer)
{
- struct socket *sock = ovpn_sock->sock;
struct strp_callbacks cb = {
.rcv_msg = ovpn_tcp_rcv,
.parse_msg = ovpn_tcp_parse,
@@ -482,20 +485,20 @@ int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock,
int ret;
/* make sure no pre-existing encapsulation handler exists */
- if (sock->sk->sk_user_data)
+ if (ovpn_sock->sk->sk_user_data)
return -EBUSY;
/* only a fully connected socket is expected. Connection should be
* handled in userspace
*/
- if (sock->sk->sk_state != TCP_ESTABLISHED) {
+ if (ovpn_sock->sk->sk_state != TCP_ESTABLISHED) {
net_err_ratelimited("%s: provided TCP socket is not in ESTABLISHED state: %d\n",
netdev_name(peer->ovpn->dev),
- sock->sk->sk_state);
+ ovpn_sock->sk->sk_state);
return -EINVAL;
}
- ret = strp_init(&peer->tcp.strp, sock->sk, &cb);
+ ret = strp_init(&peer->tcp.strp, ovpn_sock->sk, &cb);
if (ret < 0) {
DEBUG_NET_WARN_ON_ONCE(1);
return ret;
@@ -503,31 +506,31 @@ int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock,
INIT_WORK(&peer->tcp.defer_del_work, ovpn_tcp_peer_del_work);
- __sk_dst_reset(sock->sk);
+ __sk_dst_reset(ovpn_sock->sk);
skb_queue_head_init(&peer->tcp.user_queue);
skb_queue_head_init(&peer->tcp.out_queue);
/* save current CBs so that they can be restored upon socket release */
- peer->tcp.sk_cb.sk_data_ready = sock->sk->sk_data_ready;
- peer->tcp.sk_cb.sk_write_space = sock->sk->sk_write_space;
- peer->tcp.sk_cb.prot = sock->sk->sk_prot;
- peer->tcp.sk_cb.ops = sock->sk->sk_socket->ops;
+ peer->tcp.sk_cb.sk_data_ready = ovpn_sock->sk->sk_data_ready;
+ peer->tcp.sk_cb.sk_write_space = ovpn_sock->sk->sk_write_space;
+ peer->tcp.sk_cb.prot = ovpn_sock->sk->sk_prot;
+ peer->tcp.sk_cb.ops = ovpn_sock->sk->sk_socket->ops;
/* assign our static CBs and prot/ops */
- sock->sk->sk_data_ready = ovpn_tcp_data_ready;
- sock->sk->sk_write_space = ovpn_tcp_write_space;
+ ovpn_sock->sk->sk_data_ready = ovpn_tcp_data_ready;
+ ovpn_sock->sk->sk_write_space = ovpn_tcp_write_space;
- if (sock->sk->sk_family == AF_INET) {
- sock->sk->sk_prot = &ovpn_tcp_prot;
- sock->sk->sk_socket->ops = &ovpn_tcp_ops;
+ if (ovpn_sock->sk->sk_family == AF_INET) {
+ ovpn_sock->sk->sk_prot = &ovpn_tcp_prot;
+ ovpn_sock->sk->sk_socket->ops = &ovpn_tcp_ops;
} else {
- sock->sk->sk_prot = &ovpn_tcp6_prot;
- sock->sk->sk_socket->ops = &ovpn_tcp6_ops;
+ ovpn_sock->sk->sk_prot = &ovpn_tcp6_prot;
+ ovpn_sock->sk->sk_socket->ops = &ovpn_tcp6_ops;
}
/* avoid using task_frag */
- sock->sk->sk_allocation = GFP_ATOMIC;
- sock->sk->sk_use_task_frag = false;
+ ovpn_sock->sk->sk_allocation = GFP_ATOMIC;
+ ovpn_sock->sk->sk_use_task_frag = false;
/* enqueue the RX worker */
strp_check_rcv(&peer->tcp.strp);
diff --git a/drivers/net/ovpn/tcp.h b/drivers/net/ovpn/tcp.h
index 10aefa834cf3..a3aa3570ae5e 100644
--- a/drivers/net/ovpn/tcp.h
+++ b/drivers/net/ovpn/tcp.h
@@ -30,7 +30,8 @@ void ovpn_tcp_socket_wait_finish(struct ovpn_socket *sock);
* Required by the OpenVPN protocol in order to extract packets from
* the TCP stream on the receiver side.
*/
-void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct socket *sock, struct sk_buff *skb);
+void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct sock *sk,
+ struct sk_buff *skb);
void ovpn_tcp_tx_work(struct work_struct *work);
#endif /* _NET_OVPN_TCP_H_ */
diff --git a/drivers/net/ovpn/udp.c b/drivers/net/ovpn/udp.c
index aef8c0406ec9..bff00946eae2 100644
--- a/drivers/net/ovpn/udp.c
+++ b/drivers/net/ovpn/udp.c
@@ -43,7 +43,7 @@ static struct ovpn_socket *ovpn_socket_from_udp_sock(struct sock *sk)
return NULL;
/* make sure that sk matches our stored transport socket */
- if (unlikely(!ovpn_sock->sock || sk != ovpn_sock->sock->sk))
+ if (unlikely(!ovpn_sock->sk || sk != ovpn_sock->sk))
return NULL;
return ovpn_sock;
@@ -335,32 +335,22 @@ out:
/**
 * ovpn_udp_send_skb - prepare skb and send it via UDP
* @peer: the destination peer
- * @sock: the RCU protected peer socket
+ * @sk: peer socket
* @skb: the packet to send
*/
-void ovpn_udp_send_skb(struct ovpn_peer *peer, struct socket *sock,
+void ovpn_udp_send_skb(struct ovpn_peer *peer, struct sock *sk,
struct sk_buff *skb)
{
- int ret = -1;
+ int ret;
skb->dev = peer->ovpn->dev;
/* no checksum performed at this layer */
skb->ip_summed = CHECKSUM_NONE;
- /* get socket info */
- if (unlikely(!sock)) {
- net_warn_ratelimited("%s: no sock for remote peer %u\n",
- netdev_name(peer->ovpn->dev), peer->id);
- goto out;
- }
-
/* crypto layer -> transport (UDP) */
- ret = ovpn_udp_output(peer, &peer->dst_cache, sock->sk, skb);
-out:
- if (unlikely(ret < 0)) {
+ ret = ovpn_udp_output(peer, &peer->dst_cache, sk, skb);
+ if (unlikely(ret < 0))
kfree_skb(skb);
- return;
- }
}
static void ovpn_udp_encap_destroy(struct sock *sk)
@@ -383,6 +373,7 @@ static void ovpn_udp_encap_destroy(struct sock *sk)
/**
* ovpn_udp_socket_attach - set udp-tunnel CBs on socket and link it to ovpn
* @ovpn_sock: socket to configure
+ * @sock: the socket container to be passed to setup_udp_tunnel_sock()
 * @ovpn: the openvpn instance to link
*
* After invoking this function, the sock will be controlled by ovpn so that
@@ -390,7 +381,7 @@ static void ovpn_udp_encap_destroy(struct sock *sk)
*
* Return: 0 on success or a negative error code otherwise
*/
-int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock,
+int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock, struct socket *sock,
struct ovpn_priv *ovpn)
{
struct udp_tunnel_sock_cfg cfg = {
@@ -398,17 +389,16 @@ int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock,
.encap_rcv = ovpn_udp_encap_recv,
.encap_destroy = ovpn_udp_encap_destroy,
};
- struct socket *sock = ovpn_sock->sock;
struct ovpn_socket *old_data;
int ret;
/* make sure no pre-existing encapsulation handler exists */
rcu_read_lock();
- old_data = rcu_dereference_sk_user_data(sock->sk);
+ old_data = rcu_dereference_sk_user_data(ovpn_sock->sk);
if (!old_data) {
/* socket is currently unused - we can take it */
rcu_read_unlock();
- setup_udp_tunnel_sock(sock_net(sock->sk), sock, &cfg);
+ setup_udp_tunnel_sock(sock_net(ovpn_sock->sk), sock, &cfg);
return 0;
}
@@ -421,7 +411,7 @@ int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock,
 * Unlike TCP, a single UDP socket can be used to talk to many remote
 * hosts and therefore openvpn instantiates only one for all its peers
*/
- if ((READ_ONCE(udp_sk(sock->sk)->encap_type) == UDP_ENCAP_OVPNINUDP) &&
+ if ((READ_ONCE(udp_sk(ovpn_sock->sk)->encap_type) == UDP_ENCAP_OVPNINUDP) &&
old_data->ovpn == ovpn) {
netdev_dbg(ovpn->dev,
"provided socket already owned by this interface\n");
@@ -442,8 +432,16 @@ int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock,
*/
void ovpn_udp_socket_detach(struct ovpn_socket *ovpn_sock)
{
- struct udp_tunnel_sock_cfg cfg = { };
+ struct sock *sk = ovpn_sock->sk;
+
+ /* Re-enable multicast loopback */
+ inet_set_bit(MC_LOOP, sk);
+ /* Disable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */
+ inet_dec_convert_csum(sk);
+
+ WRITE_ONCE(udp_sk(sk)->encap_type, 0);
+ WRITE_ONCE(udp_sk(sk)->encap_rcv, NULL);
+ WRITE_ONCE(udp_sk(sk)->encap_destroy, NULL);
- setup_udp_tunnel_sock(sock_net(ovpn_sock->sock->sk), ovpn_sock->sock,
- &cfg);
+ rcu_assign_sk_user_data(sk, NULL);
}
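
[Editor's note] The open-coded detach above undoes, step by step, what setup_udp_tunnel_sock() configured at attach time (encap type, receive/destroy callbacks, checksum conversion, multicast loopback) and finally clears sk_user_data. The attach side keeps the RCU-protected sk_user_data handshake shown earlier in this file; a condensed sketch of that validation, with a hypothetical struct my_sock wrapper, callers assumed to hold rcu_read_lock(), and lifetime handling elided:

    static struct my_sock *my_sock_from_udp(struct sock *sk)
    {
            struct my_sock *ms = rcu_dereference_sk_user_data(sk);

            /* sk_user_data may belong to another encapsulation user, so
             * verify it really points back at this sk before trusting it
             */
            if (!ms || ms->sk != sk)
                    return NULL;
            return ms;
    }
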
diff --git a/drivers/net/ovpn/udp.h b/drivers/net/ovpn/udp.h
index 9994eb6e0428..fe26fbe25c5a 100644
--- a/drivers/net/ovpn/udp.h
+++ b/drivers/net/ovpn/udp.h
@@ -15,11 +15,11 @@ struct ovpn_peer;
struct ovpn_priv;
struct socket;
-int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock,
+int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock, struct socket *sock,
struct ovpn_priv *ovpn);
void ovpn_udp_socket_detach(struct ovpn_socket *ovpn_sock);
-void ovpn_udp_send_skb(struct ovpn_peer *peer, struct socket *sock,
+void ovpn_udp_send_skb(struct ovpn_peer *peer, struct sock *sk,
struct sk_buff *skb);
#endif /* _NET_OVPN_UDP_H_ */
diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c
index 453a2cf82753..9201ee10a13f 100644
--- a/drivers/net/usb/aqc111.c
+++ b/drivers/net/usb/aqc111.c
@@ -31,11 +31,11 @@ static int aqc111_read_cmd_nopm(struct usbnet *dev, u8 cmd, u16 value,
USB_RECIP_DEVICE, value, index, data, size);
if (unlikely(ret < size)) {
- ret = ret < 0 ? ret : -ENODATA;
-
netdev_warn(dev->net,
"Failed to read(0x%x) reg index 0x%04x: %d\n",
cmd, index, ret);
+
+ ret = ret < 0 ? ret : -ENODATA;
}
return ret;
@@ -50,11 +50,11 @@ static int aqc111_read_cmd(struct usbnet *dev, u8 cmd, u16 value,
USB_RECIP_DEVICE, value, index, data, size);
if (unlikely(ret < size)) {
- ret = ret < 0 ? ret : -ENODATA;
-
netdev_warn(dev->net,
"Failed to read(0x%x) reg index 0x%04x: %d\n",
cmd, index, ret);
+
+ ret = ret < 0 ? ret : -ENODATA;
}
return ret;
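
[Editor's note] The aqc111 reordering is purely about diagnostics: the warning now logs the raw return value from the USB core (which may be a short positive byte count) before it is collapsed into -ENODATA. A sketch of the resulting shape, assuming the usual usbnet vendor-read request:

    static int ctrl_read_checked(struct usbnet *dev, u8 cmd, u16 value,
                                 u16 index, void *data, u16 size)
    {
            int ret = usbnet_read_cmd(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR |
                                      USB_RECIP_DEVICE, value, index, data, size);

            if (unlikely(ret < size)) {
                    /* log the raw result first: a short read shows its length */
                    netdev_warn(dev->net, "short read(0x%x) idx 0x%04x: %d\n",
                                cmd, index, ret);
                    ret = ret < 0 ? ret : -ENODATA; /* then normalize */
            }
            return ret;
    }
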
diff --git a/drivers/net/usb/ch9200.c b/drivers/net/usb/ch9200.c
index f69d9b902da0..a206ffa76f1b 100644
--- a/drivers/net/usb/ch9200.c
+++ b/drivers/net/usb/ch9200.c
@@ -178,6 +178,7 @@ static int ch9200_mdio_read(struct net_device *netdev, int phy_id, int loc)
{
struct usbnet *dev = netdev_priv(netdev);
unsigned char buff[2];
+ int ret;
netdev_dbg(netdev, "%s phy_id:%02x loc:%02x\n",
__func__, phy_id, loc);
@@ -185,8 +186,10 @@ static int ch9200_mdio_read(struct net_device *netdev, int phy_id, int loc)
if (phy_id != 0)
return -ENODEV;
- control_read(dev, REQUEST_READ, 0, loc * 2, buff, 0x02,
- CONTROL_TIMEOUT_MS);
+ ret = control_read(dev, REQUEST_READ, 0, loc * 2, buff, 0x02,
+ CONTROL_TIMEOUT_MS);
+ if (ret < 0)
+ return ret;
return (buff[0] | buff[1] << 8);
}
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 2440e30c5bd1..0572f6a9bdb6 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1572,6 +1572,30 @@ vmxnet3_get_hdr_len(struct vmxnet3_adapter *adapter, struct sk_buff *skb,
return (hlen + (hdr.tcp->doff << 2));
}
+static void
+vmxnet3_lro_tunnel(struct sk_buff *skb, __be16 ip_proto)
+{
+ struct udphdr *uh = NULL;
+
+ if (ip_proto == htons(ETH_P_IP)) {
+ struct iphdr *iph = (struct iphdr *)skb->data;
+
+ if (iph->protocol == IPPROTO_UDP)
+ uh = (struct udphdr *)(iph + 1);
+ } else {
+ struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
+
+ if (iph->nexthdr == IPPROTO_UDP)
+ uh = (struct udphdr *)(iph + 1);
+ }
+ if (uh) {
+ if (uh->check)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ else
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
+ }
+}
+
static int
vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
struct vmxnet3_adapter *adapter, int quota)
@@ -1885,6 +1909,8 @@ sop_done:
if (segCnt != 0 && mss != 0) {
skb_shinfo(skb)->gso_type = rcd->v4 ?
SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
+ if (encap_lro)
+ vmxnet3_lro_tunnel(skb, skb->protocol);
skb_shinfo(skb)->gso_size = mss;
skb_shinfo(skb)->gso_segs = segCnt;
} else if ((segCnt != 0 || skb->len > mtu) && !encap_lro) {
diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c
index 8755c5e6a65b..c814fbd756a1 100644
--- a/drivers/net/wwan/mhi_wwan_mbim.c
+++ b/drivers/net/wwan/mhi_wwan_mbim.c
@@ -550,8 +550,8 @@ static int mhi_mbim_newlink(void *ctxt, struct net_device *ndev, u32 if_id,
struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
struct mhi_mbim_context *mbim = ctxt;
- link->session = if_id;
link->mbim = mbim;
+ link->session = mhi_mbim_get_link_mux_id(link->mbim->mdev->mhi_cntrl) + if_id;
link->ndev = ndev;
u64_stats_init(&link->rx_syncp);
u64_stats_init(&link->tx_syncp);
@@ -607,7 +607,7 @@ static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
{
struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
struct mhi_mbim_context *mbim;
- int err, link_id;
+ int err;
mbim = devm_kzalloc(&mhi_dev->dev, sizeof(*mbim), GFP_KERNEL);
if (!mbim)
@@ -628,11 +628,8 @@ static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
/* Number of transfer descriptors determines size of the queue */
mbim->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
- /* Get the corresponding mux_id from mhi */
- link_id = mhi_mbim_get_link_mux_id(cntrl);
-
/* Register wwan link ops with MHI controller representing WWAN instance */
- return wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_mbim_wwan_ops, mbim, link_id);
+ return wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_mbim_wwan_ops, mbim, 0);
}
static void mhi_mbim_remove(struct mhi_device *mhi_dev)
diff --git a/drivers/net/wwan/t7xx/t7xx_netdev.c b/drivers/net/wwan/t7xx/t7xx_netdev.c
index 91fa082e9cab..fc0a7cb181df 100644
--- a/drivers/net/wwan/t7xx/t7xx_netdev.c
+++ b/drivers/net/wwan/t7xx/t7xx_netdev.c
@@ -302,7 +302,7 @@ static int t7xx_ccmni_wwan_newlink(void *ctxt, struct net_device *dev, u32 if_id
ccmni->ctlb = ctlb;
ccmni->dev = dev;
atomic_set(&ccmni->usage, 0);
- ctlb->ccmni_inst[if_id] = ccmni;
+ WRITE_ONCE(ctlb->ccmni_inst[if_id], ccmni);
ret = register_netdevice(dev);
if (ret)
@@ -324,6 +324,7 @@ static void t7xx_ccmni_wwan_dellink(void *ctxt, struct net_device *dev, struct l
if (WARN_ON(ctlb->ccmni_inst[if_id] != ccmni))
return;
+ WRITE_ONCE(ctlb->ccmni_inst[if_id], NULL);
unregister_netdevice(dev);
}
@@ -419,7 +420,7 @@ static void t7xx_ccmni_recv_skb(struct t7xx_ccmni_ctrl *ccmni_ctlb, struct sk_bu
skb_cb = T7XX_SKB_CB(skb);
netif_id = skb_cb->netif_idx;
- ccmni = ccmni_ctlb->ccmni_inst[netif_id];
+ ccmni = READ_ONCE(ccmni_ctlb->ccmni_inst[netif_id]);
if (!ccmni) {
dev_kfree_skb(skb);
return;
@@ -441,7 +442,7 @@ static void t7xx_ccmni_recv_skb(struct t7xx_ccmni_ctrl *ccmni_ctlb, struct sk_bu
static void t7xx_ccmni_queue_tx_irq_notify(struct t7xx_ccmni_ctrl *ctlb, int qno)
{
- struct t7xx_ccmni *ccmni = ctlb->ccmni_inst[0];
+ struct t7xx_ccmni *ccmni = READ_ONCE(ctlb->ccmni_inst[0]);
struct netdev_queue *net_queue;
if (netif_running(ccmni->dev) && atomic_read(&ccmni->usage) > 0) {
@@ -453,7 +454,7 @@ static void t7xx_ccmni_queue_tx_irq_notify(struct t7xx_ccmni_ctrl *ctlb, int qno
static void t7xx_ccmni_queue_tx_full_notify(struct t7xx_ccmni_ctrl *ctlb, int qno)
{
- struct t7xx_ccmni *ccmni = ctlb->ccmni_inst[0];
+ struct t7xx_ccmni *ccmni = READ_ONCE(ctlb->ccmni_inst[0]);
struct netdev_queue *net_queue;
if (atomic_read(&ccmni->usage) > 0) {
@@ -471,7 +472,7 @@ static void t7xx_ccmni_queue_state_notify(struct t7xx_pci_dev *t7xx_dev,
if (ctlb->md_sta != MD_STATE_READY)
return;
- if (!ctlb->ccmni_inst[0]) {
+ if (!READ_ONCE(ctlb->ccmni_inst[0])) {
dev_warn(&t7xx_dev->pdev->dev, "No netdev registered yet\n");
return;
}
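
[Editor's note] The t7xx changes pair every lockless access to ccmni_inst[] with READ_ONCE()/WRITE_ONCE(), so the compiler can neither tear the pointer update nor re-load it behind the NULL check, and dellink now publishes NULL before the netdev is unregistered. The underlying pattern, sketched with a hypothetical shared slot:

    /* one writer publishes or retires the object ... */
    WRITE_ONCE(ctlb->slot, new_obj);        /* or NULL on teardown */

    /* ... while lockless readers take a single stable snapshot */
    struct my_obj *obj = READ_ONCE(ctlb->slot);

    if (obj)
            use(obj);       /* obj cannot be re-fetched as NULL behind our back */
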
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 66409bc3a4e0..fa5f19b8d53a 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -667,10 +667,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
*/
#ifdef CONFIG_DEBUG_INFO_BTF
#define BTF \
+ . = ALIGN(PAGE_SIZE); \
.BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \
BOUNDED_SECTION_BY(.BTF, _BTF) \
} \
- . = ALIGN(4); \
+ . = ALIGN(PAGE_SIZE); \
.BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) { \
*(.BTF_ids) \
}
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 60d1511b4f4d..70c8b94e797a 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -426,8 +426,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
const struct bpf_func_proto *
cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
-const struct bpf_func_proto *
-cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
#else
static inline void cgroup_bpf_lifetime_notifier_init(void)
@@ -466,12 +464,6 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return NULL;
}
-static inline const struct bpf_func_proto *
-cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
-{
- return NULL;
-}
-
static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
struct bpf_map *map) { return 0; }
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3f0cc89c0622..5b25d278409b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -346,6 +346,12 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
}
}
+#if IS_ENABLED(CONFIG_DEBUG_KERNEL)
+#define BPF_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
+#else
+#define BPF_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
+#endif
+
static inline u32 btf_field_type_size(enum btf_field_type type)
{
switch (type) {
@@ -1349,6 +1355,20 @@ u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len);
void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len);
bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
+int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset,
+ void *src, u32 len, u64 flags);
+void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset,
+ void *buffer__opt, u32 buffer__szk);
+
+static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
+{
+ u32 size = __bpf_dynptr_size(ptr);
+
+ if (len > size || offset > size - len)
+ return -E2BIG;
+
+ return 0;
+}
#ifdef CONFIG_BPF_JIT
int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
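
[Editor's note] bpf_dynptr_check_off_len() is now shared via the header, and its two comparisons are ordered to dodge u32 wraparound: once len <= size is known, size - len cannot underflow, so offset > size - len is equivalent to the intended offset + len > size without the overflow hazard. A worked example of why the naive form is wrong:

    /* size = 8, offset = 0xffffffff, len = 4                              */
    /* naive: offset + len wraps to 3, and 3 > 8 is false -> accepted      */
    /* above: len (4) <= size (8), but offset > size - len (4) -> -E2BIG   */
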
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 9734544b6957..256274acb1d8 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -356,7 +356,11 @@ enum {
INSN_F_SPI_MASK = 0x3f, /* 6 bits */
INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */
- INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */
+ INSN_F_STACK_ACCESS = BIT(9),
+
+ INSN_F_DST_REG_STACK = BIT(10), /* dst_reg is PTR_TO_STACK */
+ INSN_F_SRC_REG_STACK = BIT(11), /* src_reg is PTR_TO_STACK */
+ /* total 12 bits are used now. */
};
static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES);
@@ -365,9 +369,9 @@ static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8);
struct bpf_insn_hist_entry {
u32 idx;
/* insn idx can't be bigger than 1 million */
- u32 prev_idx : 22;
- /* special flags, e.g., whether insn is doing register stack spill/load */
- u32 flags : 10;
+ u32 prev_idx : 20;
+ /* special INSN_F_xxx flags */
+ u32 flags : 12;
/* additional registers that need precision tracking when this
* jump is backtracked, vector of six 10-bit records
*/
@@ -591,6 +595,7 @@ struct bpf_insn_aux_data {
* bpf_fastcall pattern.
*/
u8 fastcall_spills_num:3;
+ u8 arg_prog:4;
/* below fields are initialized once */
unsigned int orig_idx; /* original instruction index */
@@ -838,6 +843,17 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env,
u32 insn_off,
const char *prefix_fmt, ...);
+#define verifier_bug_if(cond, env, fmt, args...) \
+ ({ \
+ bool __cond = (cond); \
+ if (unlikely(__cond)) { \
+ BPF_WARN_ONCE(1, "verifier bug: " fmt "(" #cond ")\n", ##args); \
+ bpf_log(&env->log, "verifier bug: " fmt "(" #cond ")\n", ##args); \
+ } \
+ (__cond); \
+ })
+#define verifier_bug(env, fmt, args...) verifier_bug_if(1, env, fmt, ##args)
+
static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
{
struct bpf_verifier_state *cur = env->cur_state;
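
[Editor's note] verifier_bug_if() evaluates to the condition itself, so callers can report the internal inconsistency (once to the kernel log, always to the verifier log) and bail out in a single expression. A hypothetical use, with an invented invariant:

    if (verifier_bug_if(regno >= MAX_BPF_REG, env,
                        "register %d out of range", regno))
            return -EFAULT;

    /* unconditional form for a branch that should be unreachable */
    verifier_bug(env, "unexpected state reached");
    return -EFAULT;
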
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 544f8f8c3f44..d58e329ac0e7 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -361,10 +361,8 @@ struct dma_buf {
*/
struct module *owner;
-#if IS_ENABLED(CONFIG_DEBUG_FS)
/** @list_node: node for dma_buf accounting and debugging. */
struct list_head list_node;
-#endif
/** @priv: exporter specific private data for this buffer object. */
void *priv;
@@ -609,4 +607,6 @@ int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map);
void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map);
int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map);
void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map);
+struct dma_buf *dma_buf_iter_begin(void);
+struct dma_buf *dma_buf_iter_next(struct dma_buf *dmbuf);
#endif /* __DMA_BUF_H__ */
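
[Editor's note] dma_buf_iter_begin()/dma_buf_iter_next() expose the global buffer list to the BPF iterators added later in this series. A sketch of kernel-side traversal, under the assumption (relied on by those users) that each call returns a referenced buffer and dma_buf_iter_next() drops the reference on the buffer it was handed:

    struct dma_buf *d;

    for (d = dma_buf_iter_begin(); d; d = dma_buf_iter_next(d))
            pr_info("dmabuf: size=%zu exporter=%s\n", d->size, d->exp_name);
    /* a caller breaking out early must dma_buf_put() the current buffer */
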
diff --git a/include/net/checksum.h b/include/net/checksum.h
index e57986b173f8..3cbab35de5ab 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -152,7 +152,7 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
const __be32 *from, const __be32 *to,
bool pseudohdr);
void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
- __wsum diff, bool pseudohdr);
+ __wsum diff, bool pseudohdr, bool ipv6);
static __always_inline
void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index fd404729b115..0b4a2f124d11 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1506,7 +1506,7 @@ union bpf_attr {
__s32 map_token_fd;
};
- struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */
__u32 map_fd;
__aligned_u64 key;
union {
@@ -1995,11 +1995,15 @@ union bpf_attr {
* long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
* Description
* Store *len* bytes from address *from* into the packet
- * associated to *skb*, at *offset*. *flags* are a combination of
- * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
- * checksum for the packet after storing the bytes) and
- * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
- * **->swhash** and *skb*\ **->l4hash** to 0).
+ * associated to *skb*, at *offset*. The *flags* are a combination
+ * of the following values:
+ *
+ * **BPF_F_RECOMPUTE_CSUM**
+ * Automatically update *skb*\ **->csum** after storing the
+ * bytes.
+ * **BPF_F_INVALIDATE_HASH**
+ * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\
+ * **->l4hash** to 0.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
@@ -2051,7 +2055,8 @@ union bpf_attr {
* untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
* for updates resulting in a null checksum the value is set to
* **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
- * the checksum is to be computed against a pseudo-header.
+ * that the modified header field is part of the pseudo-header.
+ * Flag **BPF_F_IPV6** should be set for IPv6 packets.
*
* This helper works in combination with **bpf_csum_diff**\ (),
* which does not update the checksum in-place, but offers more
@@ -6068,6 +6073,7 @@ enum {
BPF_F_PSEUDO_HDR = (1ULL << 4),
BPF_F_MARK_MANGLED_0 = (1ULL << 5),
BPF_F_MARK_ENFORCE = (1ULL << 6),
+ BPF_F_IPV6 = (1ULL << 7),
};
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
@@ -6723,6 +6729,7 @@ struct bpf_link_info {
__u32 name_len;
__u32 offset; /* offset from file_name */
__u64 cookie;
+ __u64 ref_ctr_offset;
} uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
struct {
__aligned_u64 func_name; /* in/out */
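
[Editor's note] On the program side, BPF_F_IPV6 simply rides along in the flags argument of bpf_l4_csum_replace(). A hypothetical fragment updating a TCP checksum with a precomputed diff (csum_off and diff are illustrative, not from the patch):

    /* diff was computed earlier with bpf_csum_diff() over the rewritten
     * 16-byte IPv6 address; passing from == 0 selects the diff mode
     */
    bpf_l4_csum_replace(skb, csum_off, 0, diff,
                        BPF_F_PSEUDO_HDR | BPF_F_IPV6);
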
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 70502f038b92..3a335c50e6e3 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -53,6 +53,9 @@ obj-$(CONFIG_BPF_SYSCALL) += relo_core.o
obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o
obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o
obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o
+ifeq ($(CONFIG_DMA_SHARED_BUFFER),y)
+obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o
+endif
CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE)
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index db13ee70d94d..96113633e391 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -601,7 +601,7 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
if (model->ret_size > 0)
flags |= BPF_TRAMP_F_RET_FENTRY_RET;
- size = arch_bpf_trampoline_size(model, flags, tlinks, NULL);
+ size = arch_bpf_trampoline_size(model, flags, tlinks, stub_func);
if (size <= 0)
return size ? : -EFAULT;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 324c47ab377a..1d2cf898e21e 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -26,6 +26,7 @@
#include <linux/bsearch.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
+#include <linux/overflow.h>
#include <net/netfilter/nf_bpf_link.h>
@@ -3957,7 +3958,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
/* This needs to be kzalloc to zero out padding and unused fields, see
* comment in btf_record_equal.
*/
- rec = kzalloc(offsetof(struct btf_record, fields[cnt]), GFP_KERNEL | __GFP_NOWARN);
+ rec = kzalloc(struct_size(rec, fields, cnt), GFP_KERNEL | __GFP_NOWARN);
if (!rec)
return ERR_PTR(-ENOMEM);
@@ -5583,7 +5584,7 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
if (id < 0)
continue;
- new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
+ new_aof = krealloc(aof, struct_size(new_aof, ids, aof->cnt + 1),
GFP_KERNEL | __GFP_NOWARN);
if (!new_aof) {
ret = -ENOMEM;
@@ -5610,7 +5611,7 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
if (ret != BTF_FIELD_FOUND)
continue;
- new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
+ new_aof = krealloc(aof, struct_size(new_aof, ids, aof->cnt + 1),
GFP_KERNEL | __GFP_NOWARN);
if (!new_aof) {
ret = -ENOMEM;
@@ -5647,7 +5648,7 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
continue;
parse:
tab_cnt = tab ? tab->cnt : 0;
- new_tab = krealloc(tab, offsetof(struct btf_struct_metas, types[tab_cnt + 1]),
+ new_tab = krealloc(tab, struct_size(new_tab, types, tab_cnt + 1),
GFP_KERNEL | __GFP_NOWARN);
if (!new_tab) {
ret = -ENOMEM;
@@ -6383,12 +6384,11 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
return prog->aux->attach_btf;
}
-static bool is_int_ptr(struct btf *btf, const struct btf_type *t)
+static bool is_void_or_int_ptr(struct btf *btf, const struct btf_type *t)
{
/* skip modifiers */
t = btf_type_skip_modifiers(btf, t->type, NULL);
-
- return btf_type_is_int(t);
+ return btf_type_is_void(t) || btf_type_is_int(t);
}
u32 btf_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto,
@@ -6777,14 +6777,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
}
- if (t->type == 0)
- /* This is a pointer to void.
- * It is the same as scalar from the verifier safety pov.
- * No further pointer walking is allowed.
- */
- return true;
-
- if (is_int_ptr(btf, t))
+ /*
+ * If it's a pointer to void, it's the same as scalar from the verifier
+	 * safety POV. Either way, no further pointer walking is allowed.
+ */
+ if (is_void_or_int_ptr(btf, t))
return true;
/* this is a pointer to another type */
@@ -6830,10 +6827,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
/* Is this a func with potential NULL args? */
if (strcmp(tname, raw_tp_null_args[i].func))
continue;
- if (raw_tp_null_args[i].mask & (0x1 << (arg * 4)))
+ if (raw_tp_null_args[i].mask & (0x1ULL << (arg * 4)))
info->reg_type |= PTR_MAYBE_NULL;
/* Is the current arg IS_ERR? */
- if (raw_tp_null_args[i].mask & (0x2 << (arg * 4)))
+ if (raw_tp_null_args[i].mask & (0x2ULL << (arg * 4)))
ptr_err_raw_tp = true;
break;
}
@@ -7663,7 +7660,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
return 0;
if (!prog->aux->func_info) {
- bpf_log(log, "Verifier bug\n");
+ verifier_bug(env, "func_info undefined");
return -EFAULT;
}
@@ -7687,7 +7684,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
tname = btf_name_by_offset(btf, fn_t->name_off);
if (prog->aux->func_info_aux[subprog].unreliable) {
- bpf_log(log, "Verifier bug in function %s()\n", tname);
+ verifier_bug(env, "unreliable BTF for function %s()", tname);
return -EFAULT;
}
if (prog_type == BPF_PROG_TYPE_EXT)
@@ -8564,7 +8561,7 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
/* Grow set */
set = krealloc(tab->sets[hook],
- offsetof(struct btf_id_set8, pairs[set_cnt + add_set->cnt]),
+ struct_size(set, pairs, set_cnt + add_set->cnt),
GFP_KERNEL | __GFP_NOWARN);
if (!set) {
ret = -ENOMEM;
@@ -8850,7 +8847,7 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c
}
tab = krealloc(btf->dtor_kfunc_tab,
- offsetof(struct btf_id_dtor_kfunc_tab, dtors[tab_cnt + add_cnt]),
+ struct_size(tab, dtors, tab_cnt + add_cnt),
GFP_KERNEL | __GFP_NOWARN);
if (!tab) {
ret = -ENOMEM;
@@ -9408,8 +9405,7 @@ btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops,
tab = btf->struct_ops_tab;
if (!tab) {
- tab = kzalloc(offsetof(struct btf_struct_ops_tab, ops[4]),
- GFP_KERNEL);
+ tab = kzalloc(struct_size(tab, ops, 4), GFP_KERNEL);
if (!tab)
return -ENOMEM;
tab->capacity = 4;
@@ -9422,8 +9418,7 @@ btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops,
if (tab->cnt == tab->capacity) {
new_tab = krealloc(tab,
- offsetof(struct btf_struct_ops_tab,
- ops[tab->capacity * 2]),
+ struct_size(tab, ops, tab->capacity * 2),
GFP_KERNEL);
if (!new_tab)
return -ENOMEM;
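
[Editor's note] The btf.c allocation changes are mechanical conversions from open-coded offsetof(..., arr[n]) sizing to struct_size() from <linux/overflow.h> (newly included above). Both forms compute the same size when n is sane; the difference is that struct_size() saturates to SIZE_MAX on arithmetic overflow, so the allocation fails instead of silently under-allocating. For instance:

    struct btf_record *rec;

    /* open-coded: offsetof(struct btf_record, fields[cnt])
     * struct_size: same value, but SIZE_MAX if cnt * sizeof(field) overflows
     */
    rec = kzalloc(struct_size(rec, fields, cnt), GFP_KERNEL | __GFP_NOWARN);
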
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index cad0194552fb..9122c39870bf 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1687,10 +1687,6 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
if (func_proto)
return func_proto;
- func_proto = cgroup_current_func_proto(func_id, prog);
- if (func_proto)
- return func_proto;
-
switch (func_id) {
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
@@ -2238,10 +2234,6 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
if (func_proto)
return func_proto;
- func_proto = cgroup_current_func_proto(func_id, prog);
- if (func_proto)
- return func_proto;
-
switch (func_id) {
case BPF_FUNC_sysctl_get_name:
return &bpf_sysctl_get_name_proto;
@@ -2385,10 +2377,6 @@ cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
if (func_proto)
return func_proto;
- func_proto = cgroup_current_func_proto(func_id, prog);
- if (func_proto)
- return func_proto;
-
switch (func_id) {
#ifdef CONFIG_NET
case BPF_FUNC_get_netns_cookie:
@@ -2635,23 +2623,3 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return NULL;
}
}
-
-/* Common helpers for cgroup hooks with valid process context. */
-const struct bpf_func_proto *
-cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
-{
- switch (func_id) {
- case BPF_FUNC_get_current_uid_gid:
- return &bpf_get_current_uid_gid_proto;
- case BPF_FUNC_get_current_comm:
- return &bpf_get_current_comm_proto;
-#ifdef CONFIG_CGROUP_NET_CLASSID
- case BPF_FUNC_get_cgroup_classid:
- return &bpf_get_cgroup_classid_curr_proto;
-#endif
- case BPF_FUNC_current_task_under_cgroup:
- return &bpf_current_task_under_cgroup_proto;
- default:
- return NULL;
- }
-}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ba6b6118cf50..c20babbf998f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2358,8 +2358,8 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx,
return 0;
}
-bool bpf_prog_map_compatible(struct bpf_map *map,
- const struct bpf_prog *fp)
+static bool __bpf_prog_map_compatible(struct bpf_map *map,
+ const struct bpf_prog *fp)
{
enum bpf_prog_type prog_type = resolve_prog_type(fp);
bool ret;
@@ -2368,14 +2368,6 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
if (fp->kprobe_override)
return false;
- /* XDP programs inserted into maps are not guaranteed to run on
- * a particular netdev (and can run outside driver context entirely
- * in the case of devmap and cpumap). Until device checks
- * are implemented, prohibit adding dev-bound programs to program maps.
- */
- if (bpf_prog_is_dev_bound(aux))
- return false;
-
spin_lock(&map->owner.lock);
if (!map->owner.type) {
/* There's no owner yet where we could check for
@@ -2409,6 +2401,19 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
return ret;
}
+bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp)
+{
+ /* XDP programs inserted into maps are not guaranteed to run on
+ * a particular netdev (and can run outside driver context entirely
+ * in the case of devmap and cpumap). Until device checks
+ * are implemented, prohibit adding dev-bound programs to program maps.
+ */
+ if (bpf_prog_is_dev_bound(fp->aux))
+ return false;
+
+ return __bpf_prog_map_compatible(map, fp);
+}
+
static int bpf_check_tail_call(const struct bpf_prog *fp)
{
struct bpf_prog_aux *aux = fp->aux;
@@ -2421,7 +2426,7 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
if (!map_type_contains_progs(map))
continue;
- if (!bpf_prog_map_compatible(map, fp)) {
+ if (!__bpf_prog_map_compatible(map, fp)) {
ret = -EINVAL;
goto out;
}
@@ -2469,7 +2474,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
/* In case of BPF to BPF calls, verifier did all the prep
* work with regards to JITing, etc.
*/
- bool jit_needed = false;
+ bool jit_needed = fp->jit_requested;
if (fp->bpf_func)
goto finalize;
diff --git a/kernel/bpf/dmabuf_iter.c b/kernel/bpf/dmabuf_iter.c
new file mode 100644
index 000000000000..4dd7ef7c145c
--- /dev/null
+++ b/kernel/bpf/dmabuf_iter.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Google LLC */
+#include <linux/bpf.h>
+#include <linux/btf_ids.h>
+#include <linux/dma-buf.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+
+static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ if (*pos)
+ return NULL;
+
+ return dma_buf_iter_begin();
+}
+
+static void *dmabuf_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct dma_buf *dmabuf = v;
+
+ ++*pos;
+
+ return dma_buf_iter_next(dmabuf);
+}
+
+struct bpf_iter__dmabuf {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct dma_buf *, dmabuf);
+};
+
+static int __dmabuf_seq_show(struct seq_file *seq, void *v, bool in_stop)
+{
+ struct bpf_iter_meta meta = {
+ .seq = seq,
+ };
+ struct bpf_iter__dmabuf ctx = {
+ .meta = &meta,
+ .dmabuf = v,
+ };
+ struct bpf_prog *prog = bpf_iter_get_info(&meta, in_stop);
+
+ if (prog)
+ return bpf_iter_run_prog(prog, &ctx);
+
+ return 0;
+}
+
+static int dmabuf_iter_seq_show(struct seq_file *seq, void *v)
+{
+ return __dmabuf_seq_show(seq, v, false);
+}
+
+static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v)
+{
+ struct dma_buf *dmabuf = v;
+
+ if (dmabuf)
+ dma_buf_put(dmabuf);
+}
+
+static const struct seq_operations dmabuf_iter_seq_ops = {
+ .start = dmabuf_iter_seq_start,
+ .next = dmabuf_iter_seq_next,
+ .stop = dmabuf_iter_seq_stop,
+ .show = dmabuf_iter_seq_show,
+};
+
+static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux,
+ struct seq_file *seq)
+{
+ seq_puts(seq, "dmabuf iter\n");
+}
+
+static const struct bpf_iter_seq_info dmabuf_iter_seq_info = {
+ .seq_ops = &dmabuf_iter_seq_ops,
+ .init_seq_private = NULL,
+ .fini_seq_private = NULL,
+ .seq_priv_size = 0,
+};
+
+static struct bpf_iter_reg bpf_dmabuf_reg_info = {
+ .target = "dmabuf",
+ .feature = BPF_ITER_RESCHED,
+ .show_fdinfo = bpf_iter_dmabuf_show_fdinfo,
+ .ctx_arg_info_size = 1,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__dmabuf, dmabuf),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+ .seq_info = &dmabuf_iter_seq_info,
+};
+
+DEFINE_BPF_ITER_FUNC(dmabuf, struct bpf_iter_meta *meta, struct dma_buf *dmabuf)
+BTF_ID_LIST_SINGLE(bpf_dmabuf_btf_id, struct, dma_buf)
+
+static int __init dmabuf_iter_init(void)
+{
+ bpf_dmabuf_reg_info.ctx_arg_info[0].btf_id = bpf_dmabuf_btf_id[0];
+ return bpf_iter_reg_target(&bpf_dmabuf_reg_info);
+}
+
+late_initcall(dmabuf_iter_init);
+
+struct bpf_iter_dmabuf {
+ /*
+	 * opaque iterator state; having __u64 here allows preserving correct
+ * alignment requirements in vmlinux.h, generated from BTF
+ */
+ __u64 __opaque[1];
+} __aligned(8);
+
+/* Non-opaque version of bpf_iter_dmabuf */
+struct bpf_iter_dmabuf_kern {
+ struct dma_buf *dmabuf;
+} __aligned(8);
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it)
+{
+ struct bpf_iter_dmabuf_kern *kit = (void *)it;
+
+ BUILD_BUG_ON(sizeof(*kit) > sizeof(*it));
+ BUILD_BUG_ON(__alignof__(*kit) != __alignof__(*it));
+
+ kit->dmabuf = NULL;
+ return 0;
+}
+
+__bpf_kfunc struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it)
+{
+ struct bpf_iter_dmabuf_kern *kit = (void *)it;
+
+ if (kit->dmabuf)
+ kit->dmabuf = dma_buf_iter_next(kit->dmabuf);
+ else
+ kit->dmabuf = dma_buf_iter_begin();
+
+ return kit->dmabuf;
+}
+
+__bpf_kfunc void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it)
+{
+ struct bpf_iter_dmabuf_kern *kit = (void *)it;
+
+ if (kit->dmabuf)
+ dma_buf_put(kit->dmabuf);
+}
+
+__bpf_kfunc_end_defs();
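
[Editor's note] The kfunc triple above implements the open-coded iterator convention (KF_ITER_NEW/NEXT/DESTROY, registered below in helpers.c), so a sleepable BPF program can drive the walk itself. A hypothetical BPF-side user, assuming an iterator-capable, sleepable program type:

    struct bpf_iter_dmabuf it;
    struct dma_buf *d;
    int count = 0;      /* hypothetical: just count buffers */

    if (bpf_iter_dmabuf_new(&it) == 0) {
            while ((d = bpf_iter_dmabuf_next(&it)))
                    count++;        /* d's BTF fields (e.g. d->size) are readable */
    }
    bpf_iter_dmabuf_destroy(&it);   /* must always pair with _new() */
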
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 92b606d60020..71f9931ac64c 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -175,20 +175,30 @@ static bool htab_is_percpu(const struct bpf_htab *htab)
htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}
+static inline bool is_fd_htab(const struct bpf_htab *htab)
+{
+ return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS;
+}
+
+static inline void *htab_elem_value(struct htab_elem *l, u32 key_size)
+{
+ return l->key + round_up(key_size, 8);
+}
+
static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
void __percpu *pptr)
{
- *(void __percpu **)(l->key + roundup(key_size, 8)) = pptr;
+ *(void __percpu **)htab_elem_value(l, key_size) = pptr;
}
static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
{
- return *(void __percpu **)(l->key + roundup(key_size, 8));
+ return *(void __percpu **)htab_elem_value(l, key_size);
}
static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
{
- return *(void **)(l->key + roundup(map->key_size, 8));
+ return *(void **)htab_elem_value(l, map->key_size);
}
static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
@@ -196,9 +206,13 @@ static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size);
}
+/* Both percpu and fd htab support in-place update, so no need for
+ * extra elem. LRU itself can remove the least used element, so
+ * there is no need for an extra elem during map_update.
+ */
static bool htab_has_extra_elems(struct bpf_htab *htab)
{
- return !htab_is_percpu(htab) && !htab_is_lru(htab);
+ return !htab_is_percpu(htab) && !htab_is_lru(htab) && !is_fd_htab(htab);
}
static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
@@ -215,10 +229,10 @@ static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
elem = get_htab_elem(htab, i);
if (btf_record_has_field(htab->map.record, BPF_TIMER))
bpf_obj_free_timer(htab->map.record,
- elem->key + round_up(htab->map.key_size, 8));
+ htab_elem_value(elem, htab->map.key_size));
if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
bpf_obj_free_workqueue(htab->map.record,
- elem->key + round_up(htab->map.key_size, 8));
+ htab_elem_value(elem, htab->map.key_size));
cond_resched();
}
}
@@ -245,7 +259,8 @@ static void htab_free_prealloced_fields(struct bpf_htab *htab)
cond_resched();
}
} else {
- bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
+ bpf_obj_free_fields(htab->map.record,
+ htab_elem_value(elem, htab->map.key_size));
cond_resched();
}
cond_resched();
@@ -453,8 +468,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
- bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
- attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
/* percpu_lru means each cpu has its own LRU list.
* it is different from BPF_MAP_TYPE_PERCPU_HASH where
* the map's value itself is percpu. percpu_lru has
@@ -549,10 +562,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
if (err)
goto free_map_locked;
- if (!percpu && !lru) {
- /* lru itself can remove the least used element, so
- * there is no need for an extra elem during map_update.
- */
+ if (htab_has_extra_elems(htab)) {
err = alloc_extra_elems(htab);
if (err)
goto free_prealloc;
@@ -670,7 +680,7 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
struct htab_elem *l = __htab_map_lookup_elem(map, key);
if (l)
- return l->key + round_up(map->key_size, 8);
+ return htab_elem_value(l, map->key_size);
return NULL;
}
@@ -709,7 +719,7 @@ static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map,
if (l) {
if (mark)
bpf_lru_node_set_ref(&l->lru_node);
- return l->key + round_up(map->key_size, 8);
+ return htab_elem_value(l, map->key_size);
}
return NULL;
@@ -763,7 +773,7 @@ static void check_and_free_fields(struct bpf_htab *htab,
for_each_possible_cpu(cpu)
bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
} else {
- void *map_value = elem->key + round_up(htab->map.key_size, 8);
+ void *map_value = htab_elem_value(elem, htab->map.key_size);
bpf_obj_free_fields(htab->map.record, map_value);
}
@@ -968,8 +978,7 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
{
- return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
- BITS_PER_LONG == 64;
+ return is_fd_htab(htab) && BITS_PER_LONG == 64;
}
static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
@@ -1039,11 +1048,9 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
htab_elem_set_ptr(l_new, key_size, pptr);
} else if (fd_htab_map_needs_adjust(htab)) {
size = round_up(size, 8);
- memcpy(l_new->key + round_up(key_size, 8), value, size);
+ memcpy(htab_elem_value(l_new, key_size), value, size);
} else {
- copy_map_value(&htab->map,
- l_new->key + round_up(key_size, 8),
- value);
+ copy_map_value(&htab->map, htab_elem_value(l_new, key_size), value);
}
l_new->hash = hash;
@@ -1072,10 +1079,9 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
u64 map_flags)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct htab_elem *l_new = NULL, *l_old;
+ struct htab_elem *l_new, *l_old;
struct hlist_nulls_head *head;
unsigned long flags;
- void *old_map_ptr;
struct bucket *b;
u32 key_size, hash;
int ret;
@@ -1106,7 +1112,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
if (l_old) {
/* grab the element lock and update value in place */
copy_map_value_locked(map,
- l_old->key + round_up(key_size, 8),
+ htab_elem_value(l_old, key_size),
value, false);
return 0;
}
@@ -1134,7 +1140,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
* and update element in place
*/
copy_map_value_locked(map,
- l_old->key + round_up(key_size, 8),
+ htab_elem_value(l_old, key_size),
value, false);
ret = 0;
goto err;
@@ -1156,24 +1162,14 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
hlist_nulls_del_rcu(&l_old->hash_node);
/* l_old has already been stashed in htab->extra_elems, free
- * its special fields before it is available for reuse. Also
- * save the old map pointer in htab of maps before unlock
- * and release it after unlock.
+ * its special fields before it is available for reuse.
*/
- old_map_ptr = NULL;
- if (htab_is_prealloc(htab)) {
- if (map->ops->map_fd_put_ptr)
- old_map_ptr = fd_htab_map_get_ptr(map, l_old);
+ if (htab_is_prealloc(htab))
check_and_free_fields(htab, l_old);
- }
}
htab_unlock_bucket(b, flags);
- if (l_old) {
- if (old_map_ptr)
- map->ops->map_fd_put_ptr(map, old_map_ptr, true);
- if (!htab_is_prealloc(htab))
- free_htab_elem(htab, l_old);
- }
+ if (l_old && !htab_is_prealloc(htab))
+ free_htab_elem(htab, l_old);
return 0;
err:
htab_unlock_bucket(b, flags);
@@ -1220,8 +1216,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
l_new = prealloc_lru_pop(htab, key, hash);
if (!l_new)
return -ENOMEM;
- copy_map_value(&htab->map,
- l_new->key + round_up(map->key_size, 8), value);
+ copy_map_value(&htab->map, htab_elem_value(l_new, map->key_size), value);
ret = htab_lock_bucket(b, &flags);
if (ret)
@@ -1255,13 +1250,14 @@ err_lock_bucket:
return ret;
}
-static long __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
+static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
void *value, u64 map_flags,
- bool onallcpus)
+ bool percpu, bool onallcpus)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct htab_elem *l_new = NULL, *l_old;
+ struct htab_elem *l_new, *l_old;
struct hlist_nulls_head *head;
+ void *old_map_ptr = NULL;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
@@ -1292,21 +1288,29 @@ static long __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
goto err;
if (l_old) {
- /* per-cpu hash map can update value in-place */
- pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
- value, onallcpus);
+ /* Update value in-place */
+ if (percpu) {
+ pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
+ value, onallcpus);
+ } else {
+ void **inner_map_pptr = htab_elem_value(l_old, key_size);
+
+ old_map_ptr = *inner_map_pptr;
+ WRITE_ONCE(*inner_map_pptr, *(void **)value);
+ }
} else {
l_new = alloc_htab_elem(htab, key, value, key_size,
- hash, true, onallcpus, NULL);
+ hash, percpu, onallcpus, NULL);
if (IS_ERR(l_new)) {
ret = PTR_ERR(l_new);
goto err;
}
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
}
- ret = 0;
err:
htab_unlock_bucket(b, flags);
+ if (old_map_ptr)
+ map->ops->map_fd_put_ptr(map, old_map_ptr, true);
return ret;
}
@@ -1383,7 +1387,7 @@ err_lock_bucket:
static long htab_percpu_map_update_elem(struct bpf_map *map, void *key,
void *value, u64 map_flags)
{
- return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
+ return htab_map_update_elem_in_place(map, key, value, map_flags, true, false);
}
static long htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
@@ -1500,10 +1504,10 @@ static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
/* We only free timer on uref dropping to zero */
if (btf_record_has_field(htab->map.record, BPF_TIMER))
bpf_obj_free_timer(htab->map.record,
- l->key + round_up(htab->map.key_size, 8));
+ htab_elem_value(l, htab->map.key_size));
if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
bpf_obj_free_workqueue(htab->map.record,
- l->key + round_up(htab->map.key_size, 8));
+ htab_elem_value(l, htab->map.key_size));
}
cond_resched_rcu();
}
@@ -1615,15 +1619,12 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
off += roundup_value_size;
}
} else {
- u32 roundup_key_size = round_up(map->key_size, 8);
+ void *src = htab_elem_value(l, map->key_size);
if (flags & BPF_F_LOCK)
- copy_map_value_locked(map, value, l->key +
- roundup_key_size,
- true);
+ copy_map_value_locked(map, value, src, true);
else
- copy_map_value(map, value, l->key +
- roundup_key_size);
+ copy_map_value(map, value, src);
/* Zeroing special fields in the temp buffer */
check_and_init_map_value(map, value);
}
@@ -1680,12 +1681,12 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
bool is_percpu)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- u32 bucket_cnt, total, key_size, value_size, roundup_key_size;
void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
void __user *uvalues = u64_to_user_ptr(attr->batch.values);
void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
u32 batch, max_count, size, bucket_size, map_id;
+ u32 bucket_cnt, total, key_size, value_size;
struct htab_elem *node_to_free = NULL;
u64 elem_map_flags, map_flags;
struct hlist_nulls_head *head;
@@ -1720,7 +1721,6 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
return -ENOENT;
key_size = htab->map.key_size;
- roundup_key_size = round_up(htab->map.key_size, 8);
value_size = htab->map.value_size;
size = round_up(value_size, 8);
if (is_percpu)
@@ -1812,8 +1812,8 @@ again_nocopy:
off += size;
}
} else {
- value = l->key + roundup_key_size;
- if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+ value = htab_elem_value(l, key_size);
+ if (is_fd_htab(htab)) {
struct bpf_map **inner_map = value;
/* Actual value is the id of the inner map */
@@ -2063,11 +2063,11 @@ static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
{
struct bpf_iter_seq_hash_map_info *info = seq->private;
- u32 roundup_key_size, roundup_value_size;
struct bpf_iter__bpf_map_elem ctx = {};
struct bpf_map *map = info->map;
struct bpf_iter_meta meta;
int ret = 0, off = 0, cpu;
+ u32 roundup_value_size;
struct bpf_prog *prog;
void __percpu *pptr;
@@ -2077,10 +2077,9 @@ static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
ctx.meta = &meta;
ctx.map = info->map;
if (elem) {
- roundup_key_size = round_up(map->key_size, 8);
ctx.key = elem->key;
if (!info->percpu_value_buf) {
- ctx.value = elem->key + roundup_key_size;
+ ctx.value = htab_elem_value(elem, map->key_size);
} else {
roundup_value_size = round_up(map->value_size, 8);
pptr = htab_elem_get_ptr(elem, map->key_size);
@@ -2165,7 +2164,6 @@ static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_
struct hlist_nulls_head *head;
struct hlist_nulls_node *n;
struct htab_elem *elem;
- u32 roundup_key_size;
int i, num_elems = 0;
void __percpu *pptr;
struct bucket *b;
@@ -2180,7 +2178,6 @@ static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_
is_percpu = htab_is_percpu(htab);
- roundup_key_size = round_up(map->key_size, 8);
/* migration has been disabled, so percpu value prepared here will be
* the same as the one seen by the bpf program with
* bpf_map_lookup_elem().
@@ -2196,7 +2193,7 @@ static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_
pptr = htab_elem_get_ptr(elem, map->key_size);
val = this_cpu_ptr(pptr);
} else {
- val = elem->key + roundup_key_size;
+ val = htab_elem_value(elem, map->key_size);
}
num_elems++;
ret = callback_fn((u64)(long)map, (u64)(long)key,
@@ -2411,8 +2408,8 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
ret = __htab_lru_percpu_map_update_elem(map, key, value,
map_flags, true);
else
- ret = __htab_percpu_map_update_elem(map, key, value, map_flags,
- true);
+ ret = htab_map_update_elem_in_place(map, key, value, map_flags,
+ true, true);
rcu_read_unlock();
return ret;
@@ -2536,24 +2533,23 @@ int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
return ret;
}
-/* only called from syscall */
+/* Only called from syscall */
int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags)
{
void *ptr;
int ret;
- u32 ufd = *(u32 *)value;
- ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
+ ptr = map->ops->map_fd_get_ptr(map, map_file, *(int *)value);
if (IS_ERR(ptr))
return PTR_ERR(ptr);
/* The htab bucket lock is always held during update operations in fd
* htab map, and the following rcu_read_lock() is only used to avoid
- * the WARN_ON_ONCE in htab_map_update_elem().
+ * the WARN_ON_ONCE in htab_map_update_elem_in_place().
*/
rcu_read_lock();
- ret = htab_map_update_elem(map, key, &ptr, map_flags);
+ ret = htab_map_update_elem_in_place(map, key, &ptr, map_flags, false, false);
rcu_read_unlock();
if (ret)
map->ops->map_fd_put_ptr(map, ptr, false);
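
[Editor's note] All the round_up(key_size, 8) arithmetic replaced throughout hashtab.c encodes one element layout, which the new htab_elem_value() helper now names in a single place:

    /* struct htab_elem | key (key_size bytes) | pad to 8 | value ...
     *
     * For regular maps the value itself lives here; for percpu and
     * fd (hash-of-maps) htabs it is a pointer-sized slot instead.
     */
    void *value = htab_elem_value(l, map->key_size);
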
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index e3a2662f4e33..b71e428ad936 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -23,6 +23,7 @@
#include <linux/btf_ids.h>
#include <linux/bpf_mem_alloc.h>
#include <linux/kasan.h>
+#include <linux/bpf_verifier.h>
#include "../../lib/kstrtox.h"
@@ -129,7 +130,8 @@ const struct bpf_func_proto bpf_map_peek_elem_proto = {
BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
{
- WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+ !rcu_read_lock_bh_held());
return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu);
}
@@ -1713,16 +1715,6 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
memset(ptr, 0, sizeof(*ptr));
}
-static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
-{
- u32 size = __bpf_dynptr_size(ptr);
-
- if (len > size || offset > size - len)
- return -E2BIG;
-
- return 0;
-}
-
BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr)
{
int err;
@@ -1809,8 +1801,8 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
.arg5_type = ARG_ANYTHING,
};
-static int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
- u32 len, u64 flags)
+int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
+ u32 len, u64 flags)
{
enum bpf_dynptr_type type;
int err;
@@ -1912,6 +1904,12 @@ const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
+const struct bpf_func_proto bpf_perf_event_read_proto __weak;
+const struct bpf_func_proto bpf_send_signal_proto __weak;
+const struct bpf_func_proto bpf_send_signal_thread_proto __weak;
+const struct bpf_func_proto bpf_get_task_stack_sleepable_proto __weak;
+const struct bpf_func_proto bpf_get_task_stack_proto __weak;
+const struct bpf_func_proto bpf_get_branch_snapshot_proto __weak;
const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
@@ -1965,6 +1963,8 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_current_pid_tgid_proto;
case BPF_FUNC_get_ns_current_pid_tgid:
return &bpf_get_ns_current_pid_tgid_proto;
+ case BPF_FUNC_get_current_uid_gid:
+ return &bpf_get_current_uid_gid_proto;
default:
break;
}
@@ -2022,7 +2022,21 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_current_cgroup_id_proto;
case BPF_FUNC_get_current_ancestor_cgroup_id:
return &bpf_get_current_ancestor_cgroup_id_proto;
+ case BPF_FUNC_current_task_under_cgroup:
+ return &bpf_current_task_under_cgroup_proto;
#endif
+#ifdef CONFIG_CGROUP_NET_CLASSID
+ case BPF_FUNC_get_cgroup_classid:
+ return &bpf_get_cgroup_classid_curr_proto;
+#endif
+ case BPF_FUNC_task_storage_get:
+ if (bpf_prog_check_recur(prog))
+ return &bpf_task_storage_get_recur_proto;
+ return &bpf_task_storage_get_proto;
+ case BPF_FUNC_task_storage_delete:
+ if (bpf_prog_check_recur(prog))
+ return &bpf_task_storage_delete_recur_proto;
+ return &bpf_task_storage_delete_proto;
default:
break;
}
@@ -2037,6 +2051,8 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_current_task_proto;
case BPF_FUNC_get_current_task_btf:
return &bpf_get_current_task_btf_proto;
+ case BPF_FUNC_get_current_comm:
+ return &bpf_get_current_comm_proto;
case BPF_FUNC_probe_read_user:
return &bpf_probe_read_user_proto;
case BPF_FUNC_probe_read_kernel:
@@ -2047,6 +2063,10 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_probe_read_kernel_str:
return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
NULL : &bpf_probe_read_kernel_str_proto;
+ case BPF_FUNC_copy_from_user:
+ return &bpf_copy_from_user_proto;
+ case BPF_FUNC_copy_from_user_task:
+ return &bpf_copy_from_user_task_proto;
case BPF_FUNC_snprintf_btf:
return &bpf_snprintf_btf_proto;
case BPF_FUNC_snprintf:
@@ -2057,6 +2077,19 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return bpf_get_trace_vprintk_proto();
case BPF_FUNC_perf_event_read_value:
return bpf_get_perf_event_read_value_proto();
+ case BPF_FUNC_perf_event_read:
+ return &bpf_perf_event_read_proto;
+ case BPF_FUNC_send_signal:
+ return &bpf_send_signal_proto;
+ case BPF_FUNC_send_signal_thread:
+ return &bpf_send_signal_thread_proto;
+ case BPF_FUNC_get_task_stack:
+ return prog->sleepable ? &bpf_get_task_stack_sleepable_proto
+ : &bpf_get_task_stack_proto;
+ case BPF_FUNC_get_branch_snapshot:
+ return &bpf_get_branch_snapshot_proto;
+ case BPF_FUNC_find_vma:
+ return &bpf_find_vma_proto;
default:
return NULL;
}
@@ -2293,6 +2326,26 @@ __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
return __bpf_list_del(head, true);
}
+__bpf_kfunc struct bpf_list_node *bpf_list_front(struct bpf_list_head *head)
+{
+ struct list_head *h = (struct list_head *)head;
+
+ if (list_empty(h) || unlikely(!h->next))
+ return NULL;
+
+ return (struct bpf_list_node *)h->next;
+}
+
+__bpf_kfunc struct bpf_list_node *bpf_list_back(struct bpf_list_head *head)
+{
+ struct list_head *h = (struct list_head *)head;
+
+ if (list_empty(h) || unlikely(!h->next))
+ return NULL;
+
+ return (struct bpf_list_node *)h->prev;
+}
+
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
struct bpf_rb_node *node)
{
@@ -2366,6 +2419,33 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
return (struct bpf_rb_node *)rb_first_cached(r);
}
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_root(struct bpf_rb_root *root)
+{
+ struct rb_root_cached *r = (struct rb_root_cached *)root;
+
+ return (struct bpf_rb_node *)r->rb_root.rb_node;
+}
+
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_left(struct bpf_rb_root *root, struct bpf_rb_node *node)
+{
+ struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
+
+ if (READ_ONCE(node_internal->owner) != root)
+ return NULL;
+
+ return (struct bpf_rb_node *)node_internal->rb_node.rb_left;
+}
+
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_right(struct bpf_rb_root *root, struct bpf_rb_node *node)
+{
+ struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
+
+ if (READ_ONCE(node_internal->owner) != root)
+ return NULL;
+
+ return (struct bpf_rb_node *)node_internal->rb_node.rb_right;
+}
+
/**
* bpf_task_acquire - Acquire a reference to a task. A task acquired by this
* kfunc which is not stored in a map as a kptr, must be released by calling
@@ -2923,9 +3003,9 @@ __bpf_kfunc int bpf_wq_start(struct bpf_wq *wq, unsigned int flags)
__bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
int (callback_fn)(void *map, int *key, void *value),
unsigned int flags,
- void *aux__ign)
+ void *aux__prog)
{
- struct bpf_prog_aux *aux = (struct bpf_prog_aux *)aux__ign;
+ struct bpf_prog_aux *aux = (struct bpf_prog_aux *)aux__prog;
struct bpf_async_kern *async = (struct bpf_async_kern *)wq;
if (flags)
@@ -3194,6 +3274,10 @@ __bpf_kfunc void bpf_local_irq_restore(unsigned long *flags__irq_flag)
local_irq_restore(*flags__irq_flag);
}
+__bpf_kfunc void __bpf_trap(void)
+{
+}
+
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(generic_btf_ids)
@@ -3209,11 +3293,16 @@ BTF_ID_FLAGS(func, bpf_list_push_front_impl)
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_list_front, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_add_impl)
BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_rbtree_root, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_rbtree_left, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_rbtree_right, KF_RET_NULL)
#ifdef CONFIG_CGROUPS
BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
@@ -3294,6 +3383,20 @@ BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLE
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_local_irq_save)
BTF_ID_FLAGS(func, bpf_local_irq_restore)
+BTF_ID_FLAGS(func, bpf_probe_read_user_dynptr)
+BTF_ID_FLAGS(func, bpf_probe_read_kernel_dynptr)
+BTF_ID_FLAGS(func, bpf_probe_read_user_str_dynptr)
+BTF_ID_FLAGS(func, bpf_probe_read_kernel_str_dynptr)
+BTF_ID_FLAGS(func, bpf_copy_from_user_dynptr, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_copy_from_user_str_dynptr, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_copy_from_user_task_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_copy_from_user_task_str_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
+#ifdef CONFIG_DMA_SHARED_BUFFER
+BTF_ID_FLAGS(func, bpf_iter_dmabuf_new, KF_ITER_NEW | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
+#endif
+BTF_ID_FLAGS(func, __bpf_trap)
BTF_KFUNCS_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
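The peek kfuncs added above (bpf_list_front/back, bpf_rbtree_root/left/right) return non-owning references: the result may be NULL and is only valid while the bpf_spin_lock protecting the collection is held. A minimal BPF-side sketch, assuming a program-defined node type and the selftest-style private()/__contains() annotations from bpf_experimental.h (illustrative, not part of this patch):

    #include <vmlinux.h>
    #include <bpf/bpf_helpers.h>
    #include "bpf_experimental.h"	/* kfunc declarations, container_of();
    					 * selftests header, an assumption here */

    struct elem {
    	long val;
    	struct bpf_list_node node;
    };

    private(A) struct bpf_spin_lock glock;
    private(A) struct bpf_list_head ghead __contains(elem, node);

    SEC("tc")
    int peek_front(struct __sk_buff *skb)
    {
    	struct bpf_list_node *n;
    	struct elem *e;
    	long v = 0;

    	bpf_spin_lock(&glock);
    	n = bpf_list_front(&ghead);	/* non-owning ref or NULL */
    	if (n) {
    		e = container_of(n, struct elem, node);
    		v = e->val;		/* only valid under the lock */
    	}
    	bpf_spin_unlock(&glock);
    	return v != 0;
    }

    char _license[] SEC("license") = "GPL";

The rbtree peeks follow the same contract, with bpf_rbtree_left()/bpf_rbtree_right() additionally returning NULL when the node no longer belongs to the given root.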
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 64c3393e8270..4b5f29168618 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -36,6 +36,7 @@
#include <linux/memcontrol.h>
#include <linux/trace_events.h>
#include <linux/tracepoint.h>
+#include <linux/overflow.h>
#include <net/netfilter/nf_bpf_link.h>
#include <net/netkit.h>
@@ -693,7 +694,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
if (IS_ERR_OR_NULL(rec))
return NULL;
- size = offsetof(struct btf_record, fields[rec->cnt]);
+ size = struct_size(rec, fields, rec->cnt);
new_rec = kmemdup(rec, size, GFP_KERNEL | __GFP_NOWARN);
if (!new_rec)
return ERR_PTR(-ENOMEM);
@@ -748,7 +749,7 @@ bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *r
return false;
if (rec_a->cnt != rec_b->cnt)
return false;
- size = offsetof(struct btf_record, fields[rec_a->cnt]);
+ size = struct_size(rec_a, fields, rec_a->cnt);
/* btf_parse_fields uses kzalloc to allocate a btf_record, so unused
* members are zeroed out. So memcmp is safe to do without worrying
* about padding/unused fields.
@@ -3799,14 +3800,14 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
struct bpf_link_info *info)
{
+ u64 ref_ctr_offset, offset;
char __user *uname;
- u64 addr, offset;
u32 ulen, type;
int err;
uname = u64_to_user_ptr(info->perf_event.uprobe.file_name);
ulen = info->perf_event.uprobe.name_len;
- err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr,
+ err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &ref_ctr_offset,
&type, NULL);
if (err)
return err;
@@ -3818,6 +3819,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
info->perf_event.uprobe.name_len = ulen;
info->perf_event.uprobe.offset = offset;
info->perf_event.uprobe.cookie = event->bpf_cookie;
+ info->perf_event.uprobe.ref_ctr_offset = ref_ctr_offset;
return 0;
}
#endif
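The offsetof(struct btf_record, fields[cnt]) to struct_size() conversions above are a hardening pattern: struct_size() saturates to SIZE_MAX when the element count would overflow the size computation, so kmemdup() sees a failing allocation size instead of a silently wrapped one. A rough sketch of the saturating arithmetic it relies on (the real helpers live in <linux/overflow.h>; this is an illustration, not the kernel's definition):

    #include <stddef.h>
    #include <stdint.h>

    /* saturating multiply/add in the spirit of struct_size(); sketch only */
    static size_t size_mul(size_t a, size_t b)
    {
    	size_t r;

    	return __builtin_mul_overflow(a, b, &r) ? SIZE_MAX : r;
    }

    static size_t size_add(size_t a, size_t b)
    {
    	size_t r;

    	return __builtin_add_overflow(a, b, &r) ? SIZE_MAX : r;
    }

    /* STRUCT_SIZE(p, member, cnt) ~= header offset + cnt * element size,
     * saturated instead of wrapped */
    #define STRUCT_SIZE(p, member, cnt)				\
    	size_add(offsetof(__typeof__(*(p)), member),		\
    		 size_mul(sizeof((p)->member[0]), (cnt)))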
diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c
index 81d6cf90584a..941d0d2427e3 100644
--- a/kernel/bpf/sysfs_btf.c
+++ b/kernel/bpf/sysfs_btf.c
@@ -7,14 +7,46 @@
#include <linux/kobject.h>
#include <linux/init.h>
#include <linux/sysfs.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/btf.h>
/* See scripts/link-vmlinux.sh, gen_btf() func for details */
extern char __start_BTF[];
extern char __stop_BTF[];
+static int btf_sysfs_vmlinux_mmap(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *attr,
+ struct vm_area_struct *vma)
+{
+ unsigned long pages = PAGE_ALIGN(attr->size) >> PAGE_SHIFT;
+ size_t vm_size = vma->vm_end - vma->vm_start;
+ phys_addr_t addr = virt_to_phys(__start_BTF);
+ unsigned long pfn = addr >> PAGE_SHIFT;
+
+ if (attr->private != __start_BTF || !PAGE_ALIGNED(addr))
+ return -EINVAL;
+
+ if (vma->vm_pgoff)
+ return -EINVAL;
+
+ if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_MAYSHARE))
+ return -EACCES;
+
+ if (pfn + pages < pfn)
+ return -EINVAL;
+
+ if ((vm_size >> PAGE_SHIFT) > pages)
+ return -EINVAL;
+
+ vm_flags_mod(vma, VM_DONTDUMP, VM_MAYEXEC | VM_MAYWRITE);
+ return remap_pfn_range(vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot);
+}
+
static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = {
.attr = { .name = "vmlinux", .mode = 0444, },
.read_new = sysfs_bin_attr_simple_read,
+ .mmap = btf_sysfs_vmlinux_mmap,
};
struct kobject *btf_kobj;
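With the .mmap handler in place, userspace can map the vmlinux BTF blob directly instead of read()-ing it into a buffer. A hedged userspace sketch; note the handler above only admits read-only private mappings, so PROT_WRITE, PROT_EXEC or MAP_SHARED are expected to fail:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(void)
    {
    	struct stat st;
    	void *btf;
    	int fd;

    	fd = open("/sys/kernel/btf/vmlinux", O_RDONLY);
    	if (fd < 0 || fstat(fd, &st) < 0)
    		return 1;
    	btf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    	if (btf == MAP_FAILED)
    		return 1;
    	/* first two bytes are the BTF magic, 0xeb9f */
    	printf("magic: 0x%04x\n", *(unsigned short *)btf);
    	munmap(btf, st.st_size);
    	close(fd);
    	return 0;
    }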
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 54c6953a8b84..a7d6e0c5928b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -322,6 +322,7 @@ struct bpf_kfunc_call_arg_meta {
struct btf *arg_btf;
u32 arg_btf_id;
bool arg_owning_ref;
+ bool arg_prog;
struct {
struct btf_field *field;
@@ -1923,11 +1924,8 @@ static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env,
u32 steps = 0;
while (topmost && topmost->loop_entry) {
- if (steps++ > st->dfs_depth) {
- WARN_ONCE(true, "verifier bug: infinite loop in get_loop_entry\n");
- verbose(env, "verifier bug: infinite loop in get_loop_entry()\n");
+ if (verifier_bug_if(steps++ > st->dfs_depth, env, "infinite loop"))
return ERR_PTR(-EFAULT);
- }
topmost = topmost->loop_entry;
}
return topmost;
@@ -3459,12 +3457,11 @@ static int mark_reg_read(struct bpf_verifier_env *env,
/* if read wasn't screened by an earlier write ... */
if (writes && state->live & REG_LIVE_WRITTEN)
break;
- if (parent->live & REG_LIVE_DONE) {
- verbose(env, "verifier BUG type %s var_off %lld off %d\n",
- reg_type_str(env, parent->type),
- parent->var_off.value, parent->off);
+ if (verifier_bug_if(parent->live & REG_LIVE_DONE, env,
+ "type %s var_off %lld off %d",
+ reg_type_str(env, parent->type),
+ parent->var_off.value, parent->off))
return -EFAULT;
- }
/* The first condition is more likely to be true than the
* second, checked it first.
*/
@@ -3649,16 +3646,16 @@ static int insn_def_regno(const struct bpf_insn *insn)
case BPF_ST:
return -1;
case BPF_STX:
- if ((BPF_MODE(insn->code) == BPF_ATOMIC ||
- BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) &&
- (insn->imm & BPF_FETCH)) {
+ if (BPF_MODE(insn->code) == BPF_ATOMIC ||
+ BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
if (insn->imm == BPF_CMPXCHG)
return BPF_REG_0;
- else
+ else if (insn->imm == BPF_LOAD_ACQ)
+ return insn->dst_reg;
+ else if (insn->imm & BPF_FETCH)
return insn->src_reg;
- } else {
- return -1;
}
+ return -1;
default:
return insn->dst_reg;
}
@@ -3857,14 +3854,14 @@ static int push_insn_history(struct bpf_verifier_env *env, struct bpf_verifier_s
/* atomic instructions push insn_flags twice, for READ and
* WRITE sides, but they should agree on stack slot
*/
- WARN_ONCE((env->cur_hist_ent->flags & insn_flags) &&
- (env->cur_hist_ent->flags & insn_flags) != insn_flags,
- "verifier insn history bug: insn_idx %d cur flags %x new flags %x\n",
- env->insn_idx, env->cur_hist_ent->flags, insn_flags);
+ verifier_bug_if((env->cur_hist_ent->flags & insn_flags) &&
+ (env->cur_hist_ent->flags & insn_flags) != insn_flags,
+ env, "insn history: insn_idx %d cur flags %x new flags %x",
+ env->insn_idx, env->cur_hist_ent->flags, insn_flags);
env->cur_hist_ent->flags |= insn_flags;
- WARN_ONCE(env->cur_hist_ent->linked_regs != 0,
- "verifier insn history bug: insn_idx %d linked_regs != 0: %#llx\n",
- env->insn_idx, env->cur_hist_ent->linked_regs);
+ verifier_bug_if(env->cur_hist_ent->linked_regs != 0, env,
+ "insn history: insn_idx %d linked_regs: %#llx",
+ env->insn_idx, env->cur_hist_ent->linked_regs);
env->cur_hist_ent->linked_regs = linked_regs;
return 0;
}
@@ -3987,8 +3984,7 @@ static inline u32 bt_empty(struct backtrack_state *bt)
static inline int bt_subprog_enter(struct backtrack_state *bt)
{
if (bt->frame == MAX_CALL_FRAMES - 1) {
- verbose(bt->env, "BUG subprog enter from frame %d\n", bt->frame);
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(bt->env, "subprog enter from frame %d", bt->frame);
return -EFAULT;
}
bt->frame++;
@@ -3998,8 +3994,7 @@ static inline int bt_subprog_enter(struct backtrack_state *bt)
static inline int bt_subprog_exit(struct backtrack_state *bt)
{
if (bt->frame == 0) {
- verbose(bt->env, "BUG subprog exit from frame 0\n");
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(bt->env, "subprog exit from frame 0");
return -EFAULT;
}
bt->frame--;
@@ -4277,14 +4272,15 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* should be literally next instruction in
* caller program
*/
- WARN_ONCE(idx + 1 != subseq_idx, "verifier backtracking bug");
+ verifier_bug_if(idx + 1 != subseq_idx, env,
+ "extra insn from subprog");
/* r1-r5 are invalidated after subprog call,
* so for global func call it shouldn't be set
* anymore
*/
if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
- verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(env, "global subprog unexpected regs %x",
+ bt_reg_mask(bt));
return -EFAULT;
}
/* global subprog always sets R0 */
@@ -4298,16 +4294,17 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* the current frame should be zero by now
*/
if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
- verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(env, "static subprog unexpected regs %x",
+ bt_reg_mask(bt));
return -EFAULT;
}
/* we are now tracking register spills correctly,
* so any instance of leftover slots is a bug
*/
if (bt_stack_mask(bt) != 0) {
- verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt));
- WARN_ONCE(1, "verifier backtracking bug (subprog leftover stack slots)");
+ verifier_bug(env,
+ "static subprog leftover stack slots %llx",
+ bt_stack_mask(bt));
return -EFAULT;
}
/* propagate r1-r5 to the caller */
@@ -4330,13 +4327,13 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* not actually arguments passed directly to callback subprogs
*/
if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
- verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(env, "callback unexpected regs %x",
+ bt_reg_mask(bt));
return -EFAULT;
}
if (bt_stack_mask(bt) != 0) {
- verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt));
- WARN_ONCE(1, "verifier backtracking bug (callback leftover stack slots)");
+ verifier_bug(env, "callback leftover stack slots %llx",
+ bt_stack_mask(bt));
return -EFAULT;
}
/* clear r1-r5 in callback subprog's mask */
@@ -4355,11 +4352,11 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
/* regular helper call sets R0 */
bt_clear_reg(bt, BPF_REG_0);
if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
- /* if backtracing was looking for registers R1-R5
+ /* if backtracking was looking for registers R1-R5
* they should have been found already.
*/
- verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(env, "backtracking call unexpected regs %x",
+ bt_reg_mask(bt));
return -EFAULT;
}
} else if (opcode == BPF_EXIT) {
@@ -4377,8 +4374,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
for (i = BPF_REG_1; i <= BPF_REG_5; i++)
bt_clear_reg(bt, i);
if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
- verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(env, "backtracking exit unexpected regs %x",
+ bt_reg_mask(bt));
return -EFAULT;
}
@@ -4413,8 +4410,10 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* before it would be equally necessary to
* propagate it to dreg.
*/
- bt_set_reg(bt, dreg);
- bt_set_reg(bt, sreg);
+ if (!hist || !(hist->flags & INSN_F_SRC_REG_STACK))
+ bt_set_reg(bt, sreg);
+ if (!hist || !(hist->flags & INSN_F_DST_REG_STACK))
+ bt_set_reg(bt, dreg);
} else if (BPF_SRC(insn->code) == BPF_K) {
/* dreg <cond> K
* Only dreg still needs precision before
@@ -4719,9 +4718,8 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
return 0;
}
- verbose(env, "BUG backtracking func entry subprog %d reg_mask %x stack_mask %llx\n",
- st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(env, "backtracking func entry subprog %d reg_mask %x stack_mask %llx",
+ st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
return -EFAULT;
}
@@ -4757,8 +4755,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
* It means the backtracking missed the spot where
* particular register was initialized with a constant.
*/
- verbose(env, "BUG backtracking idx %d\n", i);
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(env, "backtracking idx %d", i);
return -EFAULT;
}
}
@@ -4783,12 +4780,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
for_each_set_bit(i, mask, 64) {
- if (i >= func->allocated_stack / BPF_REG_SIZE) {
- verbose(env, "BUG backtracking (stack slot %d, total slots %d)\n",
- i, func->allocated_stack / BPF_REG_SIZE);
- WARN_ONCE(1, "verifier backtracking bug (stack slot out of bounds)");
+ if (verifier_bug_if(i >= func->allocated_stack / BPF_REG_SIZE,
+ env, "stack slot %d, total slots %d",
+ i, func->allocated_stack / BPF_REG_SIZE))
return -EFAULT;
- }
if (!is_spilled_scalar_reg(&func->stack[i])) {
bt_clear_frame_slot(bt, fr, i);
@@ -6561,21 +6556,18 @@ continue_func:
/* find the callee */
next_insn = i + insn[i].imm + 1;
sidx = find_subprog(env, next_insn);
- if (sidx < 0) {
- WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
- next_insn);
+ if (verifier_bug_if(sidx < 0, env, "callee not found at insn %d", next_insn))
return -EFAULT;
- }
if (subprog[sidx].is_async_cb) {
if (subprog[sidx].has_tail_call) {
- verbose(env, "verifier bug. subprog has tail_call and async cb\n");
+ verifier_bug(env, "subprog has tail_call and async cb");
return -EFAULT;
}
/* async callbacks don't increase bpf prog stack size unless called directly */
if (!bpf_pseudo_call(insn + i))
continue;
if (subprog[sidx].is_exception_cb) {
 			verbose(env, "insn %d cannot call exception cb directly\n", i);
return -EINVAL;
}
}
@@ -6675,11 +6667,8 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
int start = idx + insn->imm + 1, subprog;
subprog = find_subprog(env, start);
- if (subprog < 0) {
- WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
- start);
+ if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start))
return -EFAULT;
- }
return env->subprog_info[subprog].stack_depth;
}
#endif
@@ -7984,7 +7973,7 @@ static int check_stack_range_initialized(
slot = -i - 1;
spi = slot / BPF_REG_SIZE;
if (state->allocated_stack <= slot) {
- verbose(env, "verifier bug: allocated_stack too small\n");
+ verbose(env, "allocated_stack too small\n");
return -EFAULT;
}
@@ -8413,7 +8402,7 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno,
return -EINVAL;
}
if (meta->map_ptr) {
- verbose(env, "verifier bug. Two map pointers in a timer helper\n");
+ verifier_bug(env, "Two map pointers in a timer helper");
return -EFAULT;
}
meta->map_uid = reg->map_uid;
@@ -10285,8 +10274,7 @@ static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int calls
}
if (state->frame[state->curframe + 1]) {
- verbose(env, "verifier bug. Frame %d already allocated\n",
- state->curframe + 1);
+ verifier_bug(env, "Frame %d already allocated", state->curframe + 1);
return -EFAULT;
}
@@ -10400,8 +10388,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
if (err)
return err;
} else {
- bpf_log(log, "verifier bug: unrecognized arg#%d type %d\n",
- i, arg->arg_type);
+ verifier_bug(env, "unrecognized arg#%d type %d", i, arg->arg_type);
return -EFAULT;
}
}
@@ -10464,13 +10451,13 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins
env->subprog_info[subprog].is_cb = true;
if (bpf_pseudo_kfunc_call(insn) &&
!is_callback_calling_kfunc(insn->imm)) {
- verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
- func_id_name(insn->imm), insn->imm);
+ verifier_bug(env, "kfunc %s#%d not marked as callback-calling",
+ func_id_name(insn->imm), insn->imm);
return -EFAULT;
} else if (!bpf_pseudo_kfunc_call(insn) &&
!is_callback_calling_function(insn->imm)) { /* helper */
- verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
- func_id_name(insn->imm), insn->imm);
+ verifier_bug(env, "helper %s#%d not marked as callback-calling",
+ func_id_name(insn->imm), insn->imm);
return -EFAULT;
}
@@ -10522,10 +10509,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
target_insn = *insn_idx + insn->imm + 1;
subprog = find_subprog(env, target_insn);
- if (subprog < 0) {
- verbose(env, "verifier bug. No program starts at insn %d\n", target_insn);
+ if (verifier_bug_if(subprog < 0, env, "target of func call at insn %d is not a program",
+ target_insn))
return -EFAULT;
- }
caller = state->frame[state->curframe];
err = btf_check_subprog_call(env, subprog, caller->regs);
@@ -11124,7 +11110,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
fmt_map_off);
if (err) {
- verbose(env, "verifier bug\n");
+ verbose(env, "failed to retrieve map value address\n");
return -EFAULT;
}
fmt = (char *)(long)fmt_addr + fmt_map_off;
@@ -11897,6 +11883,11 @@ static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param
return btf_param_match_suffix(btf, arg, "__irq_flag");
}
+static bool is_kfunc_arg_prog(const struct btf *btf, const struct btf_param *arg)
+{
+ return btf_param_match_suffix(btf, arg, "__prog");
+}
+
static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
const struct btf_param *arg,
const char *name)
@@ -11987,6 +11978,16 @@ static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_p
return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID);
}
+static bool is_rbtree_node_type(const struct btf_type *t)
+{
+ return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_RB_NODE_ID]);
+}
+
+static bool is_list_node_type(const struct btf_type *t)
+{
+ return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_LIST_NODE_ID]);
+}
+
static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
const struct btf_param *arg)
{
@@ -12069,6 +12070,8 @@ enum special_kfunc_type {
KF_bpf_list_push_back_impl,
KF_bpf_list_pop_front,
KF_bpf_list_pop_back,
+ KF_bpf_list_front,
+ KF_bpf_list_back,
KF_bpf_cast_to_kern_ctx,
KF_bpf_rdonly_cast,
KF_bpf_rcu_read_lock,
@@ -12076,6 +12079,9 @@ enum special_kfunc_type {
KF_bpf_rbtree_remove,
KF_bpf_rbtree_add_impl,
KF_bpf_rbtree_first,
+ KF_bpf_rbtree_root,
+ KF_bpf_rbtree_left,
+ KF_bpf_rbtree_right,
KF_bpf_dynptr_from_skb,
KF_bpf_dynptr_from_xdp,
KF_bpf_dynptr_slice,
@@ -12101,41 +12107,9 @@ enum special_kfunc_type {
KF_bpf_res_spin_unlock,
KF_bpf_res_spin_lock_irqsave,
KF_bpf_res_spin_unlock_irqrestore,
+ KF___bpf_trap,
};
-BTF_SET_START(special_kfunc_set)
-BTF_ID(func, bpf_obj_new_impl)
-BTF_ID(func, bpf_obj_drop_impl)
-BTF_ID(func, bpf_refcount_acquire_impl)
-BTF_ID(func, bpf_list_push_front_impl)
-BTF_ID(func, bpf_list_push_back_impl)
-BTF_ID(func, bpf_list_pop_front)
-BTF_ID(func, bpf_list_pop_back)
-BTF_ID(func, bpf_cast_to_kern_ctx)
-BTF_ID(func, bpf_rdonly_cast)
-BTF_ID(func, bpf_rbtree_remove)
-BTF_ID(func, bpf_rbtree_add_impl)
-BTF_ID(func, bpf_rbtree_first)
-#ifdef CONFIG_NET
-BTF_ID(func, bpf_dynptr_from_skb)
-BTF_ID(func, bpf_dynptr_from_xdp)
-#endif
-BTF_ID(func, bpf_dynptr_slice)
-BTF_ID(func, bpf_dynptr_slice_rdwr)
-BTF_ID(func, bpf_dynptr_clone)
-BTF_ID(func, bpf_percpu_obj_new_impl)
-BTF_ID(func, bpf_percpu_obj_drop_impl)
-BTF_ID(func, bpf_throw)
-BTF_ID(func, bpf_wq_set_callback_impl)
-#ifdef CONFIG_CGROUPS
-BTF_ID(func, bpf_iter_css_task_new)
-#endif
-#ifdef CONFIG_BPF_LSM
-BTF_ID(func, bpf_set_dentry_xattr)
-BTF_ID(func, bpf_remove_dentry_xattr)
-#endif
-BTF_SET_END(special_kfunc_set)
-
BTF_ID_LIST(special_kfunc_list)
BTF_ID(func, bpf_obj_new_impl)
BTF_ID(func, bpf_obj_drop_impl)
@@ -12144,6 +12118,8 @@ BTF_ID(func, bpf_list_push_front_impl)
BTF_ID(func, bpf_list_push_back_impl)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
+BTF_ID(func, bpf_list_front)
+BTF_ID(func, bpf_list_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rcu_read_lock)
@@ -12151,6 +12127,9 @@ BTF_ID(func, bpf_rcu_read_unlock)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_first)
+BTF_ID(func, bpf_rbtree_root)
+BTF_ID(func, bpf_rbtree_left)
+BTF_ID(func, bpf_rbtree_right)
#ifdef CONFIG_NET
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
@@ -12194,6 +12173,7 @@ BTF_ID(func, bpf_res_spin_lock)
BTF_ID(func, bpf_res_spin_unlock)
BTF_ID(func, bpf_res_spin_lock_irqsave)
BTF_ID(func, bpf_res_spin_unlock_irqrestore)
+BTF_ID(func, __bpf_trap)
static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
{
@@ -12579,14 +12559,19 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
- btf_id == special_kfunc_list[KF_bpf_list_pop_back];
+ btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
+ btf_id == special_kfunc_list[KF_bpf_list_front] ||
+ btf_id == special_kfunc_list[KF_bpf_list_back];
}
static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
{
return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
- btf_id == special_kfunc_list[KF_bpf_rbtree_first];
+ btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
+ btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
+ btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
+ btf_id == special_kfunc_list[KF_bpf_rbtree_right];
}
static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
@@ -12686,7 +12671,9 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
break;
case BPF_RB_NODE:
ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
- kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]);
+ kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
+ kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
+ kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
break;
default:
verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
@@ -12906,6 +12893,17 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (is_kfunc_arg_ignore(btf, &args[i]))
continue;
+ if (is_kfunc_arg_prog(btf, &args[i])) {
+ /* Used to reject repeated use of __prog. */
+ if (meta->arg_prog) {
+ verbose(env, "Only 1 prog->aux argument supported per-kfunc\n");
+ return -EFAULT;
+ }
+ meta->arg_prog = true;
+ cur_aux(env)->arg_prog = regno;
+ continue;
+ }
+
if (btf_type_is_scalar(t)) {
if (reg->type != SCALAR_VALUE) {
verbose(env, "R%d is not a scalar\n", regno);
@@ -13200,22 +13198,22 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return ret;
break;
case KF_ARG_PTR_TO_RB_NODE:
- if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
- if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
- verbose(env, "rbtree_remove node input must be non-owning ref\n");
+ if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+ if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ verbose(env, "arg#%d expected pointer to allocated object\n", i);
return -EINVAL;
}
- if (in_rbtree_lock_required_cb(env)) {
- verbose(env, "rbtree_remove not allowed in rbtree cb\n");
+ if (!reg->ref_obj_id) {
+ verbose(env, "allocated object must be referenced\n");
return -EINVAL;
}
} else {
- if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
- verbose(env, "arg#%d expected pointer to allocated object\n", i);
+ if (!type_is_non_owning_ref(reg->type) && !reg->ref_obj_id) {
+ verbose(env, "%s can only take non-owning or refcounted bpf_rb_node pointer\n", func_name);
return -EINVAL;
}
- if (!reg->ref_obj_id) {
- verbose(env, "allocated object must be referenced\n");
+ if (in_rbtree_lock_required_cb(env)) {
+ verbose(env, "%s not allowed in rbtree cb\n", func_name);
return -EINVAL;
}
}
@@ -13420,6 +13418,178 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env,
return 0;
}
+/* Check special kfuncs and return:
+ *  1 - kfunc handled here; skip the 'else' branches, continue verification
+ *  0 - kfunc not handled here; fall through to the 'else' branches
+ * <0 - kfunc handled here but failed; return the error
+ */
+static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
+ struct bpf_reg_state *regs, struct bpf_insn_aux_data *insn_aux,
+ const struct btf_type *ptr_type, struct btf *desc_btf)
+{
+ const struct btf_type *ret_t;
+ int err = 0;
+
+ if (meta->btf != btf_vmlinux)
+ return 0;
+
+ if (meta->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
+ meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+ struct btf_struct_meta *struct_meta;
+ struct btf *ret_btf;
+ u32 ret_btf_id;
+
+ if (meta->func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
+ return -ENOMEM;
+
+ if (((u64)(u32)meta->arg_constant.value) != meta->arg_constant.value) {
+ verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
+ return -EINVAL;
+ }
+
+ ret_btf = env->prog->aux->btf;
+ ret_btf_id = meta->arg_constant.value;
+
+ /* This may be NULL due to user not supplying a BTF */
+ if (!ret_btf) {
+ verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
+ return -EINVAL;
+ }
+
+ ret_t = btf_type_by_id(ret_btf, ret_btf_id);
+ if (!ret_t || !__btf_type_is_struct(ret_t)) {
+ verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
+ return -EINVAL;
+ }
+
+ if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+ if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
+ verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n",
+ ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
+ return -EINVAL;
+ }
+
+ if (!bpf_global_percpu_ma_set) {
+ mutex_lock(&bpf_percpu_ma_lock);
+ if (!bpf_global_percpu_ma_set) {
+ /* Charge memory allocated with bpf_global_percpu_ma to
+ * root memcg. The obj_cgroup for root memcg is NULL.
+ */
+ err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL);
+ if (!err)
+ bpf_global_percpu_ma_set = true;
+ }
+ mutex_unlock(&bpf_percpu_ma_lock);
+ if (err)
+ return err;
+ }
+
+ mutex_lock(&bpf_percpu_ma_lock);
+ err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
+ mutex_unlock(&bpf_percpu_ma_lock);
+ if (err)
+ return err;
+ }
+
+ struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
+ if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+ if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
+ verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
+ return -EINVAL;
+ }
+
+ if (struct_meta) {
+ verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
+ return -EINVAL;
+ }
+ }
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
+ regs[BPF_REG_0].btf = ret_btf;
+ regs[BPF_REG_0].btf_id = ret_btf_id;
+ if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
+ regs[BPF_REG_0].type |= MEM_PERCPU;
+
+ insn_aux->obj_new_size = ret_t->size;
+ insn_aux->kptr_struct_meta = struct_meta;
+ } else if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
+ regs[BPF_REG_0].btf = meta->arg_btf;
+ regs[BPF_REG_0].btf_id = meta->arg_btf_id;
+
+ insn_aux->kptr_struct_meta =
+ btf_find_struct_meta(meta->arg_btf,
+ meta->arg_btf_id);
+ } else if (is_list_node_type(ptr_type)) {
+ struct btf_field *field = meta->arg_list_head.field;
+
+ mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
+ } else if (is_rbtree_node_type(ptr_type)) {
+ struct btf_field *field = meta->arg_rbtree_root.field;
+
+ mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
+ } else if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
+ regs[BPF_REG_0].btf = desc_btf;
+ regs[BPF_REG_0].btf_id = meta->ret_btf_id;
+ } else if (meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
+ ret_t = btf_type_by_id(desc_btf, meta->arg_constant.value);
+ if (!ret_t || !btf_type_is_struct(ret_t)) {
+ verbose(env,
+ "kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
+ return -EINVAL;
+ }
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
+ regs[BPF_REG_0].btf = desc_btf;
+ regs[BPF_REG_0].btf_id = meta->arg_constant.value;
+ } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
+ meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
+ enum bpf_type_flag type_flag = get_dynptr_type_flag(meta->initialized_dynptr.type);
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+
+ if (!meta->arg_constant.found) {
+ verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
+ return -EFAULT;
+ }
+
+ regs[BPF_REG_0].mem_size = meta->arg_constant.value;
+
+ /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
+ regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
+
+ if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
+ regs[BPF_REG_0].type |= MEM_RDONLY;
+ } else {
+ /* this will set env->seen_direct_write to true */
+ if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
+ verbose(env, "the prog does not allow writes to packet data\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!meta->initialized_dynptr.id) {
+ verbose(env, "verifier internal error: no dynptr id\n");
+ return -EFAULT;
+ }
+ regs[BPF_REG_0].dynptr_id = meta->initialized_dynptr.id;
+
+ /* we don't need to set BPF_REG_0's ref obj id
+ * because packet slices are not refcounted (see
+ * dynptr_type_refcounted)
+ */
+ } else {
+ return 0;
+ }
+
+ return 1;
+}
+
static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -13434,7 +13604,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
struct bpf_insn_aux_data *insn_aux;
int err, insn_idx = *insn_idx_p;
const struct btf_param *args;
- const struct btf_type *ret_t;
struct btf *desc_btf;
/* skip for now, but return error when we find this in fixup_kfunc_call */
@@ -13476,6 +13645,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return err;
}
__mark_btf_func_reg_size(env, regs, BPF_REG_0, sizeof(u32));
+ } else if (!insn->off && insn->imm == special_kfunc_list[KF___bpf_trap]) {
+ verbose(env, "unexpected __bpf_trap() due to uninitialized variable?\n");
+ return -EFAULT;
}
if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
@@ -13654,165 +13826,10 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
mark_btf_func_reg_size(env, BPF_REG_0, t->size);
} else if (btf_type_is_ptr(t)) {
ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
-
- if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
- if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
- meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
- struct btf_struct_meta *struct_meta;
- struct btf *ret_btf;
- u32 ret_btf_id;
-
- if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
- return -ENOMEM;
-
- if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
- verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
- return -EINVAL;
- }
-
- ret_btf = env->prog->aux->btf;
- ret_btf_id = meta.arg_constant.value;
-
- /* This may be NULL due to user not supplying a BTF */
- if (!ret_btf) {
- verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
- return -EINVAL;
- }
-
- ret_t = btf_type_by_id(ret_btf, ret_btf_id);
- if (!ret_t || !__btf_type_is_struct(ret_t)) {
- verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
- return -EINVAL;
- }
-
- if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
- if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
- verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n",
- ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
- return -EINVAL;
- }
-
- if (!bpf_global_percpu_ma_set) {
- mutex_lock(&bpf_percpu_ma_lock);
- if (!bpf_global_percpu_ma_set) {
- /* Charge memory allocated with bpf_global_percpu_ma to
- * root memcg. The obj_cgroup for root memcg is NULL.
- */
- err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL);
- if (!err)
- bpf_global_percpu_ma_set = true;
- }
- mutex_unlock(&bpf_percpu_ma_lock);
- if (err)
- return err;
- }
-
- mutex_lock(&bpf_percpu_ma_lock);
- err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
- mutex_unlock(&bpf_percpu_ma_lock);
- if (err)
- return err;
- }
-
- struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
- if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
- if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
- verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
- return -EINVAL;
- }
-
- if (struct_meta) {
- verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
- return -EINVAL;
- }
- }
-
- mark_reg_known_zero(env, regs, BPF_REG_0);
- regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
- regs[BPF_REG_0].btf = ret_btf;
- regs[BPF_REG_0].btf_id = ret_btf_id;
- if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
- regs[BPF_REG_0].type |= MEM_PERCPU;
-
- insn_aux->obj_new_size = ret_t->size;
- insn_aux->kptr_struct_meta = struct_meta;
- } else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
- mark_reg_known_zero(env, regs, BPF_REG_0);
- regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
- regs[BPF_REG_0].btf = meta.arg_btf;
- regs[BPF_REG_0].btf_id = meta.arg_btf_id;
-
- insn_aux->kptr_struct_meta =
- btf_find_struct_meta(meta.arg_btf,
- meta.arg_btf_id);
- } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
- meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
- struct btf_field *field = meta.arg_list_head.field;
-
- mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
- } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
- meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
- struct btf_field *field = meta.arg_rbtree_root.field;
-
- mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
- } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
- mark_reg_known_zero(env, regs, BPF_REG_0);
- regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
- regs[BPF_REG_0].btf = desc_btf;
- regs[BPF_REG_0].btf_id = meta.ret_btf_id;
- } else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
- ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value);
- if (!ret_t || !btf_type_is_struct(ret_t)) {
- verbose(env,
- "kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
- return -EINVAL;
- }
-
- mark_reg_known_zero(env, regs, BPF_REG_0);
- regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
- regs[BPF_REG_0].btf = desc_btf;
- regs[BPF_REG_0].btf_id = meta.arg_constant.value;
- } else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
- meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
- enum bpf_type_flag type_flag = get_dynptr_type_flag(meta.initialized_dynptr.type);
-
- mark_reg_known_zero(env, regs, BPF_REG_0);
-
- if (!meta.arg_constant.found) {
- verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
- return -EFAULT;
- }
-
- regs[BPF_REG_0].mem_size = meta.arg_constant.value;
-
- /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
- regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
-
- if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
- regs[BPF_REG_0].type |= MEM_RDONLY;
- } else {
- /* this will set env->seen_direct_write to true */
- if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
- verbose(env, "the prog does not allow writes to packet data\n");
- return -EINVAL;
- }
- }
-
- if (!meta.initialized_dynptr.id) {
- verbose(env, "verifier internal error: no dynptr id\n");
- return -EFAULT;
- }
- regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id;
-
- /* we don't need to set BPF_REG_0's ref obj id
- * because packet slices are not refcounted (see
- * dynptr_type_refcounted)
- */
- } else {
- verbose(env, "kernel function %s unhandled dynamic return type\n",
- meta.func_name);
- return -EFAULT;
- }
+ err = check_special_kfunc(env, &meta, regs, insn_aux, ptr_type, desc_btf);
+ if (err) {
+ if (err < 0)
+ return err;
} else if (btf_type_is_void(ptr_type)) {
/* kfunc returning 'void *' is equivalent to returning scalar */
mark_reg_unknown(env, regs, BPF_REG_0);
@@ -13881,14 +13898,14 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (is_kfunc_ret_null(&meta))
regs[BPF_REG_0].id = id;
regs[BPF_REG_0].ref_obj_id = id;
- } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
+ } else if (is_rbtree_node_type(ptr_type) || is_list_node_type(ptr_type)) {
ref_set_non_owning(env, &regs[BPF_REG_0]);
}
if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
regs[BPF_REG_0].id = ++env->id_gen;
} else if (btf_type_is_void(t)) {
- if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
+ if (meta.btf == btf_vmlinux) {
if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
insn_aux->kptr_struct_meta =
@@ -16377,6 +16394,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
struct bpf_reg_state *eq_branch_regs;
struct linked_regs linked_regs = {};
u8 opcode = BPF_OP(insn->code);
+ int insn_flags = 0;
bool is_jmp32;
int pred = -1;
int err;
@@ -16435,6 +16453,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
insn->src_reg);
return -EACCES;
}
+
+ if (src_reg->type == PTR_TO_STACK)
+ insn_flags |= INSN_F_SRC_REG_STACK;
+ if (dst_reg->type == PTR_TO_STACK)
+ insn_flags |= INSN_F_DST_REG_STACK;
} else {
if (insn->src_reg != BPF_REG_0) {
verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
@@ -16444,6 +16467,15 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
memset(src_reg, 0, sizeof(*src_reg));
src_reg->type = SCALAR_VALUE;
__mark_reg_known(src_reg, insn->imm);
+
+ if (dst_reg->type == PTR_TO_STACK)
+ insn_flags |= INSN_F_DST_REG_STACK;
+ }
+
+ if (insn_flags) {
+ err = push_insn_history(env, this_branch, insn_flags, 0);
+ if (err)
+ return err;
}
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
@@ -19659,10 +19691,9 @@ process_bpf_exit:
return err;
break;
} else {
- if (WARN_ON_ONCE(env->cur_state->loop_entry)) {
- verbose(env, "verifier bug: env->cur_state->loop_entry != NULL\n");
+ if (verifier_bug_if(env->cur_state->loop_entry, env,
+ "broken loop detection"))
return -EFAULT;
- }
do_print_state = true;
continue;
}
@@ -20720,10 +20751,9 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
if (bpf_pseudo_kfunc_call(&insn))
continue;
- if (WARN_ON(load_reg == -1)) {
- verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
+ if (verifier_bug_if(load_reg == -1, env,
+ "zext_dst is set, but no reg is defined"))
return -EFAULT;
- }
zext_patch[0] = insn;
zext_patch[1].dst_reg = load_reg;
@@ -21040,11 +21070,9 @@ static int jit_subprogs(struct bpf_verifier_env *env)
* propagated in any case.
*/
subprog = find_subprog(env, i + insn->imm + 1);
- if (subprog < 0) {
- WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
- i + insn->imm + 1);
+ if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d",
+ i + insn->imm + 1))
return -EFAULT;
- }
/* temporarily remember subprog id inside insn instead of
* aux_data, since next loop will split up all insns into funcs
*/
@@ -21487,13 +21515,17 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
*cnt = 1;
- } else if (is_bpf_wq_set_callback_impl_kfunc(desc->func_id)) {
- struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(BPF_REG_4, (long)env->prog->aux) };
+ }
- insn_buf[0] = ld_addrs[0];
- insn_buf[1] = ld_addrs[1];
- insn_buf[2] = *insn;
- *cnt = 3;
+ if (env->insn_aux_data[insn_idx].arg_prog) {
+ u32 regno = env->insn_aux_data[insn_idx].arg_prog;
+ struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(regno, (long)env->prog->aux) };
+ int idx = *cnt;
+
+ insn_buf[idx++] = ld_addrs[0];
+ insn_buf[idx++] = ld_addrs[1];
+ insn_buf[idx++] = *insn;
+ *cnt = idx;
}
return 0;
}
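The fixup_kfunc_call() change above generalizes what used to be a bpf_wq_set_callback_impl() special case: any kfunc parameter named with a __prog suffix now has the calling program's bpf_prog_aux loaded into its register (see is_kfunc_arg_prog() and the arg_prog bookkeeping earlier in this patch). A hedged kernel-side sketch of a kfunc using the convention (the function itself is illustrative, not in this patch):

    /* aux__prog is invisible to the BPF caller; the verifier rewrites the
     * call so the register carries (long)env->prog->aux */
    __bpf_kfunc int bpf_example_whoami(void *aux__prog)
    {
    	struct bpf_prog_aux *aux = aux__prog;

    	pr_info("called from BPF prog %s\n", aux->name);
    	return 0;
    }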
@@ -22403,7 +22435,7 @@ next_insn:
continue;
/* We need two slots in case timed may_goto is supported. */
if (stack_slots > slots) {
- verbose(env, "verifier bug: stack_slots supports may_goto only\n");
+ verifier_bug(env, "stack_slots supports may_goto only");
return -EFAULT;
}
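All of the verbose()+WARN_ONCE() conversions in this file target the verifier_bug()/verifier_bug_if() helpers this series adds to include/linux/bpf_verifier.h. A simplified sketch of their shape; the actual header may differ in detail:

    /* sketch only; the real macros live in include/linux/bpf_verifier.h */
    #define verifier_bug_if(cond, env, fmt, args...)			\
    	({								\
    		bool __cond = (cond);					\
    		if (__cond) {						\
    			WARN_ONCE(1, "verifier bug: " fmt "\n", ##args); \
    			verbose(env, "verifier bug: " fmt "\n", ##args); \
    		}							\
    		__cond;							\
    	})
    #define verifier_bug(env, fmt, args...) \
    	(void)verifier_bug_if(1, env, fmt, ##args)

The win is that every internal invariant violation now both taints the kernel log (so fuzzers and CI notice) and reports to the verifier log, from one call site instead of two.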
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 793e288f63cf..2c41c78be61e 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -5791,21 +5791,8 @@ static int bpf_scx_btf_struct_access(struct bpf_verifier_log *log,
return -EACCES;
}
-static const struct bpf_func_proto *
-bpf_scx_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
-{
- switch (func_id) {
- case BPF_FUNC_task_storage_get:
- return &bpf_task_storage_get_proto;
- case BPF_FUNC_task_storage_delete:
- return &bpf_task_storage_delete_proto;
- default:
- return bpf_base_func_proto(func_id, prog);
- }
-}
-
static const struct bpf_verifier_ops bpf_scx_verifier_ops = {
- .get_func_proto = bpf_scx_get_func_proto,
+ .get_func_proto = bpf_base_func_proto,
.is_valid_access = bpf_scx_is_valid_access,
.btf_struct_access = bpf_scx_btf_struct_access,
};
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 187dc37d61d4..132c8be6f635 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -572,7 +572,7 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
return value;
}
-static const struct bpf_func_proto bpf_perf_event_read_proto = {
+const struct bpf_func_proto bpf_perf_event_read_proto = {
.func = bpf_perf_event_read,
.gpl_only = true,
.ret_type = RET_INTEGER,
@@ -882,7 +882,7 @@ BPF_CALL_1(bpf_send_signal, u32, sig)
return bpf_send_signal_common(sig, PIDTYPE_TGID, NULL, 0);
}
-static const struct bpf_func_proto bpf_send_signal_proto = {
+const struct bpf_func_proto bpf_send_signal_proto = {
.func = bpf_send_signal,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -894,7 +894,7 @@ BPF_CALL_1(bpf_send_signal_thread, u32, sig)
return bpf_send_signal_common(sig, PIDTYPE_PID, NULL, 0);
}
-static const struct bpf_func_proto bpf_send_signal_thread_proto = {
+const struct bpf_func_proto bpf_send_signal_thread_proto = {
.func = bpf_send_signal_thread,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1185,7 +1185,7 @@ BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
return entry_cnt * br_entry_size;
}
-static const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
+const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
.func = bpf_get_branch_snapshot,
.gpl_only = true,
.ret_type = RET_INTEGER,
@@ -1430,56 +1430,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
const struct bpf_func_proto *func_proto;
switch (func_id) {
- case BPF_FUNC_map_lookup_elem:
- return &bpf_map_lookup_elem_proto;
- case BPF_FUNC_map_update_elem:
- return &bpf_map_update_elem_proto;
- case BPF_FUNC_map_delete_elem:
- return &bpf_map_delete_elem_proto;
- case BPF_FUNC_map_push_elem:
- return &bpf_map_push_elem_proto;
- case BPF_FUNC_map_pop_elem:
- return &bpf_map_pop_elem_proto;
- case BPF_FUNC_map_peek_elem:
- return &bpf_map_peek_elem_proto;
- case BPF_FUNC_map_lookup_percpu_elem:
- return &bpf_map_lookup_percpu_elem_proto;
- case BPF_FUNC_ktime_get_ns:
- return &bpf_ktime_get_ns_proto;
- case BPF_FUNC_ktime_get_boot_ns:
- return &bpf_ktime_get_boot_ns_proto;
- case BPF_FUNC_tail_call:
- return &bpf_tail_call_proto;
- case BPF_FUNC_get_current_task:
- return &bpf_get_current_task_proto;
- case BPF_FUNC_get_current_task_btf:
- return &bpf_get_current_task_btf_proto;
- case BPF_FUNC_task_pt_regs:
- return &bpf_task_pt_regs_proto;
- case BPF_FUNC_get_current_uid_gid:
- return &bpf_get_current_uid_gid_proto;
- case BPF_FUNC_get_current_comm:
- return &bpf_get_current_comm_proto;
- case BPF_FUNC_trace_printk:
- return bpf_get_trace_printk_proto();
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
- case BPF_FUNC_get_numa_node_id:
- return &bpf_get_numa_node_id_proto;
- case BPF_FUNC_perf_event_read:
- return &bpf_perf_event_read_proto;
- case BPF_FUNC_get_prandom_u32:
- return &bpf_get_prandom_u32_proto;
- case BPF_FUNC_probe_read_user:
- return &bpf_probe_read_user_proto;
- case BPF_FUNC_probe_read_kernel:
- return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
- NULL : &bpf_probe_read_kernel_proto;
- case BPF_FUNC_probe_read_user_str:
- return &bpf_probe_read_user_str_proto;
- case BPF_FUNC_probe_read_kernel_str:
- return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
- NULL : &bpf_probe_read_kernel_str_proto;
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
case BPF_FUNC_probe_read:
return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
@@ -1488,65 +1440,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
NULL : &bpf_probe_read_compat_str_proto;
#endif
-#ifdef CONFIG_CGROUPS
- case BPF_FUNC_cgrp_storage_get:
- return &bpf_cgrp_storage_get_proto;
- case BPF_FUNC_cgrp_storage_delete:
- return &bpf_cgrp_storage_delete_proto;
- case BPF_FUNC_current_task_under_cgroup:
- return &bpf_current_task_under_cgroup_proto;
-#endif
- case BPF_FUNC_send_signal:
- return &bpf_send_signal_proto;
- case BPF_FUNC_send_signal_thread:
- return &bpf_send_signal_thread_proto;
- case BPF_FUNC_perf_event_read_value:
- return &bpf_perf_event_read_value_proto;
- case BPF_FUNC_ringbuf_output:
- return &bpf_ringbuf_output_proto;
- case BPF_FUNC_ringbuf_reserve:
- return &bpf_ringbuf_reserve_proto;
- case BPF_FUNC_ringbuf_submit:
- return &bpf_ringbuf_submit_proto;
- case BPF_FUNC_ringbuf_discard:
- return &bpf_ringbuf_discard_proto;
- case BPF_FUNC_ringbuf_query:
- return &bpf_ringbuf_query_proto;
- case BPF_FUNC_jiffies64:
- return &bpf_jiffies64_proto;
- case BPF_FUNC_get_task_stack:
- return prog->sleepable ? &bpf_get_task_stack_sleepable_proto
- : &bpf_get_task_stack_proto;
- case BPF_FUNC_copy_from_user:
- return &bpf_copy_from_user_proto;
- case BPF_FUNC_copy_from_user_task:
- return &bpf_copy_from_user_task_proto;
- case BPF_FUNC_snprintf_btf:
- return &bpf_snprintf_btf_proto;
- case BPF_FUNC_per_cpu_ptr:
- return &bpf_per_cpu_ptr_proto;
- case BPF_FUNC_this_cpu_ptr:
- return &bpf_this_cpu_ptr_proto;
- case BPF_FUNC_task_storage_get:
- if (bpf_prog_check_recur(prog))
- return &bpf_task_storage_get_recur_proto;
- return &bpf_task_storage_get_proto;
- case BPF_FUNC_task_storage_delete:
- if (bpf_prog_check_recur(prog))
- return &bpf_task_storage_delete_recur_proto;
- return &bpf_task_storage_delete_proto;
- case BPF_FUNC_for_each_map_elem:
- return &bpf_for_each_map_elem_proto;
- case BPF_FUNC_snprintf:
- return &bpf_snprintf_proto;
case BPF_FUNC_get_func_ip:
return &bpf_get_func_ip_proto_tracing;
- case BPF_FUNC_get_branch_snapshot:
- return &bpf_get_branch_snapshot_proto;
- case BPF_FUNC_find_vma:
- return &bpf_find_vma_proto;
- case BPF_FUNC_trace_vprintk:
- return bpf_get_trace_vprintk_proto();
default:
break;
}
@@ -1858,7 +1753,7 @@ static struct pt_regs *get_bpf_raw_tp_regs(void)
struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);
- if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
+ if (nest_level > ARRAY_SIZE(tp_regs->regs)) {
this_cpu_dec(bpf_raw_tp_nest_level);
return ERR_PTR(-EBUSY);
}
@@ -2987,6 +2882,9 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
if (sizeof(u64) != sizeof(void *))
return -EOPNOTSUPP;
+ if (attr->link_create.flags)
+ return -EINVAL;
+
if (!is_kprobe_multi(prog))
return -EINVAL;
@@ -3376,6 +3274,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
if (sizeof(u64) != sizeof(void *))
return -EOPNOTSUPP;
+ if (attr->link_create.flags)
+ return -EINVAL;
+
if (!is_uprobe_multi(prog))
return -EINVAL;
@@ -3417,7 +3318,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
}
if (pid) {
+ rcu_read_lock();
task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
+ rcu_read_unlock();
if (!task) {
err = -ESRCH;
goto error_path_put;
@@ -3565,6 +3468,146 @@ static int __init bpf_kprobe_multi_kfuncs_init(void)
late_initcall(bpf_kprobe_multi_kfuncs_init);
+typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk);
+
+/*
+ * The __always_inline is to make sure the compiler doesn't
+ * generate indirect calls into the callbacks, which are expensive
+ * on some kernel configurations. This lets the compiler emit
+ * direct calls to the specific callback implementations
+ * (copy_user_data_sleepable, copy_user_data_nofault, and so on).
+ */
+static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 doff, u32 size,
+ const void *unsafe_src,
+ copy_fn_t str_copy_fn,
+ struct task_struct *tsk)
+{
+ struct bpf_dynptr_kern *dst;
+ u32 chunk_sz, off;
+ void *dst_slice;
+ int cnt, err;
+ char buf[256];
+
+ dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
+ if (likely(dst_slice))
+ return str_copy_fn(dst_slice, unsafe_src, size, tsk);
+
+ dst = (struct bpf_dynptr_kern *)dptr;
+ if (bpf_dynptr_check_off_len(dst, doff, size))
+ return -E2BIG;
+
+ for (off = 0; off < size; off += chunk_sz - 1) {
+ chunk_sz = min_t(u32, sizeof(buf), size - off);
+		/* Expect str_copy_fn to return the count of copied bytes,
+		 * including the zero terminator. The next iteration then
+		 * advances off by chunk_sz - 1 so that the NUL is overwritten.
+		 */
+ cnt = str_copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
+ if (cnt < 0)
+ return cnt;
+ err = __bpf_dynptr_write(dst, doff + off, buf, cnt, 0);
+ if (err)
+ return err;
+ if (cnt < chunk_sz || chunk_sz == 1) /* we are done */
+ return off + cnt;
+ }
+ return off;
+}
+
+static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32 doff,
+ u32 size, const void *unsafe_src,
+ copy_fn_t copy_fn, struct task_struct *tsk)
+{
+ struct bpf_dynptr_kern *dst;
+ void *dst_slice;
+ char buf[256];
+ u32 off, chunk_sz;
+ int err;
+
+ dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
+ if (likely(dst_slice))
+ return copy_fn(dst_slice, unsafe_src, size, tsk);
+
+ dst = (struct bpf_dynptr_kern *)dptr;
+ if (bpf_dynptr_check_off_len(dst, doff, size))
+ return -E2BIG;
+
+ for (off = 0; off < size; off += chunk_sz) {
+ chunk_sz = min_t(u32, sizeof(buf), size - off);
+ err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
+ if (err)
+ return err;
+ err = __bpf_dynptr_write(dst, doff + off, buf, chunk_sz, 0);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static __always_inline int copy_user_data_nofault(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ return copy_from_user_nofault(dst, (const void __user *)unsafe_src, size);
+}
+
+static __always_inline int copy_user_data_sleepable(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ int ret;
+
+ if (!tsk) { /* Read from the current task */
+ ret = copy_from_user(dst, (const void __user *)unsafe_src, size);
+ if (ret)
+ return -EFAULT;
+ return 0;
+ }
+
+ ret = access_process_vm(tsk, (unsigned long)unsafe_src, dst, size, 0);
+ if (ret != size)
+ return -EFAULT;
+ return 0;
+}
+
+static __always_inline int copy_kernel_data_nofault(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ return copy_from_kernel_nofault(dst, unsafe_src, size);
+}
+
+static __always_inline int copy_user_str_nofault(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ return strncpy_from_user_nofault(dst, (const void __user *)unsafe_src, size);
+}
+
+static __always_inline int copy_user_str_sleepable(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ int ret;
+
+ if (unlikely(size == 0))
+ return 0;
+
+ if (tsk) {
+ ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_src, dst, size, 0);
+ } else {
+ ret = strncpy_from_user(dst, (const void __user *)unsafe_src, size - 1);
+ /* strncpy_from_user does not guarantee NUL termination */
+ if (ret >= 0)
+ ((char *)dst)[ret] = '\0';
+ }
+
+ if (ret < 0)
+ return ret;
+ return ret + 1;
+}
+
+static __always_inline int copy_kernel_str_nofault(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ return strncpy_from_kernel_nofault(dst, unsafe_src, size);
+}
+
__bpf_kfunc_start_defs();
__bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type,
@@ -3576,4 +3619,62 @@ __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid
return bpf_send_signal_common(sig, type, task, value);
}
+__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_data_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign,
+ copy_kernel_data_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_str_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign,
+ copy_kernel_str_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_data_sleepable, NULL);
+}
+
+__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_str_sleepable, NULL);
+}
+
+__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign,
+ struct task_struct *tsk)
+{
+ return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_data_sleepable, tsk);
+}
+
+__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign,
+ struct task_struct *tsk)
+{
+ return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_str_sleepable, tsk);
+}
+
__bpf_kfunc_end_defs();
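A hedged BPF-side sketch of the sleepable string variant, pairing it with a ringbuf-backed dynptr (map layout, section name and sizes are illustrative; the kfunc signature is the one declared above):

    #include <vmlinux.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    struct {
    	__uint(type, BPF_MAP_TYPE_RINGBUF);
    	__uint(max_entries, 4096);
    } rb SEC(".maps");

    extern int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, __u32 off,
    					     __u32 size, const void *user_ptr) __ksym;

    SEC("uprobe.s")	/* sleepable, required for copy_from_user */
    int BPF_UPROBE(read_arg, const char *user_str)
    {
    	struct bpf_dynptr d;
    	long n;

    	bpf_ringbuf_reserve_dynptr(&rb, 256, 0, &d);
    	/* returns bytes written including the NUL, or a negative error */
    	n = bpf_copy_from_user_str_dynptr(&d, 0, 256, user_str);
    	if (n < 0)
    		bpf_ringbuf_discard_dynptr(&d, 0);
    	else
    		bpf_ringbuf_submit_dynptr(&d, 0);
    	return 0;
    }

    char _license[] SEC("license") = "GPL";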
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 35cf76c75dd7..f95a2c3d5b1b 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1489,7 +1489,7 @@ int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
: BPF_FD_TYPE_UPROBE;
*filename = tu->filename;
*probe_offset = tu->offset;
- *probe_addr = 0;
+ *probe_addr = tu->ref_ctr_offset;
return 0;
}
#endif /* CONFIG_PERF_EVENTS */
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 042d3ac3b4a3..a5bde5db58ef 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -4870,7 +4870,8 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
if (!smp_sufficient_security(conn->hcon, pchan->sec_level,
SMP_ALLOW_STK)) {
- result = L2CAP_CR_LE_AUTHENTICATION;
+ result = pchan->sec_level == BT_SECURITY_MEDIUM ?
+ L2CAP_CR_LE_ENCRYPTION : L2CAP_CR_LE_AUTHENTICATION;
chan = NULL;
goto response_unlock;
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 261926dccc7e..14a9462fced5 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2566,7 +2566,8 @@ static int mgmt_hci_cmd_sync(struct sock *sk, struct hci_dev *hdev,
struct mgmt_pending_cmd *cmd;
int err;
- if (len < sizeof(*cp))
+ if (len != (offsetof(struct mgmt_cp_hci_cmd_sync, params) +
+ le16_to_cpu(cp->params_len)))
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_HCI_CMD_SYNC,
MGMT_STATUS_INVALID_PARAMS);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 7cb192cbd65f..aaf13a7d58ed 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -569,6 +569,11 @@ __bpf_kfunc u32 bpf_fentry_test9(u32 *a)
return *a;
}
+int noinline bpf_fentry_test10(const void *a)
+{
+ return (long)a;
+}
+
void noinline bpf_fentry_test_sinfo(struct skb_shared_info *sinfo)
{
}
@@ -699,7 +704,8 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 ||
bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 ||
bpf_fentry_test8(&arg) != 0 ||
- bpf_fentry_test9(&retval) != 0)
+ bpf_fentry_test9(&retval) != 0 ||
+ bpf_fentry_test10((void *)0) != 0)
goto out;
break;
case BPF_MODIFY_RETURN:
diff --git a/net/core/dev.c b/net/core/dev.c
index 2b514d95c528..a388f459a366 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9968,6 +9968,7 @@ int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf)
return dev->netdev_ops->ndo_bpf(dev, bpf);
}
+EXPORT_SYMBOL_GPL(netif_xdp_propagate);
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
{
diff --git a/net/core/devmem.h b/net/core/devmem.h
index e7ba77050b8f..0a3b28ba5c13 100644
--- a/net/core/devmem.h
+++ b/net/core/devmem.h
@@ -170,8 +170,9 @@ static inline void __net_devmem_dmabuf_binding_free(struct work_struct *wq)
}
static inline struct net_devmem_dmabuf_binding *
-net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
+net_devmem_bind_dmabuf(struct net_device *dev,
enum dma_data_direction direction,
+ unsigned int dmabuf_fd,
struct netdev_nl_sock *priv,
struct netlink_ext_ack *extack)
{
diff --git a/net/core/filter.c b/net/core/filter.c
index 577a4504e26f..327ca73f9cd7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1968,10 +1968,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
bool do_mforce = flags & BPF_F_MARK_ENFORCE;
+ bool is_ipv6 = flags & BPF_F_IPV6;
__sum16 *ptr;
if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
- BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
+ BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK | BPF_F_IPV6)))
return -EINVAL;
if (unlikely(offset > 0xffff || offset & 1))
return -EFAULT;
@@ -1987,7 +1988,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
if (unlikely(from != 0))
return -EINVAL;
- inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
+ inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, is_ipv6);
break;
case 2:
inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
@@ -8023,10 +8024,6 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
if (func_proto)
return func_proto;
- func_proto = cgroup_current_func_proto(func_id, prog);
- if (func_proto)
- return func_proto;
-
switch (func_id) {
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_sock_proto;
@@ -8052,10 +8049,6 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
if (func_proto)
return func_proto;
- func_proto = cgroup_current_func_proto(func_id, prog);
- if (func_proto)
- return func_proto;
-
switch (func_id) {
case BPF_FUNC_bind:
switch (prog->expected_attach_type) {
@@ -8489,18 +8482,12 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_msg_pop_data_proto;
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
- case BPF_FUNC_get_current_uid_gid:
- return &bpf_get_current_uid_gid_proto;
case BPF_FUNC_sk_storage_get:
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
case BPF_FUNC_get_netns_cookie:
return &bpf_get_netns_cookie_sk_msg_proto;
-#ifdef CONFIG_CGROUP_NET_CLASSID
- case BPF_FUNC_get_cgroup_classid:
- return &bpf_get_cgroup_classid_curr_proto;
-#endif
default:
return bpf_sk_base_func_proto(func_id, prog);
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 4011eb305cee..ba7cf3e3c32f 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -153,9 +153,9 @@ u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats)
EXPORT_SYMBOL(page_pool_ethtool_stats_get);
#else
-#define alloc_stat_inc(pool, __stat)
-#define recycle_stat_inc(pool, __stat)
-#define recycle_stat_add(pool, __stat, val)
+#define alloc_stat_inc(...) do { } while (0)
+#define recycle_stat_inc(...) do { } while (0)
+#define recycle_stat_add(...) do { } while (0)
#endif
static bool page_pool_producer_lock(struct page_pool *pool)
@@ -741,19 +741,16 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
static bool page_pool_recycle_in_ring(struct page_pool *pool, netmem_ref netmem)
{
- int ret;
- /* BH protection not needed if current is softirq */
- if (in_softirq())
- ret = ptr_ring_produce(&pool->ring, (__force void *)netmem);
- else
- ret = ptr_ring_produce_bh(&pool->ring, (__force void *)netmem);
+ bool in_softirq, ret;
- if (!ret) {
+ /* BH protection not needed if current is softirq */
+ in_softirq = page_pool_producer_lock(pool);
+ ret = !__ptr_ring_produce(&pool->ring, (__force void *)netmem);
+ if (ret)
recycle_stat_inc(pool, ring);
- return true;
- }
+ page_pool_producer_unlock(pool, in_softirq);
- return false;
+ return ret;
}
/* Only allow direct recycling in special circumstances, into the
@@ -1150,10 +1147,14 @@ static void page_pool_scrub(struct page_pool *pool)
static int page_pool_release(struct page_pool *pool)
{
+ bool in_softirq;
int inflight;
page_pool_scrub(pool);
inflight = page_pool_inflight(pool, true);
+ /* Acquire producer lock to make sure producers have exited. */
+ in_softirq = page_pool_producer_lock(pool);
+ page_pool_producer_unlock(pool, in_softirq);
if (!inflight)
__page_pool_destroy(pool);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 0ddc4c718833..34c51eb1a14f 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -530,16 +530,22 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
u32 off, u32 len,
struct sk_psock *psock,
struct sock *sk,
- struct sk_msg *msg)
+ struct sk_msg *msg,
+ bool take_ref)
{
int num_sge, copied;
+ /* skb_to_sgvec will fail when the total number of fragments in
+ * frag_list and frags exceeds MAX_MSG_FRAGS. For example, the
+ * caller may aggregate multiple skbs.
+ */
num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
if (num_sge < 0) {
/* skb linearize may fail with ENOMEM, but lets simply try again
* later if this happens. Under memory pressure we don't want to
* drop the skb. We need to linearize the skb so that the mapping
* in skb_to_sgvec can not error.
+ * Note that skb_linearize requires the skb not to be shared.
*/
if (skb_linearize(skb))
return -EAGAIN;
@@ -556,7 +562,7 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
msg->sg.start = 0;
msg->sg.size = copied;
msg->sg.end = num_sge;
- msg->skb = skb;
+ msg->skb = take_ref ? skb_get(skb) : skb;
sk_psock_queue_msg(psock, msg);
sk_psock_data_ready(sk, psock);
@@ -564,7 +570,7 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
}
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
- u32 off, u32 len);
+ u32 off, u32 len, bool take_ref);
static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len)
@@ -578,7 +584,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
* correctly.
*/
if (unlikely(skb->sk == sk))
- return sk_psock_skb_ingress_self(psock, skb, off, len);
+ return sk_psock_skb_ingress_self(psock, skb, off, len, true);
msg = sk_psock_create_ingress_msg(sk, skb);
if (!msg)
return -EAGAIN;
@@ -590,7 +596,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
* into user buffers.
*/
skb_set_owner_r(skb, sk);
- err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, true);
if (err < 0)
kfree(msg);
return err;
@@ -601,7 +607,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
* because the skb is already accounted for here.
*/
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
- u32 off, u32 len)
+ u32 off, u32 len, bool take_ref)
{
struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC);
struct sock *sk = psock->sk;
@@ -610,7 +616,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
if (unlikely(!msg))
return -EAGAIN;
skb_set_owner_r(skb, sk);
- err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, take_ref);
if (err < 0)
kfree(msg);
return err;
@@ -619,18 +625,13 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool ingress)
{
- int err = 0;
-
if (!ingress) {
if (!sock_writeable(psock->sk))
return -EAGAIN;
return skb_send_sock(psock->sk, skb, off, len);
}
- skb_get(skb);
- err = sk_psock_skb_ingress(psock, skb, off, len);
- if (err < 0)
- kfree_skb(skb);
- return err;
+
+ return sk_psock_skb_ingress(psock, skb, off, len);
}
static void sk_psock_skb_state(struct sk_psock *psock,
@@ -655,12 +656,14 @@ static void sk_psock_backlog(struct work_struct *work)
bool ingress;
int ret;
+ /* Increment the psock refcnt to synchronize with close(fd) path in
+ * sock_map_close(), ensuring we wait for backlog thread completion
+ * before sk_socket is freed. If refcnt increment fails, it indicates
+ * sock_map_close() completed with sk_socket potentially already freed.
+ */
+ if (!sk_psock_get(psock->sk))
+ return;
mutex_lock(&psock->work_mutex);
- if (unlikely(state->len)) {
- len = state->len;
- off = state->off;
- }
-
while ((skb = skb_peek(&psock->ingress_skb))) {
len = skb->len;
off = 0;
@@ -670,6 +673,13 @@ static void sk_psock_backlog(struct work_struct *work)
off = stm->offset;
len = stm->full_len;
}
+
+ /* Resume processing from previous partial state */
+ if (unlikely(state->len)) {
+ len = state->len;
+ off = state->off;
+ }
+
ingress = skb_bpf_ingress(skb);
skb_bpf_redirect_clear(skb);
do {
@@ -680,7 +690,8 @@ static void sk_psock_backlog(struct work_struct *work)
if (ret <= 0) {
if (ret == -EAGAIN) {
sk_psock_skb_state(psock, state, len, off);
-
+ /* Restore redir info we cleared before */
+ skb_bpf_set_redir(skb, psock->sk, ingress);
/* Delay slightly to prioritize any
* other work that might be here.
*/
@@ -697,11 +708,14 @@ static void sk_psock_backlog(struct work_struct *work)
len -= ret;
} while (len);
+ /* The entire skb sent, clear state */
+ sk_psock_skb_state(psock, state, 0, 0);
skb = skb_dequeue(&psock->ingress_skb);
kfree_skb(skb);
}
end:
mutex_unlock(&psock->work_mutex);
+ sk_psock_put(psock->sk, psock);
}
struct sk_psock *sk_psock_init(struct sock *sk, int node)
@@ -1014,7 +1028,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
off = stm->offset;
len = stm->full_len;
}
- err = sk_psock_skb_ingress_self(psock, skb, off, len);
+ err = sk_psock_skb_ingress_self(psock, skb, off, len, false);
}
if (err < 0) {
spin_lock_bh(&psock->ingress_lock);
diff --git a/net/core/sock.c b/net/core/sock.c
index 341979874459..3b409bc8ef6d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3284,16 +3284,16 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ? sk->sk_memcg : NULL;
struct proto *prot = sk->sk_prot;
- bool charged = false;
+ bool charged = true;
long allocated;
sk_memory_allocated_add(sk, amt);
allocated = sk_memory_allocated(sk);
if (memcg) {
- if (!mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge()))
+ charged = mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge());
+ if (!charged)
goto suppress_allocation;
- charged = true;
}
/* Under limit. */
@@ -3378,7 +3378,7 @@ suppress_allocation:
sk_memory_allocated_sub(sk, amt);
- if (charged)
+ if (memcg && charged)
mem_cgroup_uncharge_skmem(memcg, amt);
return 0;
diff --git a/net/core/utils.c b/net/core/utils.c
index e47feeaa5a49..5e63b0ea21f3 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -473,11 +473,11 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
EXPORT_SYMBOL(inet_proto_csum_replace16);
void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
- __wsum diff, bool pseudohdr)
+ __wsum diff, bool pseudohdr, bool ipv6)
{
if (skb->ip_summed != CHECKSUM_PARTIAL) {
csum_replace_by_diff(sum, diff);
- if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr && !ipv6)
skb->csum = ~csum_sub(diff, skb->csum);
} else if (pseudohdr) {
*sum = ~csum_fold(csum_add(diff, csum_unfold(*sum)));
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 8c3c068728e5..fe75821623a4 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -257,7 +257,7 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
int source_port;
u8 *brcm_tag;
- if (unlikely(!pskb_may_pull(skb, BRCM_LEG_PORT_ID)))
+ if (unlikely(!pskb_may_pull(skb, BRCM_LEG_TAG_LEN + VLAN_HLEN)))
return NULL;
brcm_tag = dsa_etype_header_pos_rx(skb);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 9c775f8aa438..85b5aa82d7d7 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -495,6 +495,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
bool copy_dtor;
__sum16 check;
__be16 newlen;
+ int ret = 0;
mss = skb_shinfo(gso_skb)->gso_size;
if (gso_skb->len <= sizeof(*uh) + mss)
@@ -523,6 +524,10 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size)
return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+ ret = __skb_linearize(gso_skb);
+ if (ret)
+ return ERR_PTR(ret);
+
/* Setup csum, as fraglist skips this in udp4_gro_receive. */
gso_skb->csum_start = skb_transport_header(gso_skb) - gso_skb->head;
gso_skb->csum_offset = offsetof(struct udphdr, check);
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index 95e9146918cc..b8d43ed4689d 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -86,7 +86,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
diff = get_csum_diff(ip6h, p);
inet_proto_csum_replace_by_diff(&th->check, skb,
- diff, true);
+ diff, true, true);
}
break;
case NEXTHDR_UDP:
@@ -97,7 +97,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
diff = get_csum_diff(ip6h, p);
inet_proto_csum_replace_by_diff(&uh->check, skb,
- diff, true);
+ diff, true, true);
if (!uh->check)
uh->check = CSUM_MANGLED_0;
}
@@ -111,7 +111,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
diff = get_csum_diff(ip6h, p);
inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb,
- diff, true);
+ diff, true, true);
}
break;
}
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index aad84aabd7f1..f391cd267922 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -248,7 +248,7 @@ static noinline bool
nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple,
const struct nf_conn *ignored_ct)
{
- static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST_BIT;
+ static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST;
const struct nf_conntrack_tuple_hash *thash;
const struct nf_conntrack_zone *zone;
struct nf_conn *ct;
@@ -287,8 +287,14 @@ nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple,
zone = nf_ct_zone(ignored_ct);
thash = nf_conntrack_find_get(net, zone, tuple);
- if (unlikely(!thash)) /* clashing entry went away */
- return false;
+ if (unlikely(!thash)) {
+ struct nf_conntrack_tuple reply;
+
+ nf_ct_invert_tuple(&reply, tuple);
+ thash = nf_conntrack_find_get(net, zone, &reply);
+ if (!thash) /* clashing entry went away */
+ return false;
+ }
ct = nf_ct_tuplehash_to_ctrack(thash);
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index c15db28c5ebc..be7c16c79f71 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1114,6 +1114,25 @@ bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
}
/**
+ * pipapo_resmap_init_avx2() - Initialise result map before first use
+ * @m: Matching data, including mapping table
+ * @res_map: Result map
+ *
+ * Like pipapo_resmap_init() but do not set start map bits covered by the first field.
+ */
+static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, unsigned long *res_map)
+{
+ const struct nft_pipapo_field *f = m->f;
+ int i;
+
+ /* Starting map doesn't need to be set to all-ones for this implementation,
+ * but we do need to zero the remaining bits, if any.
+ */
+ for (i = f->bsize; i < m->bsize_max; i++)
+ res_map[i] = 0ul;
+}
+
+/**
* nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation
* @net: Network namespace
* @set: nftables API set representation
@@ -1171,7 +1190,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
res = scratch->map + (map_index ? m->bsize_max : 0);
fill = scratch->map + (map_index ? 0 : m->bsize_max);
- /* Starting map doesn't need to be set for this implementation */
+ pipapo_resmap_init_avx2(m, res);
nft_pipapo_avx2_prepare();
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 1f7c136d6d0e..0a260df45d25 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -45,8 +45,9 @@ static void none_free_call_crypto(struct rxrpc_call *call)
static bool none_validate_challenge(struct rxrpc_connection *conn,
struct sk_buff *skb)
{
- return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
- rxrpc_eproto_rxnull_challenge);
+ rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ rxrpc_eproto_rxnull_challenge);
+ return true;
}
static int none_sendmsg_respond_to_challenge(struct sk_buff *challenge,
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index f4cfe88670f5..ea5bb131ebd0 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -818,7 +818,11 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
}
/* Get net to avoid freed tipc_crypto when delete namespace */
- get_net(aead->crypto->net);
+ if (!maybe_get_net(aead->crypto->net)) {
+ tipc_bearer_put(b);
+ rc = -ENODEV;
+ goto exit;
+ }
/* Now, do encrypt */
rc = crypto_aead_encrypt(req);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 914d4e1516a3..fc88e34b7f33 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -908,6 +908,13 @@ more_data:
&msg_redir, send, flags);
lock_sock(sk);
if (err < 0) {
+ /* Regardless of whether the data represented by
+ * msg_redir is sent successfully, we have already
+ * uncharged it via sk_msg_return_zero(). The
+ * msg->sg.size represents the remaining unprocessed
+ * data, which needs to be uncharged here.
+ */
+ sk_mem_uncharge(sk, msg->sg.size);
*copied -= sk_msg_free_nocharge(sk, &msg_redir);
msg->sg.size = 0;
}
@@ -1120,9 +1127,13 @@ alloc_encrypted:
num_async++;
else if (ret == -ENOMEM)
goto wait_for_memory;
- else if (ctx->open_rec && ret == -ENOSPC)
+ else if (ctx->open_rec && ret == -ENOSPC) {
+ if (msg_pl->cork_bytes) {
+ ret = 0;
+ goto send_end;
+ }
goto rollback_iter;
- else if (ret != -EAGAIN)
+ } else if (ret != -EAGAIN)
goto send_end;
}
continue;
diff --git a/scripts/Makefile.btf b/scripts/Makefile.btf
index fbaaec2187e5..db76335dd917 100644
--- a/scripts/Makefile.btf
+++ b/scripts/Makefile.btf
@@ -23,6 +23,8 @@ else
# Switch to using --btf_features for v1.26 and later.
pahole-flags-$(call test-ge, $(pahole-ver), 126) = -j$(JOBS) --btf_features=encode_force,var,float,enum64,decl_tag,type_tag,optimized_func,consistent_func,decl_tag_kfuncs
+pahole-flags-$(call test-ge, $(pahole-ver), 130) += --btf_features=attributes
+
ifneq ($(KBUILD_EXTMOD),)
module-pahole-flags-$(call test-ge, $(pahole-ver), 128) += --btf_features=distilled_base
endif
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index e74a01a85070..c77dc40f7689 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -8,6 +8,7 @@
from __future__ import print_function
import argparse
+import json
import re
import sys, os
import subprocess
@@ -37,11 +38,17 @@ class APIElement(object):
@desc: textual description of the symbol
@ret: (optional) description of any associated return value
"""
- def __init__(self, proto='', desc='', ret='', attrs=[]):
+ def __init__(self, proto='', desc='', ret=''):
self.proto = proto
self.desc = desc
self.ret = ret
- self.attrs = attrs
+
+ def to_dict(self):
+ return {
+ 'proto': self.proto,
+ 'desc': self.desc,
+ 'ret': self.ret
+ }
class Helper(APIElement):
@@ -51,8 +58,9 @@ class Helper(APIElement):
@desc: textual description of the helper function
@ret: description of the return value of the helper function
"""
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
+ def __init__(self, proto='', desc='', ret='', attrs=[]):
+ super().__init__(proto, desc, ret)
+ self.attrs = attrs
self.enum_val = None
def proto_break_down(self):
@@ -81,6 +89,12 @@ class Helper(APIElement):
return res
+ def to_dict(self):
+ d = super().to_dict()
+ d["attrs"] = self.attrs
+ d.update(self.proto_break_down())
+ return d
+
ATTRS = {
'__bpf_fastcall': 'bpf_fastcall'
@@ -675,7 +689,7 @@ COMMANDS
self.print_elem(command)
-class PrinterHelpers(Printer):
+class PrinterHelpersHeader(Printer):
"""
A printer for dumping collected information about helpers as C header to
be included from BPF program.
@@ -896,6 +910,43 @@ class PrinterHelpers(Printer):
print(') = (void *) %d;' % helper.enum_val)
print('')
+
+class PrinterHelpersJSON(Printer):
+ """
+ A printer for dumping collected information about helpers as a JSON file.
+ @parser: A HeaderParser with Helper objects
+ """
+
+ def __init__(self, parser):
+ self.elements = parser.helpers
+ self.elem_number_check(
+ parser.desc_unique_helpers,
+ parser.define_unique_helpers,
+ "helper",
+ "___BPF_FUNC_MAPPER",
+ )
+
+ def print_all(self):
+ helper_dicts = [helper.to_dict() for helper in self.elements]
+ out_dict = {'helpers': helper_dicts}
+ print(json.dumps(out_dict, indent=4))
+
+
+class PrinterSyscallJSON(Printer):
+ """
+ A printer for dumping collected syscall information as a JSON file.
+ @parser: A HeaderParser with APIElement objects
+ """
+
+ def __init__(self, parser):
+ self.elements = parser.commands
+ self.elem_number_check(parser.desc_syscalls, parser.enum_syscalls, 'syscall', 'bpf_cmd')
+
+ def print_all(self):
+ syscall_dicts = [syscall.to_dict() for syscall in self.elements]
+ out_dict = {'syscall': syscall_dicts}
+ print(json.dumps(out_dict, indent=4))
+
###############################################################################
# If script is launched from scripts/ from kernel tree and can access
@@ -905,9 +956,17 @@ script = os.path.abspath(sys.argv[0])
linuxRoot = os.path.dirname(os.path.dirname(script))
bpfh = os.path.join(linuxRoot, 'include/uapi/linux/bpf.h')
+# target -> output format -> printer
printers = {
- 'helpers': PrinterHelpersRST,
- 'syscall': PrinterSyscallRST,
+ 'helpers': {
+ 'rst': PrinterHelpersRST,
+ 'json': PrinterHelpersJSON,
+ 'header': PrinterHelpersHeader,
+ },
+ 'syscall': {
+ 'rst': PrinterSyscallRST,
+ 'json': PrinterSyscallJSON
+ },
}
argParser = argparse.ArgumentParser(description="""
@@ -917,6 +976,8 @@ rst2man utility.
""")
argParser.add_argument('--header', action='store_true',
help='generate C header file')
+argParser.add_argument('--json', action='store_true',
+ help='generate JSON output')
if (os.path.isfile(bpfh)):
argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h',
default=bpfh)
@@ -924,17 +985,35 @@ else:
argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h')
argParser.add_argument('target', nargs='?', default='helpers',
choices=printers.keys(), help='eBPF API target')
-args = argParser.parse_args()
-
-# Parse file.
-headerParser = HeaderParser(args.filename)
-headerParser.run()
-# Print formatted output to standard output.
-if args.header:
- if args.target != 'helpers':
- raise NotImplementedError('Only helpers header generation is supported')
- printer = PrinterHelpers(headerParser)
-else:
- printer = printers[args.target](headerParser)
-printer.print_all()
+def error_die(message: str):
+ argParser.print_usage(file=sys.stderr)
+ print('Error: {}'.format(message), file=sys.stderr)
+ exit(1)
+
+def parse_and_dump():
+ args = argParser.parse_args()
+
+ # Parse file.
+ headerParser = HeaderParser(args.filename)
+ headerParser.run()
+
+ if args.header and args.json:
+ error_die('Use either --header or --json, not both')
+
+ output_format = 'rst'
+ if args.header:
+ output_format = 'header'
+ elif args.json:
+ output_format = 'json'
+
+ try:
+ printer = printers[args.target][output_format](headerParser)
+ # Print formatted output to standard output.
+ printer.print_all()
+ except KeyError:
+ error_die('Unsupported target/format combination: "{}", "{}"'
+ .format(args.target, output_format))
+
+if __name__ == "__main__":
+ parse_and_dump()
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index d6304e01afe0..da3152c16228 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -31,7 +31,7 @@ PROG COMMANDS
| **bpftool** **prog dump xlated** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] [**visual**] }]
| **bpftool** **prog dump jited** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] }]
| **bpftool** **prog pin** *PROG* *FILE*
-| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**]
+| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] [**kernel_btf** *BTF_FILE*]
| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog tracelog**
@@ -127,7 +127,7 @@ bpftool prog pin *PROG* *FILE*
Note: *FILE* must be located in *bpffs* mount. It must not contain a dot
character ('.'), which is reserved for future extensions of *bpffs*.
-bpftool prog { load | loadall } *OBJ* *PATH* [type *TYPE*] [map { idx *IDX* | name *NAME* } *MAP*] [{ offload_dev | xdpmeta_dev } *NAME*] [pinmaps *MAP_DIR*] [autoattach]
+bpftool prog { load | loadall } *OBJ* *PATH* [type *TYPE*] [map { idx *IDX* | name *NAME* } *MAP*] [{ offload_dev | xdpmeta_dev } *NAME*] [pinmaps *MAP_DIR*] [autoattach] [kernel_btf *BTF_FILE*]
Load bpf program(s) from binary *OBJ* and pin as *PATH*. **bpftool prog
load** pins only the first program from the *OBJ* as *PATH*. **bpftool prog
loadall** pins all programs from the *OBJ* under *PATH* directory. **type**
@@ -153,6 +153,12 @@ bpftool prog { load | loadall } *OBJ* *PATH* [type *TYPE*] [map { idx *IDX* | na
program does not support autoattach, bpftool falls back to regular pinning
for that program instead.
+ The **kernel_btf** option allows specifying an external BTF file to replace
+ the system's own vmlinux BTF file for CO-RE relocations. Note that any
+ other feature relying on BTF (such as fentry/fexit programs, struct_ops)
+ requires the BTF file for the actual kernel running on the host, often
+ exposed at /sys/kernel/btf/vmlinux.
+
Note: *PATH* must be located in *bpffs* mount. It must not contain a dot
character ('.'), which is reserved for future extensions of *bpffs*.
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 1ce409a6cbd9..27512feb5c70 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -505,13 +505,13 @@ _bpftool()
_bpftool_get_map_names
return 0
;;
- pinned|pinmaps)
+ pinned|pinmaps|kernel_btf)
_filedir
return 0
;;
*)
COMPREPLY=( $( compgen -W "map" -- "$cur" ) )
- _bpftool_once_attr 'type pinmaps autoattach'
+ _bpftool_once_attr 'type pinmaps autoattach kernel_btf'
_bpftool_one_of_list 'offload_dev xdpmeta_dev'
return 0
;;
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 93b139bfb988..944ebe21a216 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -221,7 +221,7 @@ static int cgroup_has_attached_progs(int cgroup_fd)
for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) {
int count = count_attached_bpf_progs(cgroup_fd, cgroup_attach_types[i]);
- if (count < 0)
+ if (count < 0 && errno != EINVAL)
return -1;
if (count > 0) {
@@ -318,11 +318,11 @@ static int show_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
static int do_show(int argc, char **argv)
{
- enum bpf_attach_type type;
int has_attached_progs;
const char *path;
int cgroup_fd;
int ret = -1;
+ unsigned int i;
query_flags = 0;
@@ -370,14 +370,14 @@ static int do_show(int argc, char **argv)
"AttachFlags", "Name");
btf_vmlinux = libbpf_find_kernel_btf();
- for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+ for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) {
/*
* Not all attach types may be supported, so it's expected
* that some requests will fail.
* If we were able to get the show for at least one
* attach type, let's return 0.
*/
- if (show_bpf_progs(cgroup_fd, type, 0) == 0)
+ if (show_bpf_progs(cgroup_fd, cgroup_attach_types[i], 0) == 0)
ret = 0;
}
@@ -400,9 +400,9 @@ exit:
static int do_show_tree_fn(const char *fpath, const struct stat *sb,
int typeflag, struct FTW *ftw)
{
- enum bpf_attach_type type;
int has_attached_progs;
int cgroup_fd;
+ unsigned int i;
if (typeflag != FTW_D)
return 0;
@@ -434,8 +434,8 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb,
}
btf_vmlinux = libbpf_find_kernel_btf();
- for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++)
- show_bpf_progs(cgroup_fd, type, ftw->level);
+ for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++)
+ show_bpf_progs(cgroup_fd, cgroup_attach_types[i], ftw->level);
if (errno == EINVAL)
/* Last attach type does not support query.
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 52fd2c9fac56..3535afc80a49 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -380,6 +380,7 @@ show_perf_event_uprobe_json(struct bpf_link_info *info, json_writer_t *wtr)
u64_to_ptr(info->perf_event.uprobe.file_name));
jsonw_uint_field(wtr, "offset", info->perf_event.uprobe.offset);
jsonw_uint_field(wtr, "cookie", info->perf_event.uprobe.cookie);
+ jsonw_uint_field(wtr, "ref_ctr_offset", info->perf_event.uprobe.ref_ctr_offset);
}
static void
@@ -823,6 +824,8 @@ static void show_perf_event_uprobe_plain(struct bpf_link_info *info)
printf("%s+%#x ", buf, info->perf_event.uprobe.offset);
if (info->perf_event.uprobe.cookie)
printf("cookie %llu ", info->perf_event.uprobe.cookie);
+ if (info->perf_event.uprobe.ref_ctr_offset)
+ printf("ref_ctr_offset 0x%llx ", info->perf_event.uprobe.ref_ctr_offset);
}
static void show_perf_event_tracepoint_plain(struct bpf_link_info *info)
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f010295350be..96eea8a67225 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -1681,8 +1681,17 @@ offload_dev:
} else if (is_prefix(*argv, "autoattach")) {
auto_attach = true;
NEXT_ARG();
+ } else if (is_prefix(*argv, "kernel_btf")) {
+ NEXT_ARG();
+
+ if (!REQ_ARGS(1))
+ goto err_free_reuse_maps;
+
+ open_opts.btf_custom_path = GET_ARG();
} else {
- p_err("expected no more arguments, 'type', 'map' or 'dev', got: '%s'?",
+ p_err("expected no more arguments, "
+ "'type', 'map', 'offload_dev', 'xdpmeta_dev', 'pinmaps', "
+ "'autoattach', or 'kernel_btf', got: '%s'?",
*argv);
goto err_free_reuse_maps;
}
@@ -2474,6 +2483,7 @@ static int do_help(int argc, char **argv)
" [map { idx IDX | name NAME } MAP]\\\n"
" [pinmaps MAP_DIR]\n"
" [autoattach]\n"
+ " [kernel_btf BTF_FILE]\n"
" %1$s %2$s attach PROG ATTACH_TYPE [MAP]\n"
" %1$s %2$s detach PROG ATTACH_TYPE [MAP]\n"
" %1$s %2$s run PROG \\\n"
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index fd404729b115..0b4a2f124d11 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1506,7 +1506,7 @@ union bpf_attr {
__s32 map_token_fd;
};
- struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */
__u32 map_fd;
__aligned_u64 key;
union {
@@ -1995,11 +1995,15 @@ union bpf_attr {
* long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
* Description
* Store *len* bytes from address *from* into the packet
- * associated to *skb*, at *offset*. *flags* are a combination of
- * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
- * checksum for the packet after storing the bytes) and
- * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
- * **->swhash** and *skb*\ **->l4hash** to 0).
+ * associated to *skb*, at *offset*. The *flags* are a combination
+ * of the following values:
+ *
+ * **BPF_F_RECOMPUTE_CSUM**
+ * Automatically update *skb*\ **->csum** after storing the
+ * bytes.
+ * **BPF_F_INVALIDATE_HASH**
+ * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\
+ * **->l4hash** to 0.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
@@ -2051,7 +2055,8 @@ union bpf_attr {
* untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
* for updates resulting in a null checksum the value is set to
* **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
- * the checksum is to be computed against a pseudo-header.
+ * that the modified header field is part of the pseudo-header.
+ * Flag **BPF_F_IPV6** should be set for IPv6 packets.
*
* This helper works in combination with **bpf_csum_diff**\ (),
* which does not update the checksum in-place, but offers more
@@ -6068,6 +6073,7 @@ enum {
BPF_F_PSEUDO_HDR = (1ULL << 4),
BPF_F_MARK_MANGLED_0 = (1ULL << 5),
BPF_F_MARK_ENFORCE = (1ULL << 6),
+ BPF_F_IPV6 = (1ULL << 7),
};
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
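
In BPF program terms, the new flag rides along with the existing csum-replace flags. A hedged sketch of rewriting an IPv6 address and patching the TCP checksum at tc ingress; the offset macro and address buffers (old_ip6/new_ip6 as 4 x __be32) are illustrative assumptions:

/* Editor's sketch, not part of the patch. */
#define TCP_CSUM_OFF (ETH_HLEN + sizeof(struct ipv6hdr) + \
		      offsetof(struct tcphdr, check))

__s64 diff = bpf_csum_diff(old_ip6, 16, new_ip6, 16, 0);

/* from == 0 selects "apply pre-computed diff" mode (field-size bits in
 * flags stay 0); BPF_F_PSEUDO_HDR marks a pseudo-header change, and
 * BPF_F_IPV6 suppresses the bogus skb->csum fixup on CHECKSUM_COMPLETE
 * IPv6 packets.
 */
bpf_l4_csum_replace(skb, TCP_CSUM_OFF, 0, diff,
		    BPF_F_PSEUDO_HDR | BPF_F_IPV6);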
@@ -6723,6 +6729,7 @@ struct bpf_link_info {
__u32 name_len;
__u32 offset; /* offset from file_name */
__u64 cookie;
+ __u64 ref_ctr_offset;
} uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
struct {
__aligned_u64 func_name; /* in/out */
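
Userspace can read the new field back through the link-info API; a minimal sketch, assuming link_fd refers to an attached uprobe perf-event link (error handling elided):

/* Editor's sketch: query the uprobe's ref_ctr_offset from a link fd. */
struct bpf_link_info info = {};
__u32 len = sizeof(info);

if (!bpf_link_get_info_by_fd(link_fd, &info, &len) &&
    info.type == BPF_LINK_TYPE_PERF_EVENT &&
    info.perf_event.type == BPF_PERF_EVENT_UPROBE)
	printf("ref_ctr_offset: 0x%llx\n",
	       (unsigned long long)info.perf_event.uprobe.ref_ctr_offset);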
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index c0e13cdf9660..b997c68bd945 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -388,7 +388,13 @@ extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak;
#define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j
#define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__)
+#if defined(__clang__) && (__clang_major__ >= 19)
+#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__))
+#elif defined(__GNUC__) && (__GNUC__ >= 14)
+#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__))
+#else
#define ___type(...) typeof(___arrow(__VA_ARGS__))
+#endif
#define ___read(read_fn, dst, src_type, src, accessor) \
read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor)
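
___type() is what BPF_CORE_READ() uses to declare its intermediate temporaries, so a qualifier on the final member used to leak into the temporary; with __typeof_unqual__ it is stripped. A hedged illustration with invented structs:

/* Editor's illustration: with plain typeof(), the temporary declared
 * for this access chain would inherit 'volatile' from the member; on
 * clang >= 19 / GCC >= 14 it is now a plain long.
 */
struct counters { volatile long packets; };
struct dev_priv { struct counters *stats; };

static long read_packets(struct dev_priv *p)
{
	return BPF_CORE_READ(p, stats, packets);
}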
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 686824b8b413..a50773d4616e 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -15,6 +15,14 @@
#define __array(name, val) typeof(val) *name[]
#define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name
+#ifndef likely
+#define likely(x) (__builtin_expect(!!(x), 1))
+#endif
+
+#ifndef unlikely
+#define unlikely(x) (__builtin_expect(!!(x), 0))
+#endif
+
/*
* Helper macro to place programs, maps, license in
* different sections in elf_bpf file. Section names
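
The #ifndef guards let programs that already define these hints (e.g. via kernel headers) keep their own copies. A small usage sketch, assuming the usual XDP includes:

/* Editor's sketch: annotate the expected-rare bounds-check failure. */
SEC("xdp")
int xdp_pass_eth(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;

	if (unlikely(data + sizeof(struct ethhdr) > data_end))
		return XDP_DROP;
	return XDP_PASS;
}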
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 38bc6b14b066..f1d495dc66bb 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -12,6 +12,7 @@
#include <sys/utsname.h>
#include <sys/param.h>
#include <sys/stat.h>
+#include <sys/mman.h>
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/btf.h>
@@ -120,6 +121,9 @@ struct btf {
/* whether base_btf should be freed in btf_free for this instance */
bool owns_base;
+ /* whether raw_data is a (read-only) mmap */
+ bool raw_data_is_mmap;
+
/* BTF object FD, if loaded into kernel */
int fd;
@@ -951,6 +955,17 @@ static bool btf_is_modifiable(const struct btf *btf)
return (void *)btf->hdr != btf->raw_data;
}
+static void btf_free_raw_data(struct btf *btf)
+{
+ if (btf->raw_data_is_mmap) {
+ munmap(btf->raw_data, btf->raw_size);
+ btf->raw_data_is_mmap = false;
+ } else {
+ free(btf->raw_data);
+ }
+ btf->raw_data = NULL;
+}
+
void btf__free(struct btf *btf)
{
if (IS_ERR_OR_NULL(btf))
@@ -970,7 +985,7 @@ void btf__free(struct btf *btf)
free(btf->types_data);
strset__free(btf->strs_set);
}
- free(btf->raw_data);
+ btf_free_raw_data(btf);
free(btf->raw_data_swapped);
free(btf->type_offs);
if (btf->owns_base)
@@ -996,7 +1011,7 @@ static struct btf *btf_new_empty(struct btf *base_btf)
if (base_btf) {
btf->base_btf = base_btf;
btf->start_id = btf__type_cnt(base_btf);
- btf->start_str_off = base_btf->hdr->str_len;
+ btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off;
btf->swapped_endian = base_btf->swapped_endian;
}
@@ -1030,7 +1045,7 @@ struct btf *btf__new_empty_split(struct btf *base_btf)
return libbpf_ptr(btf_new_empty(base_btf));
}
-static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
+static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, bool is_mmap)
{
struct btf *btf;
int err;
@@ -1050,12 +1065,18 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
btf->start_str_off = base_btf->hdr->str_len;
}
- btf->raw_data = malloc(size);
- if (!btf->raw_data) {
- err = -ENOMEM;
- goto done;
+ if (is_mmap) {
+ btf->raw_data = (void *)data;
+ btf->raw_data_is_mmap = true;
+ } else {
+ btf->raw_data = malloc(size);
+ if (!btf->raw_data) {
+ err = -ENOMEM;
+ goto done;
+ }
+ memcpy(btf->raw_data, data, size);
}
- memcpy(btf->raw_data, data, size);
+
btf->raw_size = size;
btf->hdr = btf->raw_data;
@@ -1083,12 +1104,12 @@ done:
struct btf *btf__new(const void *data, __u32 size)
{
- return libbpf_ptr(btf_new(data, size, NULL));
+ return libbpf_ptr(btf_new(data, size, NULL, false));
}
struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf)
{
- return libbpf_ptr(btf_new(data, size, base_btf));
+ return libbpf_ptr(btf_new(data, size, base_btf, false));
}
struct btf_elf_secs {
@@ -1148,6 +1169,12 @@ static int btf_find_elf_sections(Elf *elf, const char *path, struct btf_elf_secs
else
continue;
+ if (sh.sh_type != SHT_PROGBITS) {
+ pr_warn("unexpected section type (%d) of section(%d, %s) from %s\n",
+ sh.sh_type, idx, name, path);
+ goto err;
+ }
+
data = elf_getdata(scn, 0);
if (!data) {
pr_warn("failed to get section(%d, %s) data from %s\n",
@@ -1203,7 +1230,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
if (secs.btf_base_data) {
dist_base_btf = btf_new(secs.btf_base_data->d_buf, secs.btf_base_data->d_size,
- NULL);
+ NULL, false);
if (IS_ERR(dist_base_btf)) {
err = PTR_ERR(dist_base_btf);
dist_base_btf = NULL;
@@ -1212,7 +1239,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
}
btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size,
- dist_base_btf ?: base_btf);
+ dist_base_btf ?: base_btf, false);
if (IS_ERR(btf)) {
err = PTR_ERR(btf);
goto done;
@@ -1329,7 +1356,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
}
/* finally parse BTF data */
- btf = btf_new(data, sz, base_btf);
+ btf = btf_new(data, sz, base_btf, false);
err_out:
free(data);
@@ -1348,6 +1375,37 @@ struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)
return libbpf_ptr(btf_parse_raw(path, base_btf));
}
+static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf)
+{
+ struct stat st;
+ void *data;
+ struct btf *btf;
+ int fd, err;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return libbpf_err_ptr(-errno);
+
+ if (fstat(fd, &st) < 0) {
+ err = -errno;
+ close(fd);
+ return libbpf_err_ptr(err);
+ }
+
+ data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ err = -errno;
+ close(fd);
+
+ if (data == MAP_FAILED)
+ return libbpf_err_ptr(err);
+
+ btf = btf_new(data, st.st_size, base_btf, true);
+ if (IS_ERR(btf))
+ munmap(data, st.st_size);
+
+ return btf;
+}
+
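btf_parse_raw_mmap() keeps the mapping read-only and hands ownership to btf_new(), which is why btf_free_raw_data() earlier in this patch must choose between munmap() and free() per instance. Callers are unaffected; a sketch of the unchanged consumer view:

/* Editor's sketch: the public API is unchanged; vmlinux BTF may now be
 * backed by a shared read-only mapping instead of a heap copy.
 */
struct btf *btf = btf__load_vmlinux_btf();

if (!btf) {
	fprintf(stderr, "no vmlinux BTF: %d\n", -errno);
	return -1;
}
printf("vmlinux BTF has %u types\n", btf__type_cnt(btf));
btf__free(btf); /* munmaps or frees, depending on how it was loaded */
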
static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)
{
struct btf *btf;
@@ -1612,7 +1670,7 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf)
goto exit_free;
}
- btf = btf_new(ptr, btf_info.btf_size, base_btf);
+ btf = btf_new(ptr, btf_info.btf_size, base_btf, false);
exit_free:
free(ptr);
@@ -1652,10 +1710,8 @@ struct btf *btf__load_from_kernel_by_id(__u32 id)
static void btf_invalidate_raw_data(struct btf *btf)
{
- if (btf->raw_data) {
- free(btf->raw_data);
- btf->raw_data = NULL;
- }
+ if (btf->raw_data)
+ btf_free_raw_data(btf);
if (btf->raw_data_swapped) {
free(btf->raw_data_swapped);
btf->raw_data_swapped = NULL;
@@ -4350,46 +4406,109 @@ static inline __u16 btf_fwd_kind(struct btf_type *t)
return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT;
}
-/* Check if given two types are identical ARRAY definitions */
-static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
+static bool btf_dedup_identical_types(struct btf_dedup *d, __u32 id1, __u32 id2, int depth)
{
struct btf_type *t1, *t2;
+ int k1, k2;
+recur:
+ if (depth <= 0)
+ return false;
t1 = btf_type_by_id(d->btf, id1);
t2 = btf_type_by_id(d->btf, id2);
- if (!btf_is_array(t1) || !btf_is_array(t2))
+
+ k1 = btf_kind(t1);
+ k2 = btf_kind(t2);
+ if (k1 != k2)
return false;
- return btf_equal_array(t1, t2);
-}
+ switch (k1) {
+ case BTF_KIND_UNKN: /* VOID */
+ return true;
+ case BTF_KIND_INT:
+ return btf_equal_int_tag(t1, t2);
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ return btf_compat_enum(t1, t2);
+ case BTF_KIND_FWD:
+ case BTF_KIND_FLOAT:
+ return btf_equal_common(t1, t2);
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_TYPE_TAG:
+ if (t1->info != t2->info || t1->name_off != t2->name_off)
+ return false;
+ id1 = t1->type;
+ id2 = t2->type;
+ goto recur;
+ case BTF_KIND_ARRAY: {
+ struct btf_array *a1, *a2;
-/* Check if given two types are identical STRUCT/UNION definitions */
-static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2)
-{
- const struct btf_member *m1, *m2;
- struct btf_type *t1, *t2;
- int n, i;
+ if (!btf_compat_array(t1, t2))
+ return false;
- t1 = btf_type_by_id(d->btf, id1);
- t2 = btf_type_by_id(d->btf, id2);
+ a1 = btf_array(t1);
+ a2 = btf_array(t2);
- if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2))
- return false;
+ if (a1->index_type != a2->index_type &&
+ !btf_dedup_identical_types(d, a1->index_type, a2->index_type, depth - 1))
+ return false;
- if (!btf_shallow_equal_struct(t1, t2))
- return false;
+ if (a1->type != a2->type &&
+ !btf_dedup_identical_types(d, a1->type, a2->type, depth - 1))
+ return false;
- m1 = btf_members(t1);
- m2 = btf_members(t2);
- for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
- if (m1->type != m2->type &&
- !btf_dedup_identical_arrays(d, m1->type, m2->type) &&
- !btf_dedup_identical_structs(d, m1->type, m2->type))
+ return true;
+ }
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m1, *m2;
+ int i, n;
+
+ if (!btf_shallow_equal_struct(t1, t2))
return false;
+
+ m1 = btf_members(t1);
+ m2 = btf_members(t2);
+ for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
+ if (m1->type == m2->type)
+ continue;
+ if (!btf_dedup_identical_types(d, m1->type, m2->type, depth - 1))
+ return false;
+ }
+ return true;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *p1, *p2;
+ int i, n;
+
+ if (!btf_compat_fnproto(t1, t2))
+ return false;
+
+ if (t1->type != t2->type &&
+ !btf_dedup_identical_types(d, t1->type, t2->type, depth - 1))
+ return false;
+
+ p1 = btf_params(t1);
+ p2 = btf_params(t2);
+ for (i = 0, n = btf_vlen(t1); i < n; i++, p1++, p2++) {
+ if (p1->type == p2->type)
+ continue;
+ if (!btf_dedup_identical_types(d, p1->type, p2->type, depth - 1))
+ return false;
+ }
+ return true;
+ }
+ default:
+ return false;
}
- return true;
}
+
/*
* Check equivalence of BTF type graph formed by candidate struct/union (we'll
* call it "candidate graph" in this description for brevity) to a type graph
@@ -4508,19 +4627,13 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
* different fields within the *same* struct. This breaks type
* equivalence check, which makes an assumption that candidate
* types sub-graph has a consistent and deduped-by-compiler
- * types within a single CU. So work around that by explicitly
- * allowing identical array types here.
+ * types within a single CU. A similar situation can happen
+ * with structs/unions sometimes, and even with pointers.
+ * So accommodate cases like this by doing a structural
+ * comparison recursively, while avoiding endless loops by
+ * limiting the recursion depth.
*/
- if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id))
- return 1;
- /* It turns out that similar situation can happen with
- * struct/union sometimes, sigh... Handle the case where
- * structs/unions are exactly the same, down to the referenced
- * type IDs. Anything more complicated (e.g., if referenced
- * types are different, but equivalent) is *way more*
- * complicated and requires a many-to-many equivalence mapping.
- */
- if (btf_dedup_identical_structs(d, hypot_type_id, cand_id))
+ if (btf_dedup_identical_types(d, hypot_type_id, cand_id, 16))
return 1;
return 0;
}
@@ -5268,7 +5381,10 @@ struct btf *btf__load_vmlinux_btf(void)
pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n",
sysfs_btf_path);
} else {
- btf = btf__parse(sysfs_btf_path, NULL);
+ btf = btf_parse_raw_mmap(sysfs_btf_path, NULL);
+ if (IS_ERR(btf))
+ btf = btf__parse(sysfs_btf_path, NULL);
+
if (!btf) {
err = -errno;
pr_warn("failed to read kernel BTF from '%s': %s\n",
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6b85060f07b3..e9c641a2fb20 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -60,6 +60,8 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
+#define MAX_EVENT_NAME_LEN 64
+
#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"
#define BPF_INSN_SZ (sizeof(struct bpf_insn))
@@ -284,7 +286,7 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...)
old_errno = errno;
va_start(args, format);
- __libbpf_pr(level, format, args);
+ print_fn(level, format, args);
va_end(args);
errno = old_errno;
@@ -896,7 +898,7 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
return -LIBBPF_ERRNO__FORMAT;
}
- if (sec_off + prog_sz > sec_sz) {
+ if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) {
pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
sec_name, sec_off);
return -LIBBPF_ERRNO__FORMAT;
@@ -1725,15 +1727,6 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam
return ERR_PTR(-ENOENT);
}
-/* Some versions of Android don't provide memfd_create() in their libc
- * implementation, so avoid complications and just go straight to Linux
- * syscall.
- */
-static int sys_memfd_create(const char *name, unsigned flags)
-{
- return syscall(__NR_memfd_create, name, flags);
-}
-
#ifndef MFD_CLOEXEC
#define MFD_CLOEXEC 0x0001U
#endif
@@ -9455,6 +9448,30 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log
return 0;
}
+struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog)
+{
+ if (prog->func_info_rec_size != sizeof(struct bpf_func_info))
+ return libbpf_err_ptr(-EOPNOTSUPP);
+ return prog->func_info;
+}
+
+__u32 bpf_program__func_info_cnt(const struct bpf_program *prog)
+{
+ return prog->func_info_cnt;
+}
+
+struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog)
+{
+ if (prog->line_info_rec_size != sizeof(struct bpf_line_info))
+ return libbpf_err_ptr(-EOPNOTSUPP);
+ return prog->line_info;
+}
+
+__u32 bpf_program__line_info_cnt(const struct bpf_program *prog)
+{
+ return prog->line_info_cnt;
+}
+
#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \
.sec = (char *)sec_pfx, \
.prog_type = BPF_PROG_TYPE_##ptype, \
@@ -11121,16 +11138,16 @@ static const char *tracefs_available_filter_functions_addrs(void)
: TRACEFS"/available_filter_functions_addrs";
}
-static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
- const char *kfunc_name, size_t offset)
+static void gen_probe_legacy_event_name(char *buf, size_t buf_sz,
+ const char *name, size_t offset)
{
static int index = 0;
int i;
- snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
- __sync_fetch_and_add(&index, 1));
+ snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(),
+ __sync_fetch_and_add(&index, 1), name, offset);
- /* sanitize binary_path in the probe name */
+ /* sanitize name in the probe name */
for (i = 0; buf[i]; i++) {
if (!isalnum(buf[i]))
buf[i] = '_';
@@ -11255,9 +11272,9 @@ int probe_kern_syscall_wrapper(int token_fd)
return pfd >= 0 ? 1 : 0;
} else { /* legacy mode */
- char probe_name[128];
+ char probe_name[MAX_EVENT_NAME_LEN];
- gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
+ gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
return 0;
@@ -11313,10 +11330,10 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
func_name, offset,
-1 /* pid */, 0 /* ref_ctr_off */);
} else {
- char probe_name[256];
+ char probe_name[MAX_EVENT_NAME_LEN];
- gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
- func_name, offset);
+ gen_probe_legacy_event_name(probe_name, sizeof(probe_name),
+ func_name, offset);
legacy_probe = strdup(probe_name);
if (!legacy_probe)
@@ -11860,20 +11877,6 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru
return ret;
}
-static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
- const char *binary_path, uint64_t offset)
-{
- int i;
-
- snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);
-
- /* sanitize binary_path in the probe name */
- for (i = 0; buf[i]; i++) {
- if (!isalnum(buf[i]))
- buf[i] = '_';
- }
-}
-
static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
const char *binary_path, size_t offset)
{
@@ -12297,13 +12300,14 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
func_offset, pid, ref_ctr_off);
} else {
- char probe_name[PATH_MAX + 64];
+ char probe_name[MAX_EVENT_NAME_LEN];
if (ref_ctr_off)
return libbpf_err_ptr(-EINVAL);
- gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
- binary_path, func_offset);
+ gen_probe_legacy_event_name(probe_name, sizeof(probe_name),
+ strrchr(binary_path, '/') ? : binary_path,
+ func_offset);
legacy_probe = strdup(probe_name);
if (!legacy_probe)
@@ -13371,7 +13375,6 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
attr.config = PERF_COUNT_SW_BPF_OUTPUT;
attr.type = PERF_TYPE_SOFTWARE;
attr.sample_type = PERF_SAMPLE_RAW;
- attr.sample_period = sample_period;
attr.wakeup_events = sample_period;
p.attr = &attr;
@@ -14099,6 +14102,12 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
}
link = map_skel->link;
+ if (!link) {
+ pr_warn("map '%s': BPF map skeleton link is uninitialized\n",
+ bpf_map__name(map));
+ continue;
+ }
+
if (*link)
continue;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index fdcee6a71e0f..1137e7d2e1b5 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -940,6 +940,12 @@ LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_le
LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size);
LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size);
+LIBBPF_API struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog);
+LIBBPF_API __u32 bpf_program__func_info_cnt(const struct bpf_program *prog);
+
+LIBBPF_API struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog);
+LIBBPF_API __u32 bpf_program__line_info_cnt(const struct bpf_program *prog);
+
/**
* @brief **bpf_program__set_attach_target()** sets BTF-based attach target
* for supported BPF program types:
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index d8b71f22f197..1205f9a4fe04 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -437,6 +437,10 @@ LIBBPF_1.6.0 {
bpf_linker__add_fd;
bpf_linker__new_fd;
bpf_object__prepare;
+ bpf_program__func_info;
+ bpf_program__func_info_cnt;
+ bpf_program__line_info;
+ bpf_program__line_info_cnt;
btf__add_decl_attr;
btf__add_type_attr;
} LIBBPF_1.5.0;
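
A hedged sketch of the new getters; the object and program names are illustrative and error handling is minimal. Both return NULL (with errno set to EOPNOTSUPP) when the object's record size doesn't match the struct this libbpf was built against:

/* Editor's sketch: iterate a program's func_info records after open. */
struct bpf_object *obj = bpf_object__open("prog.bpf.o");
struct bpf_program *prog = bpf_object__find_program_by_name(obj, "handler");
struct bpf_func_info *fi = bpf_program__func_info(prog);
__u32 i, cnt = bpf_program__func_info_cnt(prog);

if (fi) {
	for (i = 0; i < cnt; i++)
		printf("insn_off %u -> BTF type id %u\n",
		       fi[i].insn_off, fi[i].type_id);
}
bpf_object__close(obj);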
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 76669c73dcd1..477a3b3389a0 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -667,6 +667,15 @@ static inline int sys_dup3(int oldfd, int newfd, int flags)
return syscall(__NR_dup3, oldfd, newfd, flags);
}
+/* Some versions of Android don't provide memfd_create() in their libc
+ * implementation, so avoid complications and just go straight to Linux
+ * syscall.
+ */
+static inline int sys_memfd_create(const char *name, unsigned flags)
+{
+ return syscall(__NR_memfd_create, name, flags);
+}
+
/* Point *fixed_fd* to the same file that *tmp_fd* points to.
* Regardless of success, *tmp_fd* is closed.
* Whatever *fixed_fd* pointed to is closed silently.
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 800e0ef09c37..a469e5d4fee7 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -573,7 +573,7 @@ int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz,
snprintf(filename, sizeof(filename), "mem:%p+%zu", buf, buf_sz);
- fd = memfd_create(filename, 0);
+ fd = sys_memfd_create(filename, 0);
if (fd < 0) {
ret = -errno;
pr_warn("failed to create memfd '%s': %s\n", filename, errstr(ret));
@@ -1376,7 +1376,7 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj
} else {
if (!secs_match(dst_sec, src_sec)) {
pr_warn("ELF sections %s are incompatible\n", src_sec->sec_name);
- return -1;
+ return -EINVAL;
}
/* "license" and "version" sections are deduped */
@@ -2223,7 +2223,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
}
} else if (!secs_match(dst_sec, src_sec)) {
pr_warn("sections %s are not compatible\n", src_sec->sec_name);
- return -1;
+ return -EINVAL;
}
/* shdr->sh_link points to SYMTAB */
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index 975e265eab3b..06663f9ea581 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -63,16 +63,16 @@ static int validate_nla(struct nlattr *nla, int maxtype,
minlen = nla_attr_minlen[pt->type];
if (libbpf_nla_len(nla) < minlen)
- return -1;
+ return -EINVAL;
if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen)
- return -1;
+ return -EINVAL;
if (pt->type == LIBBPF_NLA_STRING) {
char *data = libbpf_nla_data(nla);
if (data[libbpf_nla_len(nla) - 1] != '\0')
- return -1;
+ return -EINVAL;
}
return 0;
@@ -118,19 +118,18 @@ int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head,
if (policy) {
err = validate_nla(nla, maxtype, policy);
if (err < 0)
- goto errout;
+ return err;
}
- if (tb[type])
+ if (tb[type]) {
pr_warn("Attribute of type %#x found multiple times in message, "
"previous attribute is being ignored.\n", type);
+ }
tb[type] = nla;
}
- err = 0;
-errout:
- return err;
+ return 0;
}
/**
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index e70c502a16df..422e186cf3cf 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -10,7 +10,6 @@ CONFIG_KUNIT_EXAMPLE_TEST=y
CONFIG_KUNIT_ALL_TESTS=y
CONFIG_FORTIFY_SOURCE=y
-CONFIG_INIT_STACK_ALL_PATTERN=y
CONFIG_IIO=y
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 6aa11cd3db42..339b31e6a6b5 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -205,7 +205,7 @@ export KHDR_INCLUDES
all:
@ret=1; \
- for TARGET in $(TARGETS); do \
+ for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
mkdir $$BUILD_TARGET -p; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET \
diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST
index f748f2c33b22..1789a61d0a9b 100644
--- a/tools/testing/selftests/bpf/DENYLIST
+++ b/tools/testing/selftests/bpf/DENYLIST
@@ -1,5 +1,6 @@
# TEMPORARY
# Alphabetical order
+dynptr/test_probe_read_user_str_dynptr	 # disabled until https://patchwork.kernel.org/project/linux-mm/patch/20250422131449.57177-1-mykyta.yatsenko5@gmail.com/ makes it into the bpf-next tree
get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge
stacktrace_build_id
stacktrace_build_id_nmi
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 6d8feda27ce9..12e99c0277a8 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -1,3 +1 @@
-fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524
-fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524
tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 66bb50356be0..cf5ed3bee573 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -34,6 +34,9 @@ OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0)
LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null)
LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf)
+SKIP_DOCS ?=
+SKIP_LLVM ?=
+
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
@@ -172,6 +175,7 @@ override OUTPUT := $(patsubst %/,%,$(OUTPUT))
endif
endif
+ifneq ($(SKIP_LLVM),1)
ifeq ($(feature-llvm),1)
LLVM_CFLAGS += -DHAVE_LLVM_SUPPORT
LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets
@@ -180,13 +184,14 @@ ifeq ($(feature-llvm),1)
# Prefer linking statically if it's available, otherwise fallback to shared
ifeq ($(shell $(LLVM_CONFIG) --link-static --libs >/dev/null 2>&1 && echo static),static)
LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS))
- LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ LLVM_LDLIBS += $(filter-out -lxml2,$(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS)))
LLVM_LDLIBS += -lstdc++
else
LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-shared --libs $(LLVM_CONFIG_LIB_COMPONENTS))
endif
LLVM_LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags)
endif
+endif
SCRATCH_DIR := $(OUTPUT)/tools
BUILD_DIR := $(SCRATCH_DIR)/build
@@ -358,7 +363,9 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
prefix= DESTDIR=$(SCRATCH_DIR)/ install-bin
endif
+ifneq ($(SKIP_DOCS),1)
all: docs
+endif
docs:
$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
@@ -673,9 +680,6 @@ ifneq ($2:$(OUTPUT),:$(shell pwd))
$(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/
endif
-$(OUTPUT)/$(TRUNNER_BINARY): LDLIBS += $$(LLVM_LDLIBS)
-$(OUTPUT)/$(TRUNNER_BINARY): LDFLAGS += $$(LLVM_LDFLAGS)
-
# some X.test.o files have runtime dependencies on Y.bpf.o files
$(OUTPUT)/$(TRUNNER_BINARY): | $(TRUNNER_BPF_OBJS)
@@ -686,7 +690,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(OUTPUT)/veristat \
| $(TRUNNER_BINARY)-extras
$$(call msg,BINARY,,$$@)
- $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LDFLAGS) -o $$@
+ $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LLVM_LDLIBS) $$(LDFLAGS) $$(LLVM_LDFLAGS) -o $$@
$(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@
$(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/$(USE_BOOTSTRAP)bpftool \
$(OUTPUT)/$(if $2,$2/)bpftool
@@ -811,6 +815,7 @@ $(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.ske
$(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
$(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h
$(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h
+$(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -831,6 +836,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_local_storage_create.o \
$(OUTPUT)/bench_htab_mem.o \
$(OUTPUT)/bench_bpf_crypto.o \
+ $(OUTPUT)/bench_sockmap.o \
#
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
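
For hosts without the LLVM development libraries or rst2man, the new SKIP_LLVM and SKIP_DOCS knobs above can be set on the command line, e.g. make -C tools/testing/selftests/bpf SKIP_LLVM=1 SKIP_DOCS=1; both default to empty, so a plain build is unchanged.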
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 0fd8c9b0d38f..ddd73d06a1eb 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -283,6 +283,7 @@ extern struct argp bench_local_storage_create_argp;
extern struct argp bench_htab_mem_argp;
extern struct argp bench_trigger_batch_argp;
extern struct argp bench_crypto_argp;
+extern struct argp bench_sockmap_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -297,6 +298,7 @@ static const struct argp_child bench_parsers[] = {
{ &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 },
{ &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 },
{ &bench_crypto_argp, 0, "bpf crypto benchmark", 0 },
+ { &bench_sockmap_argp, 0, "bpf sockmap benchmark", 0 },
{},
};
@@ -555,6 +557,7 @@ extern const struct bench bench_local_storage_create;
extern const struct bench bench_htab_mem;
extern const struct bench bench_crypto_encrypt;
extern const struct bench bench_crypto_decrypt;
+extern const struct bench bench_sockmap;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -621,6 +624,7 @@ static const struct bench *benchs[] = {
&bench_htab_mem,
&bench_crypto_encrypt,
&bench_crypto_decrypt,
+ &bench_sockmap,
};
static void find_benchmark(void)
diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
index 926ee822143e..297e32390cd1 100644
--- a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
+++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
@@ -279,6 +279,7 @@ static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value)
}
got = read(fd, buf, sizeof(buf) - 1);
+ close(fd);
if (got <= 0) {
*value = 0;
return;
@@ -286,8 +287,6 @@ static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value)
buf[got] = 0;
*value = strtoull(buf, NULL, 0);
-
- close(fd);
}
static void htab_mem_measure(struct bench_res *res)
diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
new file mode 100644
index 000000000000..8ebf563a67a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <error.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/sendfile.h>
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <argp.h>
+#include "bench.h"
+#include "bench_sockmap_prog.skel.h"
+
+#define FILE_SIZE (128 * 1024)
+#define DATA_REPEAT_SIZE 10
+
+static const char snd_data[DATA_REPEAT_SIZE] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+/* c1 <-> [p1, p2] <-> c2
+ * RX bench(BPF_SK_SKB_STREAM_VERDICT):
+ * ARG_FW_RX_PASS:
+ * send(p2) -> recv(c2) -> bpf skb passthrough -> recv(c2)
+ * ARG_FW_RX_VERDICT_EGRESS:
+ *		send(c1) -> verdict skb to tx queue of p2 -> recv(c2)
+ *	ARG_FW_RX_VERDICT_INGRESS:
+ *		send(c1) -> verdict skb to rx queue of c2 -> recv(c2)
+ *
+ * TX bench(BPF_SK_MSG_VERDICT):
+ * ARG_FW_TX_PASS:
+ * send(p2) -> bpf msg passthrough -> send(p2) -> recv(c2)
+ * ARG_FW_TX_VERDICT_INGRESS:
+ * send(p2) -> verdict msg to rx queue of c2 -> recv(c2)
+ * ARG_FW_TX_VERDICT_EGRESS:
+ * send(p1) -> verdict msg to tx queue of p2 -> recv(c2)
+ */
+enum SOCKMAP_ARG_FLAG {
+ ARG_FW_RX_NORMAL = 11000,
+ ARG_FW_RX_PASS,
+ ARG_FW_RX_VERDICT_EGRESS,
+ ARG_FW_RX_VERDICT_INGRESS,
+ ARG_FW_TX_NORMAL,
+ ARG_FW_TX_PASS,
+ ARG_FW_TX_VERDICT_INGRESS,
+ ARG_FW_TX_VERDICT_EGRESS,
+ ARG_CTL_RX_STRP,
+ ARG_CONSUMER_DELAY_TIME,
+ ARG_PRODUCER_DURATION,
+};
+
+#define TXMODE_NORMAL() \
+ ((ctx.mode) == ARG_FW_TX_NORMAL)
+
+#define TXMODE_BPF_INGRESS() \
+ ((ctx.mode) == ARG_FW_TX_VERDICT_INGRESS)
+
+#define TXMODE_BPF_EGRESS() \
+ ((ctx.mode) == ARG_FW_TX_VERDICT_EGRESS)
+
+#define TXMODE_BPF_PASS() \
+ ((ctx.mode) == ARG_FW_TX_PASS)
+
+#define TXMODE_BPF() ( \
+ TXMODE_BPF_PASS() || \
+ TXMODE_BPF_INGRESS() || \
+ TXMODE_BPF_EGRESS())
+
+#define TXMODE() ( \
+ TXMODE_NORMAL() || \
+ TXMODE_BPF())
+
+#define RXMODE_NORMAL() \
+ ((ctx.mode) == ARG_FW_RX_NORMAL)
+
+#define RXMODE_BPF_PASS() \
+ ((ctx.mode) == ARG_FW_RX_PASS)
+
+#define RXMODE_BPF_VERDICT_EGRESS() \
+ ((ctx.mode) == ARG_FW_RX_VERDICT_EGRESS)
+
+#define RXMODE_BPF_VERDICT_INGRESS() \
+ ((ctx.mode) == ARG_FW_RX_VERDICT_INGRESS)
+
+#define RXMODE_BPF_VERDICT() ( \
+ RXMODE_BPF_VERDICT_INGRESS() || \
+ RXMODE_BPF_VERDICT_EGRESS())
+
+#define RXMODE_BPF() ( \
+ RXMODE_BPF_PASS() || \
+ RXMODE_BPF_VERDICT())
+
+#define RXMODE() ( \
+ RXMODE_NORMAL() || \
+ RXMODE_BPF())
+
+static struct sockmap_ctx {
+ struct bench_sockmap_prog *skel;
+ enum SOCKMAP_ARG_FLAG mode;
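+	/* fds[] aliases: c1/c2 are connected client ends, p1/p2 their
+	 * accepted peers, sfd the listening socket
+	 */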
+ #define c1 fds[0]
+ #define p1 fds[1]
+ #define c2 fds[2]
+ #define p2 fds[3]
+ #define sfd fds[4]
+ int fds[5];
+ long send_calls;
+ long read_calls;
+ long prod_send;
+ long user_read;
+ int file_size;
+ int delay_consumer;
+ int prod_run_time;
+ int strp_size;
+} ctx = {
+ .prod_send = 0,
+ .user_read = 0,
+ .file_size = FILE_SIZE,
+ .mode = ARG_FW_RX_VERDICT_EGRESS,
+ .fds = {0},
+ .delay_consumer = 0,
+ .prod_run_time = 0,
+ .strp_size = 0,
+};
+
+static void bench_sockmap_prog_destroy(void)
+{
+ int i;
+
+	for (i = 0; i < sizeof(ctx.fds) / sizeof(ctx.fds[0]); i++) {
+		if (ctx.fds[i] > 0)
+			close(ctx.fds[i]);
+ }
+
+ bench_sockmap_prog__destroy(ctx.skel);
+}
+
+static void init_addr(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = 0;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ *len = sizeof(*addr4);
+}
+
+static bool set_non_block(int fd, bool nonblocking)
+{
+ int flags = fcntl(fd, F_GETFL, 0);
+
+ if (flags == -1)
+ return false;
+	flags = nonblocking ? (flags | O_NONBLOCK) : (flags & ~O_NONBLOCK);
+ return (fcntl(fd, F_SETFL, flags) == 0);
+}
+
+static int create_pair(int *c, int *p, int type)
+{
+ struct sockaddr_storage addr;
+ int err, cfd, pfd;
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+
+ err = getsockname(ctx.sfd, (struct sockaddr *)&addr, &addr_len);
+ if (err) {
+ fprintf(stderr, "getsockname error %d\n", errno);
+ return err;
+ }
+ cfd = socket(AF_INET, type, 0);
+ if (cfd < 0) {
+ fprintf(stderr, "socket error %d\n", errno);
+		return cfd;
+ }
+
+ err = connect(cfd, (struct sockaddr *)&addr, addr_len);
+ if (err && errno != EINPROGRESS) {
+ fprintf(stderr, "connect error %d\n", errno);
+ return err;
+ }
+
+ pfd = accept(ctx.sfd, NULL, NULL);
+ if (pfd < 0) {
+ fprintf(stderr, "accept error %d\n", errno);
+		return pfd;
+ }
+ *c = cfd;
+ *p = pfd;
+ return 0;
+}
+
+static int create_sockets(void)
+{
+ struct sockaddr_storage addr;
+ int err, one = 1;
+ socklen_t addr_len;
+
+ init_addr(&addr, &addr_len);
+ ctx.sfd = socket(AF_INET, SOCK_STREAM, 0);
+ if (ctx.sfd < 0) {
+ fprintf(stderr, "socket error:%d\n", errno);
+ return ctx.sfd;
+ }
+ err = setsockopt(ctx.sfd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+ if (err) {
+ fprintf(stderr, "setsockopt error:%d\n", errno);
+ return err;
+ }
+
+ err = bind(ctx.sfd, (struct sockaddr *)&addr, addr_len);
+ if (err) {
+ fprintf(stderr, "bind error:%d\n", errno);
+ return err;
+ }
+
+ err = listen(ctx.sfd, SOMAXCONN);
+ if (err) {
+ fprintf(stderr, "listen error:%d\n", errno);
+ return err;
+ }
+
+ err = create_pair(&ctx.c1, &ctx.p1, SOCK_STREAM);
+ if (err) {
+ fprintf(stderr, "create_pair 1 error\n");
+ return err;
+ }
+
+ err = create_pair(&ctx.c2, &ctx.p2, SOCK_STREAM);
+ if (err) {
+ fprintf(stderr, "create_pair 2 error\n");
+ return err;
+ }
+ printf("create socket fd c1:%d p1:%d c2:%d p2:%d\n",
+ ctx.c1, ctx.p1, ctx.c2, ctx.p2);
+ return 0;
+}
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 2 || env.producer_cnt != 1 ||
+ !env.affinity)
+ goto err;
+ return;
+err:
+ fprintf(stderr, "argument '-c 2 -p 1 -a' is necessary");
+ exit(1);
+}
+
+static int setup_rx_sockmap(void)
+{
+ int verdict, pass, parser, map;
+ int zero = 0, one = 1;
+	int err = 0;
+
+ parser = bpf_program__fd(ctx.skel->progs.prog_skb_parser);
+ verdict = bpf_program__fd(ctx.skel->progs.prog_skb_verdict);
+ pass = bpf_program__fd(ctx.skel->progs.prog_skb_pass);
+ map = bpf_map__fd(ctx.skel->maps.sock_map_rx);
+
+ if (ctx.strp_size != 0) {
+ ctx.skel->bss->pkt_size = ctx.strp_size;
+ err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err)
+ return err;
+ }
+
+ if (RXMODE_BPF_VERDICT())
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ else if (RXMODE_BPF_PASS())
+ err = bpf_prog_attach(pass, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err)
+ return err;
+
+ if (RXMODE_BPF_PASS())
+ return bpf_map_update_elem(map, &zero, &ctx.c2, BPF_NOEXIST);
+
+ err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST);
+ if (err < 0)
+ return err;
+
+ if (RXMODE_BPF_VERDICT_INGRESS()) {
+ ctx.skel->bss->verdict_dir = BPF_F_INGRESS;
+ err = bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST);
+ } else {
+ err = bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST);
+ }
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int setup_tx_sockmap(void)
+{
+ int zero = 0, one = 1;
+ int prog, map;
+ int err;
+
+ map = bpf_map__fd(ctx.skel->maps.sock_map_tx);
+ prog = TXMODE_BPF_PASS() ?
+ bpf_program__fd(ctx.skel->progs.prog_skmsg_pass) :
+ bpf_program__fd(ctx.skel->progs.prog_skmsg_verdict);
+
+ err = bpf_prog_attach(prog, map, BPF_SK_MSG_VERDICT, 0);
+ if (err)
+ return err;
+
+ if (TXMODE_BPF_EGRESS()) {
+ err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST);
+ err |= bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST);
+ } else {
+ ctx.skel->bss->verdict_dir = BPF_F_INGRESS;
+ err = bpf_map_update_elem(map, &zero, &ctx.p2, BPF_NOEXIST);
+ err |= bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST);
+ }
+
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static void setup(void)
+{
+ int err;
+
+ ctx.skel = bench_sockmap_prog__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "error loading skel\n");
+ exit(1);
+ }
+
+ if (create_sockets()) {
+ fprintf(stderr, "create_net_mode error\n");
+ goto err;
+ }
+
+ if (RXMODE_BPF()) {
+ err = setup_rx_sockmap();
+ if (err) {
+ fprintf(stderr, "setup_rx_sockmap error:%d\n", err);
+ goto err;
+ }
+ } else if (TXMODE_BPF()) {
+ err = setup_tx_sockmap();
+ if (err) {
+ fprintf(stderr, "setup_tx_sockmap error:%d\n", err);
+ goto err;
+ }
+ } else {
+ fprintf(stderr, "unknown sockmap bench mode: %d\n", ctx.mode);
+ goto err;
+ }
+
+ return;
+
+err:
+ bench_sockmap_prog_destroy();
+ exit(1);
+}
+
+static void measure(struct bench_res *res)
+{
+ res->drops = atomic_swap(&ctx.prod_send, 0);
+ res->hits = atomic_swap(&ctx.skel->bss->process_byte, 0);
+ res->false_hits = atomic_swap(&ctx.user_read, 0);
+ res->important_hits = atomic_swap(&ctx.send_calls, 0);
+ res->important_hits |= atomic_swap(&ctx.read_calls, 0) << 32;
+}
+
+static void verify_data(int *check_pos, char *buf, int rcv)
+{
+ for (int i = 0 ; i < rcv; i++) {
+ if (buf[i] != snd_data[(*check_pos) % DATA_REPEAT_SIZE]) {
+ fprintf(stderr, "verify data fail");
+ exit(1);
+ }
+ (*check_pos)++;
+ if (*check_pos >= FILE_SIZE)
+ *check_pos = 0;
+ }
+}
+
+static void *consumer(void *input)
+{
+ int rcv, sent;
+ int check_pos = 0;
+ int tid = (long)input;
+ int recv_buf_size = FILE_SIZE;
+ char *buf = malloc(recv_buf_size);
+ int delay_read = ctx.delay_consumer;
+
+ if (!buf) {
+ fprintf(stderr, "fail to init read buffer");
+ return NULL;
+ }
+
+ while (true) {
+ if (tid == 1) {
+ /* consumer 1 is unused for tx test and stream verdict test */
+ if (RXMODE_BPF() || TXMODE())
+ return NULL;
+			/* only used for RX_NORMAL, which serves as reverse-proxy mode */
+ rcv = read(ctx.p1, buf, recv_buf_size);
+ if (rcv < 0) {
+ fprintf(stderr, "fail to read p1");
+ return NULL;
+ }
+
+			sent = send(ctx.p2, buf, rcv, 0);
+			if (sent < 0) {
+				fprintf(stderr, "failed to send p2\n");
+ return NULL;
+ }
+ } else {
+ if (delay_read != 0) {
+ if (delay_read < 0)
+ return NULL;
+ sleep(delay_read);
+ delay_read = 0;
+ }
+			/* consumer 0 reads from the real endpoint */
+ atomic_inc(&ctx.read_calls);
+ rcv = read(ctx.c2, buf, recv_buf_size);
+ if (rcv < 0 && errno != EAGAIN) {
+ fprintf(stderr, "%s fail to read c2 %d\n", __func__, errno);
+ return NULL;
+ }
+ verify_data(&check_pos, buf, rcv);
+ atomic_add(&ctx.user_read, rcv);
+ }
+ }
+
+ return NULL;
+}
+
+static void *producer(void *input)
+{
+ int off = 0, fp, need_sent, sent;
+ int file_size = ctx.file_size;
+ struct timespec ts1, ts2;
+ int target;
+ FILE *file;
+
+ file = tmpfile();
+ if (!file) {
+ fprintf(stderr, "create file for sendfile");
+ return NULL;
+ }
+
+	/* fill the file with a repeating pattern so the consumer can verify it */
+ for (int i = 0; i < file_size; i++) {
+ if (fwrite(&snd_data[off], sizeof(char), 1, file) != 1) {
+ fprintf(stderr, "init tmpfile error");
+ return NULL;
+ }
+ if (++off >= sizeof(snd_data))
+ off = 0;
+ }
+ fflush(file);
+ fseek(file, 0, SEEK_SET);
+
+ fp = fileno(file);
+ need_sent = file_size;
+ clock_gettime(CLOCK_MONOTONIC, &ts1);
+
+ if (RXMODE_BPF_VERDICT())
+ target = ctx.c1;
+ else if (TXMODE_BPF_EGRESS())
+ target = ctx.p1;
+ else
+ target = ctx.p2;
+ set_non_block(target, true);
+ while (true) {
+ if (ctx.prod_run_time) {
+ clock_gettime(CLOCK_MONOTONIC, &ts2);
+ if (ts2.tv_sec - ts1.tv_sec > ctx.prod_run_time)
+ return NULL;
+ }
+
+ errno = 0;
+ atomic_inc(&ctx.send_calls);
+ sent = sendfile(target, fp, NULL, need_sent);
+ if (sent < 0) {
+ if (errno != EAGAIN && errno != ENOMEM && errno != ENOBUFS) {
+ fprintf(stderr, "sendfile return %d, errorno %d:%s\n",
+ sent, errno, strerror(errno));
+ return NULL;
+ }
+ continue;
+ } else if (sent < need_sent) {
+ need_sent -= sent;
+ atomic_add(&ctx.prod_send, sent);
+ continue;
+ }
+ atomic_add(&ctx.prod_send, need_sent);
+ need_sent = file_size;
+ lseek(fp, 0, SEEK_SET);
+ }
+
+ return NULL;
+}
+
+static void report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double speed_mbs, prod_mbs, bpf_mbs, send_hz, read_hz;
+
+ prod_mbs = res->drops / 1000000.0 / (delta_ns / 1000000000.0);
+ speed_mbs = res->false_hits / 1000000.0 / (delta_ns / 1000000000.0);
+ bpf_mbs = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
+ send_hz = (res->important_hits & 0xFFFFFFFF) / (delta_ns / 1000000000.0);
+ read_hz = (res->important_hits >> 32) / (delta_ns / 1000000000.0);
+
+ printf("Iter %3d (%7.3lfus): ",
+ iter, (delta_ns - 1000000000) / 1000.0);
+ printf("Send Speed %8.3lf MB/s (%8.3lf calls/s), BPF Speed %8.3lf MB/s, "
+ "Rcv Speed %8.3lf MB/s (%8.3lf calls/s)\n",
+ prod_mbs, send_hz, bpf_mbs, speed_mbs, read_hz);
+}
+
+static void report_final(struct bench_res res[], int res_cnt)
+{
+ double verdict_mbs_mean = 0.0;
+	double verdict_total = 0.0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ verdict_mbs_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+ verdict_total += res[i].hits / 1000000.0;
+ }
+
+ printf("Summary: total trans %8.3lu MB \u00B1 %5.3lf MB/s\n",
+ verdict_total, verdict_mbs_mean);
+}
+
+static const struct argp_option opts[] = {
+ { "rx-normal", ARG_FW_RX_NORMAL, NULL, 0,
+ "simple reserve-proxy mode, no bfp enabled"},
+ { "rx-pass", ARG_FW_RX_PASS, NULL, 0,
+ "run bpf prog but no redir applied"},
+ { "rx-strp", ARG_CTL_RX_STRP, "Byte", 0,
+ "enable strparser and set the encapsulation size"},
+ { "rx-verdict-egress", ARG_FW_RX_VERDICT_EGRESS, NULL, 0,
+ "forward data with bpf(stream verdict)"},
+ { "rx-verdict-ingress", ARG_FW_RX_VERDICT_INGRESS, NULL, 0,
+ "forward data with bpf(stream verdict)"},
+ { "tx-normal", ARG_FW_TX_NORMAL, NULL, 0,
+ "simple c-s mode, no bfp enabled"},
+ { "tx-pass", ARG_FW_TX_PASS, NULL, 0,
+ "run bpf prog but no redir applied"},
+ { "tx-verdict-ingress", ARG_FW_TX_VERDICT_INGRESS, NULL, 0,
+ "forward msg to ingress queue of another socket"},
+ { "tx-verdict-egress", ARG_FW_TX_VERDICT_EGRESS, NULL, 0,
+ "forward msg to egress queue of another socket"},
+ { "delay-consumer", ARG_CONSUMER_DELAY_TIME, "SEC", 0,
+ "delay consumer start"},
+ { "producer-duration", ARG_PRODUCER_DURATION, "SEC", 0,
+ "producer duration"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_FW_RX_NORMAL...ARG_FW_TX_VERDICT_EGRESS:
+ ctx.mode = key;
+ break;
+ case ARG_CONSUMER_DELAY_TIME:
+ ctx.delay_consumer = strtol(arg, NULL, 10);
+ break;
+ case ARG_PRODUCER_DURATION:
+ ctx.prod_run_time = strtol(arg, NULL, 10);
+ break;
+ case ARG_CTL_RX_STRP:
+ ctx.strp_size = strtol(arg, NULL, 10);
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_sockmap_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+/* Benchmark performance of sockmap data forwarding */
+const struct bench bench_sockmap = {
+ .name = "sockmap",
+ .argp = &bench_sockmap_argp,
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = report_progress,
+ .report_final = report_final,
+};
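
Once built into the bench binary, the benchmark is selected by name; validate() above requires exactly two consumers, one producer and CPU affinity, so an invocation looks like ./bench sockmap --rx-verdict-egress -c 2 -p 1 -a (--rx-verdict-egress matches the default mode set in ctx).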
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 6535c8ae3c46..5e512a1d09d1 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -591,4 +591,9 @@ extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym
extern struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it) __weak __ksym;
extern void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it) __weak __ksym;
+struct bpf_iter_dmabuf;
+extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym;
+extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym;
+extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym;
+
#endif
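
With the declarations above in place, a BPF program can drive the iterator through bpf_experimental.h's bpf_for_each() wrapper (or the raw new/next/destroy calls). A minimal sketch, assuming vmlinux.h provides the complete struct bpf_iter_dmabuf; the program name is hypothetical:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include "bpf_experimental.h"

char _license[] SEC("license") = "GPL";

SEC("syscall")
int count_dmabufs(const void *ctx)
{
	struct dma_buf *d;
	int cnt = 0;

	/* expands to bpf_iter_dmabuf_new/_next/_destroy */
	bpf_for_each(dmabuf, d)
		cnt++;

	return cnt;
}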
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 3201a962b3dc..f74e1ea0ad3b 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -22,6 +22,8 @@ CONFIG_CRYPTO_AES=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DMABUF_HEAPS=y
+CONFIG_DMABUF_HEAPS_SYSTEM=y
CONFIG_DUMMY=y
CONFIG_DYNAMIC_FTRACE=y
CONFIG_FPROBE=y
@@ -108,6 +110,7 @@ CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
CONFIG_SYN_COOKIES=y
CONFIG_TEST_BPF=m
+CONFIG_UDMABUF=y
CONFIG_USERFAULTFD=y
CONFIG_VSOCKETS=y
CONFIG_VXLAN=y
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
index 7565fc7690c2..0223fce4db2b 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
@@ -51,9 +51,11 @@ static void test_arena_spin_lock_size(int size)
struct arena_spin_lock *skel;
pthread_t thread_id[16];
int prog_fd, i, err;
+ int nthreads;
void *ret;
- if (get_nprocs() < 2) {
+ nthreads = MIN(get_nprocs(), ARRAY_SIZE(thread_id));
+ if (nthreads < 2) {
test__skip();
return;
}
@@ -66,25 +68,25 @@ static void test_arena_spin_lock_size(int size)
goto end;
}
skel->bss->cs_count = size;
- skel->bss->limit = repeat * 16;
+ skel->bss->limit = repeat * nthreads;
- ASSERT_OK(pthread_barrier_init(&barrier, NULL, 16), "barrier init");
+ ASSERT_OK(pthread_barrier_init(&barrier, NULL, nthreads), "barrier init");
prog_fd = bpf_program__fd(skel->progs.prog);
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < nthreads; i++) {
err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd);
if (!ASSERT_OK(err, "pthread_create"))
goto end_barrier;
}
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < nthreads; i++) {
if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join"))
goto end_barrier;
if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd"))
goto end_barrier;
}
- ASSERT_EQ(skel->bss->counter, repeat * 16, "check counter value");
+ ASSERT_EQ(skel->bss->counter, repeat * nthreads, "check counter value");
end_barrier:
pthread_barrier_destroy(&barrier);
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 329c7862b52d..cabc51c2ca6b 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -122,6 +122,85 @@ cleanup:
test_attach_probe_manual__destroy(skel);
}
+/* attach uprobe/uretprobe long event name tests */
+static void test_attach_uprobe_long_event_name(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct bpf_link *uprobe_link, *uretprobe_link;
+ struct test_attach_probe_manual *skel;
+ ssize_t uprobe_offset;
+ char path[PATH_MAX] = {0};
+
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
+ return;
+
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
+ goto cleanup;
+
+ if (!ASSERT_GT(readlink("/proc/self/exe", path, PATH_MAX - 1), 0, "readlink"))
+ goto cleanup;
+
+ /* manual-attach uprobe/uretprobe */
+ uprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY;
+ uprobe_opts.ref_ctr_offset = 0;
+ uprobe_opts.retprobe = false;
+ uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
+ 0 /* self pid */,
+ path,
+ uprobe_offset,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_uprobe = uprobe_link;
+
+ uprobe_opts.retprobe = true;
+ uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe,
+ -1 /* any pid */,
+ path,
+ uprobe_offset, &uprobe_opts);
+ if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_uretprobe = uretprobe_link;
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
+/* attach kprobe/kretprobe long event name tests */
+static void test_attach_kprobe_long_event_name(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
+ struct bpf_link *kprobe_link, *kretprobe_link;
+ struct test_attach_probe_manual *skel;
+
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
+ return;
+
+ /* manual-attach kprobe/kretprobe */
+ kprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY;
+ kprobe_opts.retprobe = false;
+ kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ "bpf_testmod_looooooooooooooooooooooooooooooong_name",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_kprobe = kprobe_link;
+
+ kprobe_opts.retprobe = true;
+ kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ "bpf_testmod_looooooooooooooooooooooooooooooong_name",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_kretprobe = kretprobe_link;
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
static void test_attach_probe_auto(struct test_attach_probe *skel)
{
struct bpf_link *uprobe_err_link;
@@ -323,6 +402,11 @@ void test_attach_probe(void)
if (test__start_subtest("uprobe-ref_ctr"))
test_uprobe_ref_ctr(skel);
+ if (test__start_subtest("uprobe-long_name"))
+ test_attach_uprobe_long_event_name();
+ if (test__start_subtest("kprobe-long_name"))
+ test_attach_kprobe_long_event_name();
+
cleanup:
test_attach_probe__destroy(skel);
ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_cleanup");
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index dbd13f8e42a7..dd6512fa652b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -63,6 +63,12 @@ static void test_bpf_nf_ct(int mode)
.repeat = 1,
);
+ if (SYS_NOFAIL("iptables-legacy --version")) {
+ fprintf(stdout, "Missing required iptables-legacy tool\n");
+ test__skip();
+ return;
+ }
+
skel = test_bpf_nf__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
index d9024c7a892a..5bc15bb6b7ce 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
@@ -440,6 +440,105 @@ cleanup:
btf__free(btf1);
}
+/* Ensure module split BTF dedup worked correctly; when dedup fails badly,
+ * core kernel types end up in split BTF too, so ensure that references to
+ * such types point at base - not split - BTF.
+ *
+ * bpf_testmod_test_write() has multiple core kernel type parameters;
+ *
+ * ssize_t
+ * bpf_testmod_test_write(struct file *file, struct kobject *kobj,
+ * struct bin_attribute *bin_attr,
+ * char *buf, loff_t off, size_t len);
+ *
+ * Ensure each of the FUNC_PROTO params is a core kernel type.
+ *
+ * Do the same for
+ *
+ * __bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk);
+ *
+ * ...and
+ *
+ * __bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb);
+ *
+ */
+static const char *mod_funcs[] = {
+ "bpf_testmod_test_write",
+ "bpf_kfunc_call_test3",
+ "bpf_kfunc_call_test_pass_ctx"
+};
+
+static void test_split_module(void)
+{
+ struct btf *vmlinux_btf, *btf1 = NULL;
+ int i, nr_base_types;
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "vmlinux_btf"))
+ return;
+ nr_base_types = btf__type_cnt(vmlinux_btf);
+ if (!ASSERT_GT(nr_base_types, 0, "nr_base_types"))
+ goto cleanup;
+
+ btf1 = btf__parse_split("/sys/kernel/btf/bpf_testmod", vmlinux_btf);
+ if (!ASSERT_OK_PTR(btf1, "split_btf"))
+		goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(mod_funcs); i++) {
+ const struct btf_param *p;
+ const struct btf_type *t;
+ __u16 vlen;
+ __u32 id;
+ int j;
+
+ id = btf__find_by_name_kind(btf1, mod_funcs[i], BTF_KIND_FUNC);
+ if (!ASSERT_GE(id, nr_base_types, "func_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf1, id);
+ if (!ASSERT_OK_PTR(t, "func_id_type"))
+ goto cleanup;
+ t = btf__type_by_id(btf1, t->type);
+ if (!ASSERT_OK_PTR(t, "func_proto_id_type"))
+ goto cleanup;
+ if (!ASSERT_EQ(btf_is_func_proto(t), true, "is_func_proto"))
+ goto cleanup;
+ vlen = btf_vlen(t);
+
+ for (j = 0, p = btf_params(t); j < vlen; j++, p++) {
+ /* bpf_testmod uses resilient split BTF, so any
+ * reference types will be added to split BTF and their
+ * associated targets will be base BTF types; for example
+ * for a "struct sock *" the PTR will be in split BTF
+ * while the "struct sock" will be in base.
+ *
+ * In some cases like loff_t we have to resolve
+ * multiple typedefs hence the while() loop below.
+ *
+ * Note that resilient split BTF generation depends
+ * on pahole version, so we do not assert that
+ * reference types are in split BTF, as if pahole
+ * does not support resilient split BTF they will
+ * also be base BTF types.
+ */
+ id = p->type;
+ do {
+ t = btf__type_by_id(btf1, id);
+ if (!ASSERT_OK_PTR(t, "param_ref_type"))
+ goto cleanup;
+ if (!btf_is_mod(t) && !btf_is_ptr(t) && !btf_is_typedef(t))
+ break;
+ id = t->type;
+ } while (true);
+
+ if (!ASSERT_LT(id, nr_base_types, "verify_base_type"))
+ goto cleanup;
+ }
+ }
+cleanup:
+ btf__free(btf1);
+ btf__free(vmlinux_btf);
+}
+
void test_btf_dedup_split()
{
if (test__start_subtest("split_simple"))
@@ -450,4 +549,6 @@ void test_btf_dedup_split()
test_split_fwd_resolve();
if (test__start_subtest("split_dup_struct_in_cu"))
test_split_dup_struct_in_cu();
+ if (test__start_subtest("split_module"))
+ test_split_module();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c
index eef1158676ed..3696fb9a05ed 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c
@@ -12,10 +12,11 @@ static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
vfprintf(ctx, fmt, args);
}
-void test_btf_split() {
+static void __test_btf_split(bool multi)
+{
struct btf_dump *d = NULL;
const struct btf_type *t;
- struct btf *btf1, *btf2;
+ struct btf *btf1, *btf2, *btf3 = NULL;
int str_off, i, err;
btf1 = btf__new_empty();
@@ -63,14 +64,46 @@ void test_btf_split() {
ASSERT_EQ(btf_vlen(t), 3, "split_struct_vlen");
ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "s2", "split_struct_name");
+ if (multi) {
+ btf3 = btf__new_empty_split(btf2);
+ if (!ASSERT_OK_PTR(btf3, "multi_split_btf"))
+ goto cleanup;
+ } else {
+ btf3 = btf2;
+ }
+
+ btf__add_union(btf3, "u1", 16); /* [5] union u1 { */
+ btf__add_field(btf3, "f1", 4, 0, 0); /* struct s2 f1; */
+ btf__add_field(btf3, "uf2", 1, 0, 0); /* int f2; */
+ /* } */
+
+ if (multi) {
+ t = btf__type_by_id(btf2, 5);
+ ASSERT_NULL(t, "multisplit_type_in_first_split");
+ }
+
+ t = btf__type_by_id(btf3, 5);
+ if (!ASSERT_OK_PTR(t, "split_union_type"))
+ goto cleanup;
+ ASSERT_EQ(btf_is_union(t), true, "split_union_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "split_union_vlen");
+ ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "u1", "split_union_name");
+ ASSERT_EQ(btf__type_cnt(btf3), 6, "split_type_cnt");
+
+ t = btf__type_by_id(btf3, 1);
+ if (!ASSERT_OK_PTR(t, "split_base_type"))
+ goto cleanup;
+ ASSERT_EQ(btf_is_int(t), true, "split_base_int");
+ ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "int", "split_base_type_name");
+
/* BTF-to-C dump of split BTF */
dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
return;
- d = btf_dump__new(btf2, btf_dump_printf, dump_buf_file, NULL);
+ d = btf_dump__new(btf3, btf_dump_printf, dump_buf_file, NULL);
if (!ASSERT_OK_PTR(d, "btf_dump__new"))
goto cleanup;
- for (i = 1; i < btf__type_cnt(btf2); i++) {
+ for (i = 1; i < btf__type_cnt(btf3); i++) {
err = btf_dump__dump_type(d, i);
ASSERT_OK(err, "dump_type_ok");
}
@@ -79,12 +112,15 @@ void test_btf_split() {
ASSERT_STREQ(dump_buf,
"struct s1 {\n"
" int f1;\n"
-"};\n"
-"\n"
+"};\n\n"
"struct s2 {\n"
" struct s1 f1;\n"
" int f2;\n"
" int *f3;\n"
+"};\n\n"
+"union u1 {\n"
+" struct s2 f1;\n"
+" int uf2;\n"
"};\n\n", "c_dump");
cleanup:
@@ -94,4 +130,14 @@ cleanup:
btf_dump__free(d);
btf__free(btf1);
btf__free(btf2);
+ if (btf2 != btf3)
+ btf__free(btf3);
+}
+
+void test_btf_split(void)
+{
+ if (test__start_subtest("single_split"))
+ __test_btf_split(false);
+ if (test__start_subtest("multi_split"))
+ __test_btf_split(true);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
new file mode 100644
index 000000000000..3923e64c4c1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright (c) 2025 Isovalent */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static void test_btf_mmap_sysfs(const char *path, struct btf *base)
+{
+ struct stat st;
+ __u64 btf_size, end;
+ void *raw_data = NULL;
+ int fd = -1;
+ long page_size;
+ struct btf *btf = NULL;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ if (!ASSERT_GE(page_size, 0, "get_page_size"))
+ goto cleanup;
+
+ if (!ASSERT_OK(stat(path, &st), "stat_btf"))
+ goto cleanup;
+
+ btf_size = st.st_size;
+ end = (btf_size + page_size - 1) / page_size * page_size;
+
+ fd = open(path, O_RDONLY);
+ if (!ASSERT_GE(fd, 0, "open_btf"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, btf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_writable"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, btf_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_shared"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, end + 1, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_invalid_size"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, end, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_OK_PTR(raw_data, "mmap_btf"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_WRITE), -1,
+ "mprotect_writable"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_EXEC), -1,
+ "mprotect_executable"))
+ goto cleanup;
+
+ /* Check padding is zeroed */
+	for (__u64 i = btf_size; i < end; i++) {
+		if (((__u8 *)raw_data)[i] != 0) {
+			PRINT_FAIL("tail of BTF is not zero at page offset %llu\n", i);
+ goto cleanup;
+ }
+ }
+
+ btf = btf__new_split(raw_data, btf_size, base);
+ if (!ASSERT_OK_PTR(btf, "parse_btf"))
+ goto cleanup;
+
+cleanup:
+ btf__free(btf);
+ if (raw_data && raw_data != MAP_FAILED)
+ munmap(raw_data, btf_size);
+ if (fd >= 0)
+ close(fd);
+}
+
+void test_btf_sysfs(void)
+{
+ test_btf_mmap_sysfs("/sys/kernel/btf/vmlinux", NULL);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
new file mode 100644
index 000000000000..6c2b0c3dbcd8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google */
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include "dmabuf_iter.skel.h"
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <linux/dma-buf.h>
+#include <linux/dma-heap.h>
+#include <linux/udmabuf.h>
+
+static int udmabuf = -1;
+static const char udmabuf_test_buffer_name[DMA_BUF_NAME_LEN] = "udmabuf_test_buffer_for_iter";
+static size_t udmabuf_test_buffer_size;
+static int sysheap_dmabuf = -1;
+static const char sysheap_test_buffer_name[DMA_BUF_NAME_LEN] = "sysheap_test_buffer_for_iter";
+static size_t sysheap_test_buffer_size;
+
+static int create_udmabuf(void)
+{
+ struct udmabuf_create create;
+ int dev_udmabuf, memfd, local_udmabuf;
+
+ udmabuf_test_buffer_size = 10 * getpagesize();
+
+ if (!ASSERT_LE(sizeof(udmabuf_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG"))
+ return -1;
+
+ memfd = memfd_create("memfd_test", MFD_ALLOW_SEALING);
+ if (!ASSERT_OK_FD(memfd, "memfd_create"))
+ return -1;
+
+ if (!ASSERT_OK(ftruncate(memfd, udmabuf_test_buffer_size), "ftruncate"))
+ goto close_memfd;
+
+ if (!ASSERT_OK(fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK), "seal"))
+ goto close_memfd;
+
+ dev_udmabuf = open("/dev/udmabuf", O_RDONLY);
+ if (!ASSERT_OK_FD(dev_udmabuf, "open udmabuf"))
+ goto close_memfd;
+
+ memset(&create, 0, sizeof(create));
+ create.memfd = memfd;
+ create.flags = UDMABUF_FLAGS_CLOEXEC;
+ create.offset = 0;
+ create.size = udmabuf_test_buffer_size;
+
+ local_udmabuf = ioctl(dev_udmabuf, UDMABUF_CREATE, &create);
+ close(dev_udmabuf);
+ if (!ASSERT_OK_FD(local_udmabuf, "udmabuf_create"))
+ goto close_memfd;
+
+ if (!ASSERT_OK(ioctl(local_udmabuf, DMA_BUF_SET_NAME_B, udmabuf_test_buffer_name), "name"))
+ goto close_udmabuf;
+
+ return local_udmabuf;
+
+close_udmabuf:
+ close(local_udmabuf);
+close_memfd:
+ close(memfd);
+ return -1;
+}
+
+static int create_sys_heap_dmabuf(void)
+{
+ sysheap_test_buffer_size = 20 * getpagesize();
+
+ struct dma_heap_allocation_data data = {
+ .len = sysheap_test_buffer_size,
+ .fd = 0,
+ .fd_flags = O_RDWR | O_CLOEXEC,
+ .heap_flags = 0,
+ };
+ int heap_fd, ret;
+
+ if (!ASSERT_LE(sizeof(sysheap_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG"))
+ return -1;
+
+ heap_fd = open("/dev/dma_heap/system", O_RDONLY);
+ if (!ASSERT_OK_FD(heap_fd, "open dma heap"))
+ return -1;
+
+ ret = ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data);
+ close(heap_fd);
+ if (!ASSERT_OK(ret, "syheap alloc"))
+ return -1;
+
+ if (!ASSERT_OK(ioctl(data.fd, DMA_BUF_SET_NAME_B, sysheap_test_buffer_name), "name"))
+ goto close_sysheap_dmabuf;
+
+ return data.fd;
+
+close_sysheap_dmabuf:
+ close(data.fd);
+ return -1;
+}
+
+static int create_test_buffers(void)
+{
+ udmabuf = create_udmabuf();
+ sysheap_dmabuf = create_sys_heap_dmabuf();
+
+ if (udmabuf < 0 || sysheap_dmabuf < 0)
+ return -1;
+
+ return 0;
+}
+
+static void destroy_test_buffers(void)
+{
+ close(udmabuf);
+ udmabuf = -1;
+
+ close(sysheap_dmabuf);
+ sysheap_dmabuf = -1;
+}
+
+enum Fields { INODE, SIZE, NAME, EXPORTER, FIELD_COUNT };
+struct DmabufInfo {
+ unsigned long inode;
+ unsigned long size;
+ char name[DMA_BUF_NAME_LEN];
+ char exporter[32];
+};
+
+static bool check_dmabuf_info(const struct DmabufInfo *bufinfo,
+ unsigned long size,
+ const char *name, const char *exporter)
+{
+ return size == bufinfo->size &&
+ !strcmp(name, bufinfo->name) &&
+ !strcmp(exporter, bufinfo->exporter);
+}
+
+static void subtest_dmabuf_iter_check_no_infinite_reads(struct dmabuf_iter *skel)
+{
+ int iter_fd;
+ char buf[256];
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
+ if (!ASSERT_OK_FD(iter_fd, "iter_create"))
+ return;
+
+ while (read(iter_fd, buf, sizeof(buf)) > 0)
+ ; /* Read out all contents */
+
+ /* Next reads should return 0 */
+ ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read");
+
+ close(iter_fd);
+}
+
+static void subtest_dmabuf_iter_check_default_iter(struct dmabuf_iter *skel)
+{
+ bool found_test_sysheap_dmabuf = false;
+ bool found_test_udmabuf = false;
+ struct DmabufInfo bufinfo;
+ size_t linesize = 0;
+ char *line = NULL;
+ FILE *iter_file;
+ int iter_fd, f = INODE;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
+ if (!ASSERT_OK_FD(iter_fd, "iter_create"))
+ return;
+
+ iter_file = fdopen(iter_fd, "r");
+ if (!ASSERT_OK_PTR(iter_file, "fdopen"))
+ goto close_iter_fd;
+
+ while (getline(&line, &linesize, iter_file) != -1) {
+ if (f % FIELD_COUNT == INODE) {
+ ASSERT_EQ(sscanf(line, "%ld", &bufinfo.inode), 1,
+ "read inode");
+ } else if (f % FIELD_COUNT == SIZE) {
+ ASSERT_EQ(sscanf(line, "%ld", &bufinfo.size), 1,
+ "read size");
+ } else if (f % FIELD_COUNT == NAME) {
+ ASSERT_EQ(sscanf(line, "%s", bufinfo.name), 1,
+ "read name");
+ } else if (f % FIELD_COUNT == EXPORTER) {
+ ASSERT_EQ(sscanf(line, "%31s", bufinfo.exporter), 1,
+ "read exporter");
+
+ if (check_dmabuf_info(&bufinfo,
+ sysheap_test_buffer_size,
+ sysheap_test_buffer_name,
+ "system"))
+ found_test_sysheap_dmabuf = true;
+ else if (check_dmabuf_info(&bufinfo,
+ udmabuf_test_buffer_size,
+ udmabuf_test_buffer_name,
+ "udmabuf"))
+ found_test_udmabuf = true;
+ }
+ ++f;
+ }
+
+ ASSERT_EQ(f % FIELD_COUNT, INODE, "number of fields");
+
+ ASSERT_TRUE(found_test_sysheap_dmabuf, "found_test_sysheap_dmabuf");
+ ASSERT_TRUE(found_test_udmabuf, "found_test_udmabuf");
+
+ free(line);
+ fclose(iter_file);
+close_iter_fd:
+ close(iter_fd);
+}
+
+static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ char key[DMA_BUF_NAME_LEN];
+ int err, fd;
+ bool found;
+
+ /* No need to attach it, just run it directly */
+ fd = bpf_program__fd(skel->progs.iter_dmabuf_for_each);
+
+ err = bpf_prog_test_run_opts(fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, key), "get next key"))
+ return;
+
+ do {
+ ASSERT_OK(bpf_map_lookup_elem(map_fd, key, &found), "lookup");
+ ASSERT_TRUE(found, "found test buffer");
+ } while (bpf_map_get_next_key(map_fd, key, key));
+}
+
+void test_dmabuf_iter(void)
+{
+ struct dmabuf_iter *skel = NULL;
+ int map_fd;
+ const bool f = false;
+
+ skel = dmabuf_iter__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dmabuf_iter__open_and_load"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.testbuf_hash);
+ if (!ASSERT_OK_FD(map_fd, "map_fd"))
+ goto destroy_skel;
+
+ if (!ASSERT_OK(bpf_map_update_elem(map_fd, udmabuf_test_buffer_name, &f, BPF_ANY),
+ "insert udmabuf"))
+ goto destroy_skel;
+ if (!ASSERT_OK(bpf_map_update_elem(map_fd, sysheap_test_buffer_name, &f, BPF_ANY),
+ "insert sysheap buffer"))
+ goto destroy_skel;
+
+ if (!ASSERT_OK(create_test_buffers(), "create_test_buffers"))
+ goto destroy;
+
+ if (!ASSERT_OK(dmabuf_iter__attach(skel), "skel_attach"))
+ goto destroy;
+
+ if (test__start_subtest("no_infinite_reads"))
+ subtest_dmabuf_iter_check_no_infinite_reads(skel);
+ if (test__start_subtest("default_iter"))
+ subtest_dmabuf_iter_check_default_iter(skel);
+ if (test__start_subtest("open_coded"))
+ subtest_dmabuf_iter_check_open_coded(skel, map_fd);
+
+destroy:
+ destroy_test_buffers();
+destroy_skel:
+ dmabuf_iter__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index e29cc16124c2..62e7ec775f24 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -33,10 +33,19 @@ static struct {
{"test_dynptr_skb_no_buff", SETUP_SKB_PROG},
{"test_dynptr_skb_strcmp", SETUP_SKB_PROG},
{"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP},
+ {"test_probe_read_user_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_kernel_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_user_str_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_kernel_str_dynptr", SETUP_XDP_PROG},
+ {"test_copy_from_user_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_str_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_task_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_task_str_dynptr", SETUP_SYSCALL_SLEEP},
};
static void verify_success(const char *prog_name, enum test_setup_type setup_type)
{
+ char user_data[384] = {[0 ... 382] = 'a', '\0'};
struct dynptr_success *skel;
struct bpf_program *prog;
struct bpf_link *link;
@@ -58,6 +67,10 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
if (!ASSERT_OK(err, "dynptr_success__load"))
goto cleanup;
+ skel->bss->user_ptr = user_data;
+ skel->data->test_len[0] = sizeof(user_data);
+ memcpy(skel->bss->expected_str, user_data, sizeof(user_data));
+
switch (setup_type) {
case SETUP_SYSCALL_SLEEP:
link = bpf_program__attach(prog);
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c
new file mode 100644
index 000000000000..ca46fdd6e1ae
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <test_progs.h>
+#include "fd_htab_lookup.skel.h"
+
+struct htab_op_ctx {
+ int fd;
+ int loop;
+ unsigned int entries;
+ bool stop;
+};
+
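+/* Pack the failing call site into bits 12+ and -err into the low bits,
+ * so pthread_join() callers can tell where a worker thread failed.
+ */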
+#define ERR_TO_RETVAL(where, err) ((void *)(long)(((where) << 12) | (-err)))
+
+static void *htab_lookup_fn(void *arg)
+{
+ struct htab_op_ctx *ctx = arg;
+ int i = 0;
+
+ while (i++ < ctx->loop && !ctx->stop) {
+ unsigned int j;
+
+ for (j = 0; j < ctx->entries; j++) {
+ unsigned int key = j, zero = 0, value;
+ int inner_fd, err;
+
+ err = bpf_map_lookup_elem(ctx->fd, &key, &value);
+ if (err) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(1, err);
+ }
+
+ inner_fd = bpf_map_get_fd_by_id(value);
+ if (inner_fd < 0) {
+ /* The old map has been freed */
+ if (inner_fd == -ENOENT)
+ continue;
+ ctx->stop = true;
+ return ERR_TO_RETVAL(2, inner_fd);
+ }
+
+ err = bpf_map_lookup_elem(inner_fd, &zero, &value);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(3, err);
+ }
+ close(inner_fd);
+
+ if (value != key) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(4, -EINVAL);
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static void *htab_update_fn(void *arg)
+{
+ struct htab_op_ctx *ctx = arg;
+ int i = 0;
+
+ while (i++ < ctx->loop && !ctx->stop) {
+ unsigned int j;
+
+ for (j = 0; j < ctx->entries; j++) {
+ unsigned int key = j, zero = 0;
+ int inner_fd, err;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (inner_fd < 0) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(1, inner_fd);
+ }
+
+ err = bpf_map_update_elem(inner_fd, &zero, &key, 0);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(2, err);
+ }
+
+ err = bpf_map_update_elem(ctx->fd, &key, &inner_fd, BPF_EXIST);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(3, err);
+ }
+ close(inner_fd);
+ }
+ }
+
+ return NULL;
+}
+
+static int setup_htab(int fd, unsigned int entries)
+{
+ unsigned int i;
+
+ for (i = 0; i < entries; i++) {
+ unsigned int key = i, zero = 0;
+ int inner_fd, err;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (!ASSERT_OK_FD(inner_fd, "new array"))
+ return -1;
+
+ err = bpf_map_update_elem(inner_fd, &zero, &key, 0);
+ if (!ASSERT_OK(err, "init array")) {
+ close(inner_fd);
+ return -1;
+ }
+
+ err = bpf_map_update_elem(fd, &key, &inner_fd, 0);
+ if (!ASSERT_OK(err, "init outer")) {
+ close(inner_fd);
+ return -1;
+ }
+ close(inner_fd);
+ }
+
+ return 0;
+}
+
+static int get_int_from_env(const char *name, int dft)
+{
+ const char *value;
+
+ value = getenv(name);
+ if (!value)
+ return dft;
+
+ return atoi(value);
+}
+
+void test_fd_htab_lookup(void)
+{
+ unsigned int i, wr_nr = 8, rd_nr = 16;
+ pthread_t tids[wr_nr + rd_nr];
+ struct fd_htab_lookup *skel;
+ struct htab_op_ctx ctx;
+ int err;
+
+ skel = fd_htab_lookup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fd_htab_lookup__open_and_load"))
+ return;
+
+ ctx.fd = bpf_map__fd(skel->maps.outer_map);
+ ctx.loop = get_int_from_env("FD_HTAB_LOOP_NR", 5);
+ ctx.stop = false;
+ ctx.entries = 8;
+
+ err = setup_htab(ctx.fd, ctx.entries);
+ if (err)
+ goto destroy;
+
+ memset(tids, 0, sizeof(tids));
+ for (i = 0; i < wr_nr; i++) {
+ err = pthread_create(&tids[i], NULL, htab_update_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ ctx.stop = true;
+ goto reap;
+ }
+ }
+ for (i = 0; i < rd_nr; i++) {
+ err = pthread_create(&tids[i + wr_nr], NULL, htab_lookup_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ ctx.stop = true;
+ goto reap;
+ }
+ }
+
+reap:
+ for (i = 0; i < wr_nr + rd_nr; i++) {
+ void *ret = NULL;
+ char desc[32];
+
+ if (!tids[i])
+ continue;
+
+ snprintf(desc, sizeof(desc), "thread %u", i + 1);
+ err = pthread_join(tids[i], &ret);
+ ASSERT_OK(err, desc);
+ ASSERT_EQ(ret, NULL, desc);
+ }
+destroy:
+ fd_htab_lookup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
index e59af2aa6601..e40114620751 100644
--- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -37,6 +37,7 @@ static noinline void uprobe_func(void)
static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr,
ssize_t offset, ssize_t entry_offset)
{
+	ssize_t ref_ctr_offset = entry_offset; /* uprobes pass ref_ctr_offset via entry_offset */
struct bpf_link_info info;
__u32 len = sizeof(info);
char buf[PATH_MAX];
@@ -97,6 +98,7 @@ again:
case BPF_PERF_EVENT_UPROBE:
case BPF_PERF_EVENT_URETPROBE:
ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset");
+ ASSERT_EQ(info.perf_event.uprobe.ref_ctr_offset, ref_ctr_offset, "uprobe_ref_ctr_offset");
ASSERT_EQ(info.perf_event.uprobe.name_len, strlen(UPROBE_FILE) + 1,
"name_len");
@@ -241,20 +243,32 @@ static void test_uprobe_fill_link_info(struct test_fill_link_info *skel,
.retprobe = type == BPF_PERF_EVENT_URETPROBE,
.bpf_cookie = PERF_EVENT_COOKIE,
);
+ const char *sema[1] = {
+ "uprobe_link_info_sema_1",
+ };
+ __u64 *ref_ctr_offset;
struct bpf_link *link;
int link_fd, err;
+ err = elf_resolve_syms_offsets("/proc/self/exe", 1, sema,
+ (unsigned long **) &ref_ctr_offset, STT_OBJECT);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets_object"))
+ return;
+
+ opts.ref_ctr_offset = *ref_ctr_offset;
link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_run,
0, /* self pid */
UPROBE_FILE, uprobe_offset,
&opts);
if (!ASSERT_OK_PTR(link, "attach_uprobe"))
- return;
+ goto out;
link_fd = bpf_link__fd(link);
- err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, 0);
+ err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, *ref_ctr_offset);
ASSERT_OK(err, "verify_perf_link_info");
bpf_link__destroy(link);
+out:
+ free(ref_ctr_offset);
}
static int verify_kmulti_link_info(int fd, bool retprobe, bool has_cookies)
diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
index 8e13a3416a21..1de14b111931 100644
--- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
@@ -104,7 +104,7 @@ void test_kmem_cache_iter(void)
goto destroy;
memset(buf, 0, sizeof(buf));
- while (read(iter_fd, buf, sizeof(buf) > 0)) {
+ while (read(iter_fd, buf, sizeof(buf)) > 0) {
/* Read out all contents */
printf("%s", buf);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 77d07e0a4a55..5266c7022863 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -7,6 +7,7 @@
#include "linked_list.skel.h"
#include "linked_list_fail.skel.h"
+#include "linked_list_peek.skel.h"
static char log_buf[1024 * 1024];
@@ -805,3 +806,8 @@ void test_linked_list(void)
test_linked_list_success(LIST_IN_LIST, true);
test_linked_list_success(TEST_ALL, false);
}
+
+void test_linked_list_peek(void)
+{
+ RUN_TESTS(linked_list_peek);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c
index 9818f06c97c5..d8f3d7a45fe9 100644
--- a/tools/testing/selftests/bpf/prog_tests/rbtree.c
+++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c
@@ -8,6 +8,7 @@
#include "rbtree_fail.skel.h"
#include "rbtree_btf_fail__wrong_node_type.skel.h"
#include "rbtree_btf_fail__add_wrong_type.skel.h"
+#include "rbtree_search.skel.h"
static void test_rbtree_add_nodes(void)
{
@@ -187,3 +188,8 @@ void test_rbtree_fail(void)
{
RUN_TESTS(rbtree_fail);
}
+
+void test_rbtree_search(void)
+{
+ RUN_TESTS(rbtree_search);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index 0b9bd1d6f7cc..10a0ab954b8a 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -37,8 +37,10 @@ configure_stack(void)
tc = popen("tc -V", "r");
if (CHECK_FAIL(!tc))
return false;
- if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc)))
+ if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc))) {
+ pclose(tc);
return false;
+ }
if (strstr(tc_version, ", libbpf "))
prog = "test_sk_assign_libbpf.bpf.o";
else
diff --git a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
index 1bdfb79ef009..e02cabcc814e 100644
--- a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
@@ -3,6 +3,7 @@
#ifndef __SOCKET_HELPERS__
#define __SOCKET_HELPERS__
+#include <sys/un.h>
#include <linux/vm_sockets.h>
/* include/linux/net.h */
@@ -169,6 +170,15 @@ static inline void init_addr_loopback6(struct sockaddr_storage *ss,
*len = sizeof(*addr6);
}
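+/* For AF_UNIX, "loopback" is an unnamed address: passing just
+ * sizeof(sa_family_t) to bind() makes the kernel autobind the socket
+ * to a unique abstract-namespace address.
+ */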
+static inline void init_addr_loopback_unix(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_un *addr = memset(ss, 0, sizeof(*ss));
+
+ addr->sun_family = AF_UNIX;
+ *len = sizeof(sa_family_t);
+}
+
static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
socklen_t *len)
{
@@ -190,6 +200,9 @@ static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
case AF_INET6:
init_addr_loopback6(ss, len);
return;
+ case AF_UNIX:
+ init_addr_loopback_unix(ss, len);
+ return;
case AF_VSOCK:
init_addr_loopback_vsock(ss, len);
return;
@@ -315,21 +328,27 @@ static inline int create_pair(int family, int sotype, int *p0, int *p1)
{
__close_fd int s, c = -1, p = -1;
struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
+ socklen_t len;
int err;
s = socket_loopback(family, sotype);
if (s < 0)
return s;
- err = xgetsockname(s, sockaddr(&addr), &len);
- if (err)
- return err;
-
c = xsocket(family, sotype, 0);
if (c < 0)
return c;
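+ /* Bind the client first so it has a well-defined local address of
+ * its own; for AF_UNIX this triggers autobind, which the sockmap
+ * redirect tests presumably rely on to identify the socket.
+ */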
+ init_addr_loopback(family, &addr, &len);
+ err = xbind(c, sockaddr(&addr), len);
+ if (err)
+ return err;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ return err;
+
err = connect(c, sockaddr(&addr), len);
if (err) {
if (errno != EINPROGRESS) {
@@ -391,4 +410,59 @@ static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
return err;
}
+static inline const char *socket_kind_to_str(int sock_fd)
+{
+ socklen_t opt_len;
+ int domain, type;
+
+ opt_len = sizeof(domain);
+ if (getsockopt(sock_fd, SOL_SOCKET, SO_DOMAIN, &domain, &opt_len))
+ FAIL_ERRNO("getsockopt(SO_DOMAIN)");
+
+ opt_len = sizeof(type);
+ if (getsockopt(sock_fd, SOL_SOCKET, SO_TYPE, &type, &opt_len))
+ FAIL_ERRNO("getsockopt(SO_TYPE)");
+
+ switch (domain) {
+ case AF_INET:
+ switch (type) {
+ case SOCK_STREAM:
+ return "tcp4";
+ case SOCK_DGRAM:
+ return "udp4";
+ }
+ break;
+ case AF_INET6:
+ switch (type) {
+ case SOCK_STREAM:
+ return "tcp6";
+ case SOCK_DGRAM:
+ return "udp6";
+ }
+ break;
+ case AF_UNIX:
+ switch (type) {
+ case SOCK_STREAM:
+ return "u_str";
+ case SOCK_DGRAM:
+ return "u_dgr";
+ case SOCK_SEQPACKET:
+ return "u_seq";
+ }
+ break;
+ case AF_VSOCK:
+ switch (type) {
+ case SOCK_STREAM:
+ return "v_str";
+ case SOCK_DGRAM:
+ return "v_dgr";
+ case SOCK_SEQPACKET:
+ return "v_seq";
+ }
+ break;
+ }
+
+ return "???";
+}
+
#endif // __SOCKET_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
index 3e5571dd578d..d815efac52fd 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -5,12 +5,15 @@
#define MAX_TEST_NAME 80
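+/* Compound-literal helpers: each use yields an addressable temporary,
+ * letting integer constants be passed by address to the bpf_map_*_elem()
+ * wrappers below without declaring a local variable first.
+ */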
+#define u32(v) ((u32){(v)})
+#define u64(v) ((u64){(v)})
+
#define __always_unused __attribute__((__unused__))
#define xbpf_map_delete_elem(fd, key) \
({ \
int __ret = bpf_map_delete_elem((fd), (key)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("map_delete"); \
__ret; \
})
@@ -18,7 +21,7 @@
#define xbpf_map_lookup_elem(fd, key, val) \
({ \
int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("map_lookup"); \
__ret; \
})
@@ -26,7 +29,7 @@
#define xbpf_map_update_elem(fd, key, val, flags) \
({ \
int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("map_update"); \
__ret; \
})
@@ -35,7 +38,7 @@
({ \
int __ret = \
bpf_prog_attach((prog), (target), (type), (flags)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("prog_attach(" #type ")"); \
__ret; \
})
@@ -43,7 +46,7 @@
#define xbpf_prog_detach2(prog, target, type) \
({ \
int __ret = bpf_prog_detach2((prog), (target), (type)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("prog_detach2(" #type ")"); \
__ret; \
})
@@ -66,21 +69,15 @@
__ret; \
})
-static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
+static inline int add_to_sockmap(int mapfd, int fd1, int fd2)
{
- u64 value;
- u32 key;
int err;
- key = 0;
- value = fd1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = xbpf_map_update_elem(mapfd, &u32(0), &u64(fd1), BPF_NOEXIST);
if (err)
return err;
- key = 1;
- value = fd2;
- return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ return xbpf_map_update_elem(mapfd, &u32(1), &u64(fd2), BPF_NOEXIST);
}
#endif // __SOCKMAP_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index 0a99fd404f6d..b6c471da5c28 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -3,76 +3,62 @@
/*
* Tests for sockmap/sockhash holding kTLS sockets.
*/
-
+#include <error.h>
#include <netinet/tcp.h>
+#include <linux/tls.h>
#include "test_progs.h"
+#include "sockmap_helpers.h"
+#include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_ktls.skel.h"
#define MAX_TEST_NAME 80
#define TCP_ULP 31
-static int tcp_server(int family)
+static int init_ktls_pairs(int c, int p)
{
- int err, s;
-
- s = socket(family, SOCK_STREAM, 0);
- if (!ASSERT_GE(s, 0, "socket"))
- return -1;
-
- err = listen(s, SOMAXCONN);
- if (!ASSERT_OK(err, "listen"))
- return -1;
-
- return s;
-}
+ int err;
+ struct tls12_crypto_info_aes_gcm_128 crypto_rx;
+ struct tls12_crypto_info_aes_gcm_128 crypto_tx;
-static int disconnect(int fd)
-{
- struct sockaddr unspec = { AF_UNSPEC };
+ err = setsockopt(c, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
+ goto out;
- return connect(fd, &unspec, sizeof(unspec));
+ err = setsockopt(p, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
+ goto out;
+
+ memset(&crypto_rx, 0, sizeof(crypto_rx));
+ memset(&crypto_tx, 0, sizeof(crypto_tx));
+ crypto_rx.info.version = TLS_1_2_VERSION;
+ crypto_tx.info.version = TLS_1_2_VERSION;
+ crypto_rx.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+ crypto_tx.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+
+ err = setsockopt(c, SOL_TLS, TLS_TX, &crypto_tx, sizeof(crypto_tx));
+ if (!ASSERT_OK(err, "setsockopt(TLS_TX)"))
+ goto out;
+
+ err = setsockopt(p, SOL_TLS, TLS_RX, &crypto_rx, sizeof(crypto_rx));
+ if (!ASSERT_OK(err, "setsockopt(TLS_RX)"))
+ goto out;
+ return 0;
+out:
+ return -1;
}
-/* Disconnect (unhash) a kTLS socket after removing it from sockmap. */
-static void test_sockmap_ktls_disconnect_after_delete(int family, int map)
+static int create_ktls_pairs(int family, int sotype, int *c, int *p)
{
- struct sockaddr_storage addr = {0};
- socklen_t len = sizeof(addr);
- int err, cli, srv, zero = 0;
-
- srv = tcp_server(family);
- if (srv == -1)
- return;
-
- err = getsockname(srv, (struct sockaddr *)&addr, &len);
- if (!ASSERT_OK(err, "getsockopt"))
- goto close_srv;
+ int err;
- cli = socket(family, SOCK_STREAM, 0);
- if (!ASSERT_GE(cli, 0, "socket"))
- goto close_srv;
-
- err = connect(cli, (struct sockaddr *)&addr, len);
- if (!ASSERT_OK(err, "connect"))
- goto close_cli;
-
- err = bpf_map_update_elem(map, &zero, &cli, 0);
- if (!ASSERT_OK(err, "bpf_map_update_elem"))
- goto close_cli;
-
- err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
- if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
- goto close_cli;
-
- err = bpf_map_delete_elem(map, &zero);
- if (!ASSERT_OK(err, "bpf_map_delete_elem"))
- goto close_cli;
-
- err = disconnect(cli);
+ err = create_pair(family, sotype, c, p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ return -1;
-close_cli:
- close(cli);
-close_srv:
- close(srv);
+ err = init_ktls_pairs(*c, *p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ return -1;
+ return 0;
}
static void test_sockmap_ktls_update_fails_when_sock_has_ulp(int family, int map)
@@ -145,6 +131,189 @@ static const char *fmt_test_name(const char *subtest_name, int family,
return test_name;
}
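+/* Sanity check: a plain send()/recv() round-trip across the kTLS pair
+ * must deliver the payload intact, with no sockmap program attached.
+ */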
+static void test_sockmap_ktls_offload(int family, int sotype)
+{
+ int err;
+ int c = -1, p = -1, sent, recvd;
+ char msg[12] = "hello world\0";
+ char rcv[13];
+
+ err = create_ktls_pairs(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_ktls_pairs()"))
+ goto out;
+
+ sent = send(c, msg, sizeof(msg), 0);
+ if (!ASSERT_EQ(sent, sizeof(msg), "send(msg)"))
+ goto out;
+
+ recvd = recv(p, rcv, sizeof(rcv), 0);
+ if (!ASSERT_GE(recvd, 0, "recv(msg)") ||
+ !ASSERT_EQ(recvd, sent, "length mismatch"))
+ goto out;
+
+ ASSERT_OK(memcmp(msg, rcv, sizeof(msg)), "data mismatch");
+
+out:
+ if (c >= 0)
+ close(c);
+ if (p >= 0)
+ close(p);
+}
+
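+/* Cork semantics: cork_byte (presumably applied via bpf_msg_cork_bytes()
+ * on the BPF side) makes the sk_msg layer buffer data until a full
+ * message accumulates. The first half-send must yield nothing at the
+ * peer; completing the message releases it, plus push_len pushed bytes
+ * when push is requested.
+ */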
+static void test_sockmap_ktls_tx_cork(int family, int sotype, bool push)
+{
+ int err, off;
+ int i, j;
+ int start_push = 0, push_len = 0;
+ int c = -1, p = -1, one = 1, sent, recvd;
+ int prog_fd, map_fd;
+ char msg[12] = "hello world\0";
+ char rcv[20] = {0};
+ struct test_sockmap_ktls *skel;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ skel->bss->cork_byte = sizeof(msg);
+ if (push) {
+ start_push = 1;
+ push_len = 2;
+ }
+ skel->bss->push_start = start_push;
+ skel->bss->push_end = push_len;
+
+ off = sizeof(msg) / 2;
+ sent = send(c, msg, off, 0);
+ if (!ASSERT_EQ(sent, off, "send(msg)"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(recvd, -1, "expected no data"))
+ goto out;
+
+ /* send remaining msg */
+ sent = send(c, msg + off, sizeof(msg) - off, 0);
+ if (!ASSERT_EQ(sent, sizeof(msg) - off, "send remaining data"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_GT(recvd, 0, "recv(msg)") ||
+ !ASSERT_EQ(recvd, sizeof(msg) + push_len, "check length mismatch"))
+ goto out;
+
+ for (i = 0, j = 0; i < recvd;) {
+ /* skip checking the data that has been pushed in */
+ if (i >= start_push && i <= start_push + push_len - 1) {
+ i++;
+ continue;
+ }
+ if (!ASSERT_EQ(rcv[i], msg[j], "data mismatch"))
+ goto out;
+ i++;
+ j++;
+ }
+out:
+ if (c >= 0)
+ close(c);
+ if (p >= 0)
+ close(p);
+ test_sockmap_ktls__destroy(skel);
+}
+
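+/* Stress the egress redirect path under memory pressure: shrink both
+ * sockets' buffers, then send non-blocking until sendmsg() fails. There
+ * is deliberately no receiver; the test passes if teardown completes
+ * without leaks or splats.
+ */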
+static void test_sockmap_ktls_tx_no_buf(int family, int sotype, bool push)
+{
+ int c = -1, p = -1, one = 1, two = 2;
+ struct test_sockmap_ktls *skel;
+ unsigned char *data = NULL;
+ struct msghdr msg = {0};
+ struct iovec iov[2];
+ int prog_fd, map_fd;
+ int txrx_buf = 1024;
+ int iov_length = 8192;
+ int err;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ err = setsockopt(c, SOL_SOCKET, SO_RCVBUFFORCE, &txrx_buf, sizeof(int));
+ err |= setsockopt(p, SOL_SOCKET, SO_SNDBUFFORCE, &txrx_buf, sizeof(int));
+ if (!ASSERT_OK(err, "set buf limit"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy_redir);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &two, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
+ goto out;
+
+ skel->bss->apply_bytes = 1024;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ data = calloc(iov_length, sizeof(char));
+ if (!data)
+ goto out;
+
+ iov[0].iov_base = data;
+ iov[0].iov_len = iov_length;
+ iov[1].iov_base = data;
+ iov[1].iov_len = iov_length;
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 2;
+
+ for (;;) {
+ err = sendmsg(c, &msg, MSG_DONTWAIT);
+ if (err <= 0)
+ break;
+ }
+
+out:
+ if (data)
+ free(data);
+ if (c != -1)
+ close(c);
+ if (p != -1)
+ close(p);
+
+ test_sockmap_ktls__destroy(skel);
+}
+
static void run_tests(int family, enum bpf_map_type map_type)
{
int map;
@@ -153,18 +322,30 @@ static void run_tests(int family, enum bpf_map_type map_type)
if (!ASSERT_GE(map, 0, "bpf_map_create"))
return;
- if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type)))
- test_sockmap_ktls_disconnect_after_delete(family, map);
if (test__start_subtest(fmt_test_name("update_fails_when_sock_has_ulp", family, map_type)))
test_sockmap_ktls_update_fails_when_sock_has_ulp(family, map);
close(map);
}
+static void run_ktls_test(int family, int sotype)
+{
+ if (test__start_subtest("tls simple offload"))
+ test_sockmap_ktls_offload(family, sotype);
+ if (test__start_subtest("tls tx cork"))
+ test_sockmap_ktls_tx_cork(family, sotype, false);
+ if (test__start_subtest("tls tx cork with push"))
+ test_sockmap_ktls_tx_cork(family, sotype, true);
+ if (test__start_subtest("tls tx egress with no buf"))
+ test_sockmap_ktls_tx_no_buf(family, sotype, true);
+}
+
void test_sockmap_ktls(void)
{
run_tests(AF_INET, BPF_MAP_TYPE_SOCKMAP);
run_tests(AF_INET, BPF_MAP_TYPE_SOCKHASH);
run_tests(AF_INET6, BPF_MAP_TYPE_SOCKMAP);
run_tests(AF_INET6, BPF_MAP_TYPE_SOCKHASH);
+ run_ktls_test(AF_INET, SOCK_STREAM);
+ run_ktls_test(AF_INET6, SOCK_STREAM);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 4ee1148d22be..1d98eee7a2c3 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -1366,237 +1366,6 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
}
}
-static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
- int sock_mapfd, int nop_mapfd,
- int verd_mapfd, enum redir_mode mode,
- int send_flags)
-{
- const char *log_prefix = redir_mode_str(mode);
- unsigned int pass;
- int err, n;
- u32 key;
- char b;
-
- zero_verdict_count(verd_mapfd);
-
- err = add_to_sockmap(sock_mapfd, peer0, peer1);
- if (err)
- return;
-
- if (nop_mapfd >= 0) {
- err = add_to_sockmap(nop_mapfd, cli0, cli1);
- if (err)
- return;
- }
-
- /* Last byte is OOB data when send_flags has MSG_OOB bit set */
- n = xsend(cli1, "ab", 2, send_flags);
- if (n >= 0 && n < 2)
- FAIL("%s: incomplete send", log_prefix);
- if (n < 2)
- return;
-
- key = SK_PASS;
- err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
- if (err)
- return;
- if (pass != 1)
- FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
- n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC);
- if (n < 0)
- FAIL_ERRNO("%s: recv_timeout", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete recv", log_prefix);
-
- if (send_flags & MSG_OOB) {
- /* Check that we can't read OOB while in sockmap */
- errno = 0;
- n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT);
- if (n != -1 || errno != EOPNOTSUPP)
- FAIL("%s: recv(MSG_OOB): expected EOPNOTSUPP: retval=%d errno=%d",
- log_prefix, n, errno);
-
- /* Remove peer1 from sockmap */
- xbpf_map_delete_elem(sock_mapfd, &(int){ 1 });
-
- /* Check that OOB was dropped on redirect */
- errno = 0;
- n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT);
- if (n != -1 || errno != EINVAL)
- FAIL("%s: recv(MSG_OOB): expected EINVAL: retval=%d errno=%d",
- log_prefix, n, errno);
- }
-}
-
-static void unix_redir_to_connected(int sotype, int sock_mapfd,
- int verd_mapfd, enum redir_mode mode)
-{
- int c0, c1, p0, p1;
- int sfd[2];
-
- if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
- return;
- c0 = sfd[0], p0 = sfd[1];
-
- if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
- goto close0;
- c1 = sfd[0], p1 = sfd[1];
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
- mode, NO_FLAGS);
-
- xclose(c1);
- xclose(p1);
-close0:
- xclose(c0);
- xclose(p0);
-}
-
-static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int sotype)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
- skel->bss->test_ingress = true;
- unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int sotype)
-{
- const char *family_name, *map_name;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(AF_UNIX);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
- unix_skb_redir_to_connected(skel, map, sotype);
-}
-
-/* Returns two connected loopback vsock sockets */
-static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
-{
- return create_pair(AF_VSOCK, sotype | SOCK_NONBLOCK, v0, v1);
-}
-
-static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
- enum redir_mode mode, int sotype)
-{
- const char *log_prefix = redir_mode_str(mode);
- char a = 'a', b = 'b';
- int u0, u1, v0, v1;
- int sfd[2];
- unsigned int pass;
- int err, n;
- u32 key;
-
- zero_verdict_count(verd_mapfd);
-
- if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd))
- return;
-
- u0 = sfd[0];
- u1 = sfd[1];
-
- err = vsock_socketpair_connectible(sotype, &v0, &v1);
- if (err) {
- FAIL("vsock_socketpair_connectible() failed");
- goto close_uds;
- }
-
- err = add_to_sockmap(sock_mapfd, u0, v0);
- if (err) {
- FAIL("add_to_sockmap failed");
- goto close_vsock;
- }
-
- n = write(v1, &a, sizeof(a));
- if (n < 0)
- FAIL_ERRNO("%s: write", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete write", log_prefix);
- if (n < 1)
- goto out;
-
- n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
- if (n < 0)
- FAIL("%s: recv() err, errno=%d", log_prefix, errno);
- if (n == 0)
- FAIL("%s: incomplete recv", log_prefix);
- if (b != a)
- FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b);
-
- key = SK_PASS;
- err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
- if (err)
- goto out;
- if (pass != 1)
- FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-out:
- key = 0;
- bpf_map_delete_elem(sock_mapfd, &key);
- key = 1;
- bpf_map_delete_elem(sock_mapfd, &key);
-
-close_vsock:
- close(v0);
- close(v1);
-
-close_uds:
- close(u0);
- close(u1);
-}
-
-static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map,
- int sotype)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype);
- skel->bss->test_ingress = true;
- vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map)
-{
- const char *family_name, *map_name;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(AF_VSOCK);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
-
- vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM);
- vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET);
-}
-
static void test_reuseport(struct test_sockmap_listen *skel,
struct bpf_map *map, int family, int sotype)
{
@@ -1637,224 +1406,6 @@ static void test_reuseport(struct test_sockmap_listen *skel,
}
}
-static int inet_socketpair(int family, int type, int *s, int *c)
-{
- return create_pair(family, type | SOCK_NONBLOCK, s, c);
-}
-
-static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
- enum redir_mode mode)
-{
- int c0, c1, p0, p1;
- int err;
-
- err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
- if (err)
- return;
- err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
- if (err)
- goto close_cli0;
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
- mode, NO_FLAGS);
-
- xclose(c1);
- xclose(p1);
-close_cli0:
- xclose(c0);
- xclose(p0);
-}
-
-static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
- skel->bss->test_ingress = true;
- udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int family)
-{
- const char *family_name, *map_name;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(family);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
- udp_skb_redir_to_connected(skel, map, family);
-}
-
-static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
- int verd_mapfd, enum redir_mode mode)
-{
- int c0, c1, p0, p1;
- int sfd[2];
- int err;
-
- if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
- return;
- c0 = sfd[0], p0 = sfd[1];
-
- err = inet_socketpair(family, type, &p1, &c1);
- if (err)
- goto close;
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
- mode, NO_FLAGS);
-
- xclose(c1);
- xclose(p1);
-close:
- xclose(c0);
- xclose(p0);
-}
-
-static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_EGRESS);
- inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
- REDIR_EGRESS);
- skel->bss->test_ingress = true;
- inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_INGRESS);
- inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
- REDIR_INGRESS);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
- int nop_mapfd, int verd_mapfd,
- enum redir_mode mode, int send_flags)
-{
- int c0, c1, p0, p1;
- int sfd[2];
- int err;
-
- err = inet_socketpair(family, type, &p0, &c0);
- if (err)
- return;
-
- if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
- goto close_cli0;
- c1 = sfd[0], p1 = sfd[1];
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, nop_mapfd,
- verd_mapfd, mode, send_flags);
-
- xclose(c1);
- xclose(p1);
-close_cli0:
- xclose(c0);
- xclose(p0);
-}
-
-static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int nop_map = bpf_map__fd(skel->maps.nop_map);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, -1, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, -1, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
-
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, nop_map, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
-
- /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_EGRESS, MSG_OOB);
-
- skel->bss->test_ingress = true;
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, -1, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, -1, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
-
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, nop_map, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
-
- /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_INGRESS, MSG_OOB);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int family)
-{
- const char *family_name, *map_name;
- struct netns_obj *netns;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(family);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
-
- netns = netns_new("sockmap_listen", true);
- if (!ASSERT_OK_PTR(netns, "netns_new"))
- return;
-
- inet_unix_skb_redir_to_connected(skel, map, family);
- unix_inet_skb_redir_to_connected(skel, map, family);
-
- netns_free(netns);
-}
-
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
int family)
{
@@ -1863,8 +1414,6 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
test_redir(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_DGRAM);
- test_udp_redir(skel, map, family);
- test_udp_unix_redir(skel, map, family);
}
void serial_test_sockmap_listen(void)
@@ -1880,16 +1429,10 @@ void serial_test_sockmap_listen(void)
skel->bss->test_sockmap = true;
run_tests(skel, skel->maps.sock_map, AF_INET);
run_tests(skel, skel->maps.sock_map, AF_INET6);
- test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
- test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
- test_vsock_redir(skel, skel->maps.sock_map);
skel->bss->test_sockmap = false;
run_tests(skel, skel->maps.sock_hash, AF_INET);
run_tests(skel, skel->maps.sock_hash, AF_INET6);
- test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
- test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
- test_vsock_redir(skel, skel->maps.sock_hash);
test_sockmap_listen__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c
new file mode 100644
index 000000000000..9c461d93113d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c
@@ -0,0 +1,465 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for sockmap/sockhash redirection.
+ *
+ * BPF_MAP_TYPE_SOCKMAP
+ * BPF_MAP_TYPE_SOCKHASH
+ * x
+ * sk_msg-to-egress
+ * sk_msg-to-ingress
+ * sk_skb-to-egress
+ * sk_skb-to-ingress
+ * x
+ * AF_INET, SOCK_STREAM
+ * AF_INET6, SOCK_STREAM
+ * AF_INET, SOCK_DGRAM
+ * AF_INET6, SOCK_DGRAM
+ * AF_UNIX, SOCK_STREAM
+ * AF_UNIX, SOCK_DGRAM
+ * AF_VSOCK, SOCK_STREAM
+ * AF_VSOCK, SOCK_SEQPACKET
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <linux/string.h>
+#include <linux/vm_sockets.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "linux/const.h"
+#include "test_progs.h"
+#include "sockmap_helpers.h"
+#include "test_sockmap_redir.skel.h"
+
+/* The meaning of SUPPORTED is "will redirect packet as expected".
+ */
+#define SUPPORTED _BITUL(0)
+
+/* Note on sk_skb-to-ingress ->af_vsock:
+ *
+ * The peer socket may receive the packet some time after sendmsg() returns.
+ * In a typical usage scenario, recvmsg() will block until the redirected packet
+ * appears in the destination queue, or time out if the packet was dropped. By
+ * that point, the verdict map has already been updated to reflect what has
+ * happened.
+ *
+ * But sk_skb-to-ingress/af_vsock is an unsupported combination, so no recvmsg()
+ * takes place, which means we may race the verdict logic and read map_verd
+ * before it has been updated, i.e. we might observe
+ * map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1.
+ *
+ * This confuses the selftest logic: if no packet was dropped, where's the
+ * packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0
+ * (which implies the verdict program has not been run), just re-read the
+ * verdict map.
+ */
+#define UNSUPPORTED_RACY_VERD _BITUL(1)
+
+enum prog_type {
+ SK_MSG_EGRESS,
+ SK_MSG_INGRESS,
+ SK_SKB_EGRESS,
+ SK_SKB_INGRESS,
+};
+
+enum {
+ SEND_INNER = 0,
+ SEND_OUTER,
+};
+
+enum {
+ RECV_INNER = 0,
+ RECV_OUTER,
+};
+
+struct maps {
+ int in;
+ int out;
+ int verd;
+};
+
+struct combo_spec {
+ enum prog_type prog_type;
+ const char *in, *out;
+};
+
+struct redir_spec {
+ const char *name;
+ int idx_send;
+ int idx_recv;
+ enum prog_type prog_type;
+};
+
+struct socket_spec {
+ int family;
+ int sotype;
+ int send_flags;
+ int in[2];
+ int out[2];
+};
+
+static int socket_spec_pairs(struct socket_spec *s)
+{
+ return create_socket_pairs(s->family, s->sotype,
+ &s->in[0], &s->out[0],
+ &s->in[1], &s->out[1]);
+}
+
+static void socket_spec_close(struct socket_spec *s)
+{
+ xclose(s->in[0]);
+ xclose(s->in[1]);
+ xclose(s->out[0]);
+ xclose(s->out[1]);
+}
+
+static void get_redir_params(struct redir_spec *redir,
+ struct test_sockmap_redir *skel, int *prog_fd,
+ enum bpf_attach_type *attach_type,
+ int *redirect_flags)
+{
+ enum prog_type type = redir->prog_type;
+ struct bpf_program *prog;
+ bool sk_msg;
+
+ sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS;
+ prog = sk_msg ? skel->progs.prog_msg_verdict : skel->progs.prog_skb_verdict;
+
+ *prog_fd = bpf_program__fd(prog);
+ *attach_type = sk_msg ? BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT;
+
+ if (type == SK_MSG_INGRESS || type == SK_SKB_INGRESS)
+ *redirect_flags = BPF_F_INGRESS;
+ else
+ *redirect_flags = 0;
+}
+
+static void try_recv(const char *prefix, int fd, int flags, bool expect_success)
+{
+ ssize_t n;
+ char buf;
+
+ errno = 0;
+ n = recv(fd, &buf, 1, flags);
+ if (n < 0 && expect_success)
+ FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n);
+ if (n >= 0 && !expect_success)
+ FAIL("%s: unexpected success: retval=%zd", prefix, n);
+}
+
+static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int sd_out,
+ int sd_recv, int map_verd, int status)
+{
+ unsigned int drop, pass;
+ char recv_buf;
+ ssize_t n;
+
+get_verdict:
+ if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &drop) ||
+ xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &pass))
+ return;
+
+ if (pass == 0 && drop == 0 && (status & UNSUPPORTED_RACY_VERD)) {
+ sched_yield();
+ goto get_verdict;
+ }
+
+ if (pass != 0) {
+ FAIL("unsupported: wanted verdict pass 0, have %u", pass);
+ return;
+ }
+
+ /* If nothing was dropped, packet should have reached the peer */
+ if (drop == 0) {
+ errno = 0;
+ n = recv_timeout(sd_peer, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
+ if (n != 1)
+ FAIL_ERRNO("unsupported: packet missing, retval=%zd", n);
+ }
+
+ /* Ensure queues are empty */
+ try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false);
+ if (sd_in != sd_send)
+ try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false);
+
+ try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false);
+ if (sd_recv != sd_out)
+ try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false);
+}
+
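+/* One redirect round-trip: install sd_in/sd_out in the maps, send a
+ * byte (two with MSG_OOB) through sd_send, then check delivery on
+ * sd_recv and the SK_PASS/SK_DROP counters, or defer to
+ * handle_unsupported() for combinations expected to fail.
+ */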
+static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer,
+ int sd_in, int sd_out, int sd_recv,
+ struct maps *maps, int status)
+{
+ unsigned int drop, pass;
+ char *send_buf = "ab";
+ char recv_buf = '\0';
+ ssize_t n, len = 1;
+
+ /* Zero out the verdict map */
+ if (xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY) ||
+ xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY))
+ return;
+
+ if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST))
+ return;
+
+ if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST))
+ goto del_in;
+
+ /* Last byte is OOB data when send_flags has MSG_OOB bit set */
+ if (send_flags & MSG_OOB)
+ len++;
+ n = send(sd_send, send_buf, len, send_flags);
+ if (n >= 0 && n < len)
+ FAIL("incomplete send");
+ if (n < 0) {
+ /* sk_msg redirect combo not supported? */
+ if (status & SUPPORTED || errno != EACCES)
+ FAIL_ERRNO("send");
+ goto out;
+ }
+
+ if (!(status & SUPPORTED)) {
+ handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv,
+ maps->verd, status);
+ goto out;
+ }
+
+ errno = 0;
+ n = recv_timeout(sd_recv, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
+ if (n != 1) {
+ FAIL_ERRNO("recv_timeout()");
+ goto out;
+ }
+
+ /* Check verdict _after_ recv(); af_vsock may need time to catch up */
+ if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &drop) ||
+ xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &pass))
+ goto out;
+
+ if (drop != 0 || pass != 1)
+ FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u",
+ drop, pass);
+
+ if (recv_buf != send_buf[0])
+ FAIL("recv(): payload check, %02x != %02x", recv_buf, send_buf[0]);
+
+ if (send_flags & MSG_OOB) {
+ /* Fail reading OOB while in sockmap */
+ try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out,
+ MSG_OOB | MSG_DONTWAIT, false);
+
+ /* Remove sd_out from sockmap */
+ xbpf_map_delete_elem(maps->out, &u32(0));
+
+ /* Check that OOB was dropped on redirect */
+ try_recv("recv(sd_out, MSG_OOB)", sd_out,
+ MSG_OOB | MSG_DONTWAIT, false);
+
+ goto del_in;
+ }
+out:
+ xbpf_map_delete_elem(maps->out, &u32(0));
+del_in:
+ xbpf_map_delete_elem(maps->in, &u32(0));
+}
+
+static int is_redir_supported(enum prog_type type, const char *in,
+ const char *out)
+{
+ /* Matching based on strings returned by socket_kind_to_str():
+ * tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq
+ * Plus a wildcard: any
+ * Not in use: u_seq, v_dgr
+ */
+ struct combo_spec *c, combos[] = {
+ /* Send to local: TCP -> any, but vsock */
+ { SK_MSG_INGRESS, "tcp", "tcp" },
+ { SK_MSG_INGRESS, "tcp", "udp" },
+ { SK_MSG_INGRESS, "tcp", "u_str" },
+ { SK_MSG_INGRESS, "tcp", "u_dgr" },
+
+ /* Send to egress: TCP -> TCP */
+ { SK_MSG_EGRESS, "tcp", "tcp" },
+
+ /* Ingress to egress: any -> any */
+ { SK_SKB_EGRESS, "any", "any" },
+
+ /* Ingress to local: any -> any, but vsock */
+ { SK_SKB_INGRESS, "any", "tcp" },
+ { SK_SKB_INGRESS, "any", "udp" },
+ { SK_SKB_INGRESS, "any", "u_str" },
+ { SK_SKB_INGRESS, "any", "u_dgr" },
+ };
+
+ for (c = combos; c < combos + ARRAY_SIZE(combos); c++) {
+ if (c->prog_type == type &&
+ (!strcmp(c->in, "any") || strstarts(in, c->in)) &&
+ (!strcmp(c->out, "any") || strstarts(out, c->out)))
+ return SUPPORTED;
+ }
+
+ return 0;
+}
+
+static int get_support_status(enum prog_type type, const char *in,
+ const char *out)
+{
+ int status = is_redir_supported(type, in, out);
+
+ if (type == SK_SKB_INGRESS && strstarts(out, "v_"))
+ status |= UNSUPPORTED_RACY_VERD;
+
+ return status;
+}
+
+static void test_socket(enum bpf_map_type type, struct redir_spec *redir,
+ struct maps *maps, struct socket_spec *s_in,
+ struct socket_spec *s_out)
+{
+ int fd_in, fd_out, fd_send, fd_peer, fd_recv, flags, status;
+ const char *in_str, *out_str;
+ char s[MAX_TEST_NAME];
+
+ fd_in = s_in->in[0];
+ fd_out = s_out->out[0];
+ fd_send = s_in->in[redir->idx_send];
+ fd_peer = s_in->in[redir->idx_send ^ 1];
+ fd_recv = s_out->out[redir->idx_recv];
+ flags = s_in->send_flags;
+
+ in_str = socket_kind_to_str(fd_in);
+ out_str = socket_kind_to_str(fd_out);
+ status = get_support_status(redir->prog_type, in_str, out_str);
+
+ snprintf(s, sizeof(s),
+ "%-4s %-17s %-5s %s %-5s%6s",
+ /* hash sk_skb-to-ingress u_str → v_str (OOB) */
+ type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash",
+ redir->name,
+ in_str,
+ status & SUPPORTED ? "→" : " ",
+ out_str,
+ (flags & MSG_OOB) ? "(OOB)" : "");
+
+ if (!test__start_subtest(s))
+ return;
+
+ test_send_redir_recv(fd_send, flags, fd_peer, fd_in, fd_out, fd_recv,
+ maps, status);
+}
+
+static void test_redir(enum bpf_map_type type, struct redir_spec *redir,
+ struct maps *maps)
+{
+ struct socket_spec *s, sockets[] = {
+ { AF_INET, SOCK_STREAM },
+ // { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */
+ { AF_INET6, SOCK_STREAM },
+ { AF_INET, SOCK_DGRAM },
+ { AF_INET6, SOCK_DGRAM },
+ { AF_UNIX, SOCK_STREAM },
+ { AF_UNIX, SOCK_STREAM, MSG_OOB },
+ { AF_UNIX, SOCK_DGRAM },
+ // { AF_UNIX, SOCK_SEQPACKET}, /* Unsupported BPF_MAP_UPDATE_ELEM */
+ { AF_VSOCK, SOCK_STREAM },
+ // { AF_VSOCK, SOCK_DGRAM }, /* Unsupported socket() */
+ { AF_VSOCK, SOCK_SEQPACKET },
+ };
+
+ for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
+ if (socket_spec_pairs(s))
+ goto out;
+
+ /* Intra-proto */
+ for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
+ test_socket(type, redir, maps, s, s);
+
+ /* Cross-proto */
+ for (int i = 0; i < ARRAY_SIZE(sockets); i++) {
+ for (int j = 0; j < ARRAY_SIZE(sockets); j++) {
+ struct socket_spec *out = &sockets[j];
+ struct socket_spec *in = &sockets[i];
+
+ /* Skip intra-proto and between variants */
+ if (out->send_flags ||
+ (in->family == out->family &&
+ in->sotype == out->sotype))
+ continue;
+
+ test_socket(type, redir, maps, in, out);
+ }
+ }
+out:
+ while (--s >= sockets)
+ socket_spec_close(s);
+}
+
+static void test_map(enum bpf_map_type type)
+{
+ struct redir_spec *r, redirs[] = {
+ { "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS },
+ { "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS },
+ { "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS },
+ { "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS },
+ };
+
+ for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) {
+ enum bpf_attach_type attach_type;
+ struct test_sockmap_redir *skel;
+ struct maps maps;
+ int prog_fd;
+
+ skel = test_sockmap_redir__open_and_load();
+ if (!skel) {
+ FAIL("open_and_load");
+ return;
+ }
+
+ switch (type) {
+ case BPF_MAP_TYPE_SOCKMAP:
+ maps.in = bpf_map__fd(skel->maps.nop_map);
+ maps.out = bpf_map__fd(skel->maps.sock_map);
+ break;
+ case BPF_MAP_TYPE_SOCKHASH:
+ maps.in = bpf_map__fd(skel->maps.nop_hash);
+ maps.out = bpf_map__fd(skel->maps.sock_hash);
+ break;
+ default:
+ FAIL("Unsupported bpf_map_type");
+ return;
+ }
+
+ skel->bss->redirect_type = type;
+ maps.verd = bpf_map__fd(skel->maps.verdict_map);
+ get_redir_params(r, skel, &prog_fd, &attach_type,
+ &skel->bss->redirect_flags);
+
+ if (xbpf_prog_attach(prog_fd, maps.in, attach_type, 0))
+ return;
+
+ test_redir(type, r, &maps);
+
+ if (xbpf_prog_detach2(prog_fd, maps.in, attach_type))
+ return;
+
+ test_sockmap_redir__destroy(skel);
+ }
+}
+
+void serial_test_sockmap_redir(void)
+{
+ test_map(BPF_MAP_TYPE_SOCKMAP);
+ test_map(BPF_MAP_TYPE_SOCKHASH);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index c85798966aec..76d72a59365e 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -56,6 +56,8 @@
#define MAC_DST_FWD "00:11:22:33:44:55"
#define MAC_DST "00:22:33:44:55:66"
+#define MAC_SRC_FWD "00:33:44:55:66:77"
+#define MAC_SRC "00:44:55:66:77:88"
#define IFADDR_STR_LEN 18
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
@@ -207,11 +209,10 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
int err;
if (result->dev_mode == MODE_VETH) {
- SYS(fail, "ip link add src type veth peer name src_fwd");
- SYS(fail, "ip link add dst type veth peer name dst_fwd");
-
- SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
- SYS(fail, "ip link set dst address " MAC_DST);
+ SYS(fail, "ip link add src address " MAC_SRC " type veth "
+ "peer name src_fwd address " MAC_SRC_FWD);
+ SYS(fail, "ip link add dst address " MAC_DST " type veth "
+ "peer name dst_fwd address " MAC_DST_FWD);
} else if (result->dev_mode == MODE_NETKIT) {
err = create_netkit(NETKIT_L3, "src", "src_fwd");
if (!ASSERT_OK(err, "create_ifindex_src"))
diff --git a/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c
new file mode 100644
index 000000000000..7d1b478c99a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms Inc. */
+#include <test_progs.h>
+#include "test_btf_ext.skel.h"
+#include "btf_helpers.h"
+
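+/* Compare the line/func info the kernel reports through
+ * bpf_prog_get_info_by_fd() against what libbpf parsed from .BTF.ext,
+ * as exposed by the bpf_program__{line,func}_info*() accessors.
+ */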
+static void subtest_line_func_info(void)
+{
+ struct test_btf_ext *skel;
+ struct bpf_prog_info info;
+ struct bpf_line_info line_info[128], *libbpf_line_info;
+ struct bpf_func_info func_info[128], *libbpf_func_info;
+ __u32 info_len = sizeof(info), libbpf_line_info_cnt, libbpf_func_info_cnt;
+ int err, fd;
+
+ skel = test_btf_ext__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ fd = bpf_program__fd(skel->progs.global_func);
+
+ memset(&info, 0, sizeof(info));
+ info.line_info = ptr_to_u64(&line_info);
+ info.nr_line_info = ARRAY_SIZE(line_info);
+ info.line_info_rec_size = sizeof(*line_info);
+ err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(err, "prog_line_info"))
+ goto out;
+
+ libbpf_line_info = bpf_program__line_info(skel->progs.global_func);
+ libbpf_line_info_cnt = bpf_program__line_info_cnt(skel->progs.global_func);
+
+ memset(&info, 0, sizeof(info));
+ info.func_info = ptr_to_u64(&func_info);
+ info.nr_func_info = ARRAY_SIZE(func_info);
+ info.func_info_rec_size = sizeof(*func_info);
+ err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(err, "prog_func_info"))
+ goto out;
+
+ libbpf_func_info = bpf_program__func_info(skel->progs.global_func);
+ libbpf_func_info_cnt = bpf_program__func_info_cnt(skel->progs.global_func);
+
+ if (!ASSERT_OK_PTR(libbpf_line_info, "bpf_program__line_info"))
+ goto out;
+ if (!ASSERT_EQ(libbpf_line_info_cnt, info.nr_line_info, "line_info_cnt"))
+ goto out;
+ if (!ASSERT_OK_PTR(libbpf_func_info, "bpf_program__func_info"))
+ goto out;
+ if (!ASSERT_EQ(libbpf_func_info_cnt, info.nr_func_info, "func_info_cnt"))
+ goto out;
+ ASSERT_MEMEQ(libbpf_line_info, line_info, libbpf_line_info_cnt * sizeof(*line_info),
+ "line_info");
+ ASSERT_MEMEQ(libbpf_func_info, func_info, libbpf_func_info_cnt * sizeof(*func_info),
+ "func_info");
+out:
+ test_btf_ext__destroy(skel);
+}
+
+void test_btf_ext(void)
+{
+ if (test__start_subtest("line_func_info"))
+ subtest_line_func_info();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
index a95b42bf744a..47b56c258f3f 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
@@ -63,6 +63,9 @@ static void test_set_global_vars_succeeds(void)
" -G \"var_eb = EB2\" "\
" -G \"var_ec = EC2\" "\
" -G \"var_b = 1\" "\
+ " -G \"struct1.struct2.u.var_u8 = 170\" "\
+ " -G \"union1.struct3.var_u8_l = 0xaa\" "\
+ " -G \"union1.struct3.var_u8_h = 0xaa\" "\
"-vl2 > %s", fix->veristat, fix->tmpfile);
read(fix->fd, fix->output, fix->sz);
@@ -78,6 +81,8 @@ static void test_set_global_vars_succeeds(void)
__CHECK_STR("_w=12 ", "var_eb = EB2");
__CHECK_STR("_w=13 ", "var_ec = EC2");
__CHECK_STR("_w=1 ", "var_b = 1");
+ __CHECK_STR("_w=170 ", "struct1.struct2.u.var_u8 = 170");
+ __CHECK_STR("_w=0xaaaa ", "union1.var_u16 = 0xaaaa");
out:
teardown_fixture(fix);
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index e66a57970d28..c9da06741104 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -14,6 +14,7 @@
#include "verifier_bounds_deduction_non_const.skel.h"
#include "verifier_bounds_mix_sign_unsign.skel.h"
#include "verifier_bpf_get_stack.skel.h"
+#include "verifier_bpf_trap.skel.h"
#include "verifier_bswap.skel.h"
#include "verifier_btf_ctx_access.skel.h"
#include "verifier_btf_unreliable_prog.skel.h"
@@ -148,6 +149,7 @@ void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction);
void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); }
void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); }
void test_verifier_bpf_get_stack(void) { RUN(verifier_bpf_get_stack); }
+void test_verifier_bpf_trap(void) { RUN(verifier_bpf_trap); }
void test_verifier_bswap(void) { RUN(verifier_bswap); }
void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); }
void test_verifier_btf_unreliable_prog(void) { RUN(verifier_btf_unreliable_prog); }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index 3d47878ef6bf..19f92affc2da 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -351,9 +351,10 @@ void test_xdp_metadata(void)
struct xdp_metadata2 *bpf_obj2 = NULL;
struct xdp_metadata *bpf_obj = NULL;
struct bpf_program *new_prog, *prog;
+ struct bpf_devmap_val devmap_e = {};
+ struct bpf_map *prog_arr, *devmap;
struct nstoken *tok = NULL;
__u32 queue_id = QUEUE_ID;
- struct bpf_map *prog_arr;
struct xsk tx_xsk = {};
struct xsk rx_xsk = {};
__u32 val, key = 0;
@@ -409,6 +410,13 @@ void test_xdp_metadata(void)
bpf_program__set_ifindex(prog, rx_ifindex);
bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+ /* Make sure we can load a dev-bound program that performs
+ * XDP_REDIRECT into a devmap.
+ */
+ new_prog = bpf_object__find_program_by_name(bpf_obj->obj, "redirect");
+ bpf_program__set_ifindex(new_prog, rx_ifindex);
+ bpf_program__set_flags(new_prog, BPF_F_XDP_DEV_BOUND_ONLY);
+
if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton"))
goto out;
@@ -423,6 +431,18 @@ void test_xdp_metadata(void)
"update prog_arr"))
goto out;
+ /* Make sure we can't add dev-bound programs to devmaps. */
+ devmap = bpf_object__find_map_by_name(bpf_obj->obj, "dev_map");
+ if (!ASSERT_OK_PTR(devmap, "no dev_map found"))
+ goto out;
+
+ devmap_e.bpf_prog.fd = val;
+ if (!ASSERT_ERR(bpf_map__update_elem(devmap, &key, sizeof(key),
+ &devmap_e, sizeof(devmap_e),
+ BPF_ANY),
+ "update dev_map"))
+ goto out;
+
/* Attach BPF program to RX interface. */
ret = bpf_xdp_attach(rx_ifindex,
diff --git a/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c b/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c
new file mode 100644
index 000000000000..079bf3794b3a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+long process_byte = 0;
+int verdict_dir = 0;
+int dropped = 0;
+int pkt_size = 0;
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_tx SEC(".maps");
+
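+/* Stream parser: frame the byte stream into fixed pkt_size records so
+ * that the verdict program below runs once per record.
+ */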
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+ return pkt_size;
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ int one = 1;
+ int ret = bpf_sk_redirect_map(skb, &sock_map_rx, one, verdict_dir);
+
+ if (ret == SK_DROP)
+ dropped++;
+ __sync_fetch_and_add(&process_byte, skb->len);
+ return ret;
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_pass(struct __sk_buff *skb)
+{
+ __sync_fetch_and_add(&process_byte, skb->len);
+ return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_skmsg_verdict(struct sk_msg_md *msg)
+{
+ int one = 1;
+
+ __sync_fetch_and_add(&process_byte, msg->size);
+ return bpf_msg_redirect_map(msg, &sock_map_tx, one, verdict_dir);
+}
+
+SEC("sk_msg")
+int prog_skmsg_pass(struct sk_msg_md *msg)
+{
+ __sync_fetch_and_add(&process_byte, msg->size);
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
index fb8dc0768999..d67466c1ff77 100644
--- a/tools/testing/selftests/bpf/bpf_arena_spin_lock.h
+++ b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
@@ -32,6 +32,7 @@ extern unsigned long CONFIG_NR_CPUS __kconfig;
struct __qspinlock {
union {
atomic_t val;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
struct {
u8 locked;
u8 pending;
@@ -40,6 +41,17 @@ struct __qspinlock {
u16 locked_pending;
u16 tail;
};
+#else
+ struct {
+ u16 tail;
+ u16 locked_pending;
+ };
+ struct {
+ u8 reserved[2];
+ u8 pending;
+ u8 locked;
+ };
+#endif
};
};
@@ -95,9 +107,6 @@ struct arena_qnode {
#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET)
#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET)
-#define likely(x) __builtin_expect(!!(x), 1)
-#define unlikely(x) __builtin_expect(!!(x), 0)
-
struct arena_qnode __arena qnodes[_Q_MAX_CPUS][_Q_MAX_NODES];
static inline u32 encode_tail(int cpu, int idx)
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 863df7c0fdd0..6e208e24ba3b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -225,8 +225,9 @@
#define CAN_USE_BPF_ST
#endif
-#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
- (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
#define CAN_USE_LOAD_ACQ_STORE_REL
#endif
diff --git a/tools/testing/selftests/bpf/progs/dmabuf_iter.c b/tools/testing/selftests/bpf/progs/dmabuf_iter.c
new file mode 100644
index 000000000000..13cdb11fdeb2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dmabuf_iter.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google LLC */
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+/* From uapi/linux/dma-buf.h */
+#define DMA_BUF_NAME_LEN 32
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, DMA_BUF_NAME_LEN);
+ __type(value, bool);
+ __uint(max_entries, 5);
+} testbuf_hash SEC(".maps");
+
+/*
+ * Fields output by this iterator are delimited by newlines. Convert any
+ * newlines in user-provided printed strings to spaces.
+ */
+static void sanitize_string(char *src, size_t size)
+{
+ for (char *c = src; (size_t)(c - src) < size && *c; ++c)
+ if (*c == '\n')
+ *c = ' ';
+}
+
+SEC("iter/dmabuf")
+int dmabuf_collector(struct bpf_iter__dmabuf *ctx)
+{
+ const struct dma_buf *dmabuf = ctx->dmabuf;
+ struct seq_file *seq = ctx->meta->seq;
+ unsigned long inode = 0;
+ size_t size;
+ const char *pname, *exporter;
+ char name[DMA_BUF_NAME_LEN] = {'\0'};
+
+ if (!dmabuf)
+ return 0;
+
+ if (BPF_CORE_READ_INTO(&inode, dmabuf, file, f_inode, i_ino) ||
+ bpf_core_read(&size, sizeof(size), &dmabuf->size) ||
+ bpf_core_read(&pname, sizeof(pname), &dmabuf->name) ||
+ bpf_core_read(&exporter, sizeof(exporter), &dmabuf->exp_name))
+ return 1;
+
+ /* Buffers are not required to be named */
+ if (pname) {
+ if (bpf_probe_read_kernel(name, sizeof(name), pname))
+ return 1;
+
+ /* Name strings can be provided by userspace */
+ sanitize_string(name, sizeof(name));
+ }
+
+ BPF_SEQ_PRINTF(seq, "%lu\n%llu\n%s\n%s\n", inode, size, name, exporter);
+ return 0;
+}
+
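+/* Same walk via the open-coded bpf_for_each(dmabuf, ...) iterator from
+ * a syscall program: flag each buffer whose NUL-padded name is already
+ * a key in testbuf_hash.
+ */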
+SEC("syscall")
+int iter_dmabuf_for_each(const void *ctx)
+{
+ struct dma_buf *d;
+
+ bpf_for_each(dmabuf, d) {
+ char name[DMA_BUF_NAME_LEN];
+ const char *pname;
+ bool *found;
+ long len;
+ int i;
+
+ if (bpf_core_read(&pname, sizeof(pname), &d->name))
+ return 1;
+
+ /* Buffers are not required to be named */
+ if (!pname)
+ continue;
+
+ len = bpf_probe_read_kernel_str(name, sizeof(name), pname);
+ if (len < 0)
+ return 1;
+
+ /*
+ * The entire name buffer is used as a map key.
+ * Zeroize any uninitialized trailing bytes after the NUL.
+ */
+ bpf_for(i, len, DMA_BUF_NAME_LEN)
+ name[i] = 0;
+
+ found = bpf_map_lookup_elem(&testbuf_hash, name);
+ if (found) {
+ bool t = true;
+
+ bpf_map_update_elem(&testbuf_hash, name, &t, BPF_EXIST);
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index e1fba28e4a86..a0391f9da2d4 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -680,3 +680,233 @@ out:
bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
return XDP_DROP;
}
+
+void *user_ptr;
+/* Contains a copy of the data pointed to by user_ptr.
+ * Size 384 to make it not fit into a single kernel chunk when copying
+ * but less than the maximum bpf stack size (512).
+ */
+char expected_str[384];
+__u32 test_len[7] = {0/* placeholder */, 0, 1, 2, 255, 256, 257};
+
+typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void *unsafe_ptr);
+
+/* Returns the offset just before the end of the maximum sized xdp fragment.
+ * Any write larger than 32 bytes will be split between 2 fragments.
+ */
+__u32 xdp_near_frag_end_offset(void)
+{
+ const __u32 headroom = 256;
+ const __u32 max_frag_size = __PAGE_SIZE - headroom - sizeof(struct skb_shared_info);
+
+ /* 32 bytes before the approximate end of the fragment */
+ return max_frag_size - 32;
+}
+
+/* Use __always_inline on test_dynptr_probe[_str][_xdp]() and callbacks
+ * of type bpf_read_dynptr_fn_t to prevent compiler from generating
+ * indirect calls that make the program fail to load with an "unknown opcode" error.
+ */
+static __always_inline void test_dynptr_probe(void *ptr, bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ char buf[sizeof(expected_str)];
+ struct bpf_dynptr ptr_buf;
+ int i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ err = bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr_buf);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ err = err ?: bpf_read_dynptr_fn(&ptr_buf, 0, len, ptr);
+ if (len > sizeof(buf))
+ break;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_buf, 0, 0);
+
+ if (err || bpf_memcmp(expected_str, buf, len))
+ err = 1;
+
+ /* Reset buffer and dynptr */
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_buf, 0, buf, len, 0);
+ }
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+}
+
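+/* _str variant: the helper reports the number of bytes copied,
+ * including the terminating NUL, and the last byte written into the
+ * dynptr must be that NUL.
+ */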
+static __always_inline void test_dynptr_probe_str(void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ char buf[sizeof(expected_str)];
+ struct bpf_dynptr ptr_buf;
+ __u32 cnt, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr_buf);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ cnt = bpf_read_dynptr_fn(&ptr_buf, 0, len, ptr);
+ if (cnt != len)
+ err = 1;
+
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_buf, 0, 0);
+ if (!len)
+ continue;
+ if (err || bpf_memcmp(expected_str, buf, len - 1) || buf[len - 1] != '\0')
+ err = 1;
+ }
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+}
+
+static __always_inline void test_dynptr_probe_xdp(struct xdp_md *xdp, void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ struct bpf_dynptr ptr_xdp;
+ char buf[sizeof(expected_str)];
+ __u32 off, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ off = xdp_near_frag_end_offset();
+ err = bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ err = err ?: bpf_read_dynptr_fn(&ptr_xdp, off, len, ptr);
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_xdp, off, 0);
+ if (err || bpf_memcmp(expected_str, buf, len))
+ err = 1;
+ /* Reset buffer and dynptr */
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_xdp, off, buf, len, 0);
+ }
+}
+
+static __always_inline void test_dynptr_probe_str_xdp(struct xdp_md *xdp, void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ struct bpf_dynptr ptr_xdp;
+ char buf[sizeof(expected_str)];
+ __u32 cnt, off, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ off = xdp_near_frag_end_offset();
+ err = bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+ if (err)
+ return;
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ cnt = bpf_read_dynptr_fn(&ptr_xdp, off, len, ptr);
+ if (cnt != len)
+ err = 1;
+
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_xdp, off, 0);
+
+ if (!len)
+ continue;
+ if (err || bpf_memcmp(expected_str, buf, len - 1) || buf[len - 1] != '\0')
+ err = 1;
+
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_xdp, off, buf, len, 0);
+ }
+}
+
+SEC("xdp")
+int test_probe_read_user_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe(user_ptr, bpf_probe_read_user_dynptr);
+ if (!err)
+ test_dynptr_probe_xdp(xdp, user_ptr, bpf_probe_read_user_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_kernel_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe(expected_str, bpf_probe_read_kernel_dynptr);
+ if (!err)
+ test_dynptr_probe_xdp(xdp, expected_str, bpf_probe_read_kernel_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_user_str_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe_str(user_ptr, bpf_probe_read_user_str_dynptr);
+ if (!err)
+ test_dynptr_probe_str_xdp(xdp, user_ptr, bpf_probe_read_user_str_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_kernel_str_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe_str(expected_str, bpf_probe_read_kernel_str_dynptr);
+ if (!err)
+ test_dynptr_probe_str_xdp(xdp, expected_str, bpf_probe_read_kernel_str_dynptr);
+ return XDP_PASS;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_dynptr(void *ctx)
+{
+ test_dynptr_probe(user_ptr, bpf_copy_from_user_dynptr);
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_str_dynptr(void *ctx)
+{
+ test_dynptr_probe_str(user_ptr, bpf_copy_from_user_str_dynptr);
+ return 0;
+}
+
+static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void *unsafe_ptr)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ return bpf_copy_from_user_task_dynptr(dptr, off, size, unsafe_ptr, task);
+}
+
+static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void *unsafe_ptr)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ return bpf_copy_from_user_task_str_dynptr(dptr, off, size, unsafe_ptr, task);
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_task_dynptr(void *ctx)
+{
+ test_dynptr_probe(user_ptr, bpf_copy_data_from_user_task);
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_task_str_dynptr(void *ctx)
+{
+ test_dynptr_probe_str(user_ptr, bpf_copy_data_from_user_task_str);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fd_htab_lookup.c b/tools/testing/selftests/bpf/progs/fd_htab_lookup.c
new file mode 100644
index 000000000000..a4a9e1db626f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fd_htab_lookup.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct inner_map_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+} inner_map SEC(".maps");
+
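+/* Outer hash-of-maps; the .values initializer below links inner_map so
+ * one inner map is already in place at load time.
+ */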
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 64);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct inner_map_type);
+} outer_map SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 427b72954b87..76adf4a8f2da 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -7,8 +7,6 @@
#include "bpf_misc.h"
#include "bpf_compiler.h"
-#define unlikely(x) __builtin_expect(!!(x), 0)
-
static volatile int zero = 0;
int my_pid;
diff --git a/tools/testing/selftests/bpf/progs/linked_list_peek.c b/tools/testing/selftests/bpf/progs/linked_list_peek.c
new file mode 100644
index 000000000000..264e81bfb287
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_list_peek.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ struct bpf_list_node l;
+ int key;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_list_head ghead __contains(node_data, l);
+
+#define list_entry(ptr, type, member) container_of(ptr, type, member)
+#define NR_NODES 16
+
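+/* Loop start is read from a global so the verifier sees an unknown
+ * scalar rather than a constant-folded bound.
+ */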
+int zero = 0;
+
+SEC("syscall")
+__retval(0)
+long list_peek(void *ctx)
+{
+ struct bpf_list_node *l_n;
+ struct node_data *n;
+ int i, err = 0;
+
+ bpf_spin_lock(&glock);
+ l_n = bpf_list_front(&ghead);
+ bpf_spin_unlock(&glock);
+ if (l_n)
+ return __LINE__;
+
+ bpf_spin_lock(&glock);
+ l_n = bpf_list_back(&ghead);
+ bpf_spin_unlock(&glock);
+ if (l_n)
+ return __LINE__;
+
+ for (i = zero; i < NR_NODES && can_loop; i++) {
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return __LINE__;
+ n->key = i;
+ bpf_spin_lock(&glock);
+ bpf_list_push_back(&ghead, &n->l);
+ bpf_spin_unlock(&glock);
+ }
+
+ bpf_spin_lock(&glock);
+
+ l_n = bpf_list_front(&ghead);
+ if (!l_n) {
+ err = __LINE__;
+ goto done;
+ }
+
+ n = list_entry(l_n, struct node_data, l);
+ if (n->key != 0) {
+ err = __LINE__;
+ goto done;
+ }
+
+ l_n = bpf_list_back(&ghead);
+ if (!l_n) {
+ err = __LINE__;
+ goto done;
+ }
+
+ n = list_entry(l_n, struct node_data, l);
+ if (n->key != NR_NODES - 1) {
+ err = __LINE__;
+ goto done;
+ }
+
+done:
+ bpf_spin_unlock(&glock);
+ return err;
+}
+
+#define TEST_FB(op, dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_##op##_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_list_node *l_n; \
+ __u64 jiffies = 0; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock); \
+ l_n = bpf_list_##op(&ghead); \
+ if (l_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock); \
+ \
+ return !!jiffies; \
+}
+
+#define MSG "call bpf_list_{{(front|back).+}}; R0{{(_w)?}}=ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_FB(front, true)
+TEST_FB(back, true)
+#undef MSG
+
+#define MSG "bpf_spin_lock at off=0 must be held for bpf_list_head"
+TEST_FB(front, false)
+TEST_FB(back, false)
+#undef MSG
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/prepare.c b/tools/testing/selftests/bpf/progs/prepare.c
index 1f1dd547e4ee..cfc1f48e0d28 100644
--- a/tools/testing/selftests/bpf/progs/prepare.c
+++ b/tools/testing/selftests/bpf/progs/prepare.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2025 Meta */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-//#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c
index dbd5eee8e25e..4acb6af2dfe3 100644
--- a/tools/testing/selftests/bpf/progs/rbtree_fail.c
+++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c
@@ -69,11 +69,11 @@ long rbtree_api_nolock_first(void *ctx)
}
SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__retval(0)
long rbtree_api_remove_unadded_node(void *ctx)
{
struct node_data *n, *m;
- struct bpf_rb_node *res;
+ struct bpf_rb_node *res_n, *res_m;
n = bpf_obj_new(typeof(*n));
if (!n)
@@ -88,19 +88,20 @@ long rbtree_api_remove_unadded_node(void *ctx)
bpf_spin_lock(&glock);
bpf_rbtree_add(&groot, &n->node, less);
- /* This remove should pass verifier */
- res = bpf_rbtree_remove(&groot, &n->node);
- n = container_of(res, struct node_data, node);
+ res_n = bpf_rbtree_remove(&groot, &n->node);
- /* This remove shouldn't, m isn't in an rbtree */
- res = bpf_rbtree_remove(&groot, &m->node);
- m = container_of(res, struct node_data, node);
+ res_m = bpf_rbtree_remove(&groot, &m->node);
bpf_spin_unlock(&glock);
- if (n)
- bpf_obj_drop(n);
- if (m)
- bpf_obj_drop(m);
+ bpf_obj_drop(m);
+ if (res_n)
+ bpf_obj_drop(container_of(res_n, struct node_data, node));
+ if (res_m) {
+ bpf_obj_drop(container_of(res_m, struct node_data, node));
+		/* Unexpected: m was never added to the rbtree, so this
+		 * remove should have returned NULL.
+		 */
+ return 2;
+ }
+
return 0;
}
@@ -178,7 +179,7 @@ err_out:
}
SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer")
long rbtree_api_add_release_unlock_escape(void *ctx)
{
struct node_data *n;
@@ -202,7 +203,7 @@ long rbtree_api_add_release_unlock_escape(void *ctx)
}
SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer")
long rbtree_api_first_release_unlock_escape(void *ctx)
{
struct bpf_rb_node *res;
diff --git a/tools/testing/selftests/bpf/progs/rbtree_search.c b/tools/testing/selftests/bpf/progs/rbtree_search.c
new file mode 100644
index 000000000000..098ef970fac1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_search.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ struct bpf_refcount ref;
+ struct bpf_rb_node r0;
+ struct bpf_rb_node r1;
+ int key0;
+ int key1;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock0;
+private(A) struct bpf_rb_root groot0 __contains(node_data, r0);
+
+private(B) struct bpf_spin_lock glock1;
+private(B) struct bpf_rb_root groot1 __contains(node_data, r1);
+
+#define rb_entry(ptr, type, member) container_of(ptr, type, member)
+#define NR_NODES 16
+
+int zero = 0;
+
+static bool less0(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = rb_entry(a, struct node_data, r0);
+ node_b = rb_entry(b, struct node_data, r0);
+
+ return node_a->key0 < node_b->key0;
+}
+
+static bool less1(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = rb_entry(a, struct node_data, r1);
+ node_b = rb_entry(b, struct node_data, r1);
+
+ return node_a->key1 < node_b->key1;
+}
+
+SEC("syscall")
+__retval(0)
+long rbtree_search(void *ctx)
+{
+ struct bpf_rb_node *rb_n, *rb_m, *gc_ns[NR_NODES];
+ long lookup_key = NR_NODES / 2;
+ struct node_data *n, *m;
+ int i, nr_gc = 0;
+
+ for (i = zero; i < NR_NODES && can_loop; i++) {
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return __LINE__;
+
+ m = bpf_refcount_acquire(n);
+
+ n->key0 = i;
+ m->key1 = i;
+
+ bpf_spin_lock(&glock0);
+ bpf_rbtree_add(&groot0, &n->r0, less0);
+ bpf_spin_unlock(&glock0);
+
+ bpf_spin_lock(&glock1);
+ bpf_rbtree_add(&groot1, &m->r1, less1);
+ bpf_spin_unlock(&glock1);
+ }
+
+ n = NULL;
+ bpf_spin_lock(&glock0);
+ rb_n = bpf_rbtree_root(&groot0);
+ while (can_loop) {
+ if (!rb_n) {
+ bpf_spin_unlock(&glock0);
+ return __LINE__;
+ }
+
+ n = rb_entry(rb_n, struct node_data, r0);
+ if (lookup_key == n->key0)
+ break;
+ if (nr_gc < NR_NODES)
+ gc_ns[nr_gc++] = rb_n;
+ if (lookup_key < n->key0)
+ rb_n = bpf_rbtree_left(&groot0, rb_n);
+ else
+ rb_n = bpf_rbtree_right(&groot0, rb_n);
+ }
+
+ if (!n || lookup_key != n->key0) {
+ bpf_spin_unlock(&glock0);
+ return __LINE__;
+ }
+
+ for (i = 0; i < nr_gc; i++) {
+ rb_n = gc_ns[i];
+ gc_ns[i] = bpf_rbtree_remove(&groot0, rb_n);
+ }
+
+ m = bpf_refcount_acquire(n);
+ bpf_spin_unlock(&glock0);
+
+ for (i = 0; i < nr_gc; i++) {
+ rb_n = gc_ns[i];
+ if (rb_n) {
+ n = rb_entry(rb_n, struct node_data, r0);
+ bpf_obj_drop(n);
+ }
+ }
+
+ if (!m)
+ return __LINE__;
+
+ bpf_spin_lock(&glock1);
+ rb_m = bpf_rbtree_remove(&groot1, &m->r1);
+ bpf_spin_unlock(&glock1);
+ bpf_obj_drop(m);
+ if (!rb_m)
+ return __LINE__;
+ bpf_obj_drop(rb_entry(rb_m, struct node_data, r1));
+
+ return 0;
+}
+
+#define TEST_ROOT(dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_root_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_rb_node *rb_n; \
+ __u64 jiffies = 0; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_root(&groot0); \
+ if (rb_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock0); \
+ \
+ return !!jiffies; \
+}
+
+#define TEST_LR(op, dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_##op##_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_rb_node *rb_n; \
+ struct node_data *n; \
+ __u64 jiffies = 0; \
+ \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_root(&groot0); \
+ if (!rb_n) { \
+ bpf_spin_unlock(&glock0); \
+ return 1; \
+ } \
+ n = rb_entry(rb_n, struct node_data, r0); \
+ n = bpf_refcount_acquire(n); \
+ bpf_spin_unlock(&glock0); \
+ if (!n) \
+ return 1; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_##op(&groot0, &n->r0); \
+ if (rb_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock0); \
+ \
+ return !!jiffies; \
+}
+
+/*
+ * Use a separate MSG macro instead of passing MSG to TEST_XXX(..., MSG)
+ * to ensure the message itself is not embedded in the bpf prog lineinfo,
+ * which the verifier includes in its log.
+ * Otherwise, test_loader would incorrectly match the prog lineinfo
+ * instead of the log generated by the verifier.
+ */
+#define MSG "call bpf_rbtree_root{{.+}}; R0{{(_w)?}}=rcu_ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_ROOT(true)
+#undef MSG
+#define MSG "call bpf_rbtree_{{(left|right).+}}; R0{{(_w)?}}=rcu_ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_LR(left, true)
+TEST_LR(right, true)
+#undef MSG
+
+#define MSG "bpf_spin_lock at off=0 must be held for bpf_rb_root"
+TEST_ROOT(false)
+TEST_LR(left, false)
+TEST_LR(right, false)
+#undef MSG
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/set_global_vars.c b/tools/testing/selftests/bpf/progs/set_global_vars.c
index 9adb5ba4cd4d..90f5656c3991 100644
--- a/tools/testing/selftests/bpf/progs/set_global_vars.c
+++ b/tools/testing/selftests/bpf/progs/set_global_vars.c
@@ -24,6 +24,44 @@ const volatile enum Enumu64 var_eb = EB1;
const volatile enum Enums64 var_ec = EC1;
const volatile bool var_b = false;
+struct Struct {
+ int:16;
+ __u16 filler;
+ struct {
+ const __u16 filler2;
+ };
+ struct Struct2 {
+ __u16 filler;
+ volatile struct {
+ const int:1;
+ union {
+ const volatile __u8 var_u8;
+ const volatile __s16 filler3;
+ const int:1;
+ } u;
+ };
+ } struct2;
+};
+
+const volatile __u32 stru = 0; /* name is a prefix of "struct1" below */
+const volatile struct Struct struct1 = {.struct2 = {.u = {.var_u8 = 1}}};
+
+union Union {
+ __u16 var_u16;
+ struct Struct3 {
+ struct {
+ __u8 var_u8_l;
+ };
+ struct {
+ struct {
+ __u8 var_u8_h;
+ };
+ };
+ } struct3;
+};
+
+const volatile union Union union1 = {.var_u16 = -1};
+
char arr[4] = {0};
SEC("socket")
@@ -43,5 +81,8 @@ int test_set_globals(void *ctx)
a = var_eb;
a = var_ec;
a = var_b;
+ a = struct1.struct2.u.var_u8;
+ a = union1.var_u16;
+
return a;
}
diff --git a/tools/testing/selftests/bpf/progs/test_btf_ext.c b/tools/testing/selftests/bpf/progs/test_btf_ext.c
new file mode 100644
index 000000000000..cdf20331db04
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_ext.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Meta Platforms Inc. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
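+/* __noinline (plus __sink) keeps f0() as a distinct subprogram;
+ * presumably this gives the loader separate .BTF.ext func/line records
+ * to exercise.
+ */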
+__noinline static void f0(void)
+{
+ __u64 a = 1;
+
+ __sink(a);
+}
+
+SEC("xdp")
+__u64 global_func(struct xdp_md *xdp)
+{
+ f0();
+ return XDP_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
new file mode 100644
index 000000000000..8bdb9987c0c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+int cork_byte;
+int push_start;
+int push_end;
+int apply_bytes;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map SEC(".maps");
+
+SEC("sk_msg")
+int prog_sk_policy(struct sk_msg_md *msg)
+{
+ if (cork_byte > 0)
+ bpf_msg_cork_bytes(msg, cork_byte);
+ if (push_start > 0 && push_end > 0)
+ bpf_msg_push_data(msg, push_start, push_end, 0);
+
+ return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_sk_policy_redir(struct sk_msg_md *msg)
+{
+ int two = 2;
+
+ bpf_msg_apply_bytes(msg, apply_bytes);
+ return bpf_msg_redirect_map(msg, &sock_map, two, 0);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_redir.c b/tools/testing/selftests/bpf/progs/test_sockmap_redir.c
new file mode 100644
index 000000000000..34d9f4f2f0a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_redir.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} nop_map, sock_map;
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} nop_hash, sock_hash;
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, unsigned int);
+} verdict_map;
+
+/* Set by user space */
+int redirect_type;
+int redirect_flags;
+
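+/* Pick the skb- or msg-flavored redirect helper via _Generic on the ctx
+ * type. Note the asymmetry: the sockmap helpers take the key by value,
+ * while the sockhash helpers take a pointer to it.
+ */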
+#define redirect_map(__data) \
+ _Generic((__data), \
+ struct __sk_buff * : bpf_sk_redirect_map, \
+ struct sk_msg_md * : bpf_msg_redirect_map \
+ )((__data), &sock_map, (__u32){0}, redirect_flags)
+
+#define redirect_hash(__data) \
+ _Generic((__data), \
+ struct __sk_buff * : bpf_sk_redirect_hash, \
+ struct sk_msg_md * : bpf_msg_redirect_hash \
+ )((__data), &sock_hash, &(__u32){0}, redirect_flags)
+
+#define DEFINE_PROG(__type, __param) \
+SEC("sk_" XSTR(__type)) \
+int prog_ ## __type ## _verdict(__param data) \
+{ \
+ unsigned int *count; \
+ int verdict; \
+ \
+ if (redirect_type == BPF_MAP_TYPE_SOCKMAP) \
+ verdict = redirect_map(data); \
+ else if (redirect_type == BPF_MAP_TYPE_SOCKHASH) \
+ verdict = redirect_hash(data); \
+ else \
+ verdict = redirect_type - __MAX_BPF_MAP_TYPE; \
+ \
+ count = bpf_map_lookup_elem(&verdict_map, &verdict); \
+ if (count) \
+ (*count)++; \
+ \
+ return verdict; \
+}
+
+DEFINE_PROG(skb, struct __sk_buff *);
+DEFINE_PROG(msg, struct sk_msg_md *);
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
index eb5cca1fce16..7d5293de1952 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
@@ -294,7 +294,9 @@ static int tcp_validate_sysctl(struct tcp_syncookie *ctx)
(ctx->ipv6 && ctx->attrs.mss != MSS_LOCAL_IPV6))
goto err;
- if (!ctx->attrs.wscale_ok || ctx->attrs.snd_wscale != 7)
+ if (!ctx->attrs.wscale_ok ||
+ !ctx->attrs.snd_wscale ||
+ ctx->attrs.snd_wscale >= BPF_SYNCOOKIE_WSCALE_MASK)
goto err;
if (!ctx->attrs.tstamp_ok)
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c
new file mode 100644
index 000000000000..35e2cdc00a01
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
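+/* The "&& 0" below keeps this test compiled out even on clang >= 21. */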
+#if __clang_major__ >= 21 && 0
+SEC("socket")
+__description("__builtin_trap with simple c code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+void bpf_builtin_trap_with_simple_c(void)
+{
+ __builtin_trap();
+}
+#endif
+
+SEC("socket")
+__description("__bpf_trap with simple c code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+void bpf_trap_with_simple_c(void)
+{
+ __bpf_trap();
+}
+
+SEC("socket")
+__description("__bpf_trap as the second-from-last insn")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+__naked void bpf_trap_at_func_end(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "call %[__bpf_trap];"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead code __bpf_trap in the middle of code")
+__success
+__naked void dead_bpf_trap_in_middle(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 == 0 goto +1;"
+ "call %[__bpf_trap];"
+ "r0 = 2;"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("reachable __bpf_trap in the middle of code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+__naked void live_bpf_trap_in_middle(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 == 1 goto +1;"
+ "call %[__bpf_trap];"
+ "r0 = 2;"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
index 28b939572cda..03942cec07e5 100644
--- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
@@ -65,4 +65,16 @@ __naked void ctx_access_u32_pointer_reject_8(void)
" ::: __clobber_all);
}
+SEC("fentry/bpf_fentry_test10")
+__description("btf_ctx_access const void pointer accept")
+__success __retval(0)
+__naked void ctx_access_const_void_pointer_accept(void)
+{
+ asm volatile (" \
+ r2 = *(u64 *)(r1 + 0); /* load 1st argument value (const void pointer) */\
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
index 77698d5a19e4..74f4f19c10b8 100644
--- a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
+++ b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
@@ -10,65 +10,81 @@
SEC("socket")
__description("load-acquire, 8-bit")
-__success __success_unpriv __retval(0x12)
+__success __success_unpriv __retval(0)
__naked void load_acquire_8(void)
{
asm volatile (
- "w1 = 0x12;"
+ "r0 = 0;"
+ "w1 = 0xfe;"
"*(u8 *)(r10 - 1) = w1;"
- ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r10 - 1));
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u8 *)(r10 - 1));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(load_acquire_insn,
- BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -1))
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -1))
: __clobber_all);
}
SEC("socket")
__description("load-acquire, 16-bit")
-__success __success_unpriv __retval(0x1234)
+__success __success_unpriv __retval(0)
__naked void load_acquire_16(void)
{
asm volatile (
- "w1 = 0x1234;"
+ "r0 = 0;"
+ "w1 = 0xfedc;"
"*(u16 *)(r10 - 2) = w1;"
- ".8byte %[load_acquire_insn];" // w0 = load_acquire((u16 *)(r10 - 2));
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u16 *)(r10 - 2));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(load_acquire_insn,
- BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -2))
+ BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -2))
: __clobber_all);
}
SEC("socket")
__description("load-acquire, 32-bit")
-__success __success_unpriv __retval(0x12345678)
+__success __success_unpriv __retval(0)
__naked void load_acquire_32(void)
{
asm volatile (
- "w1 = 0x12345678;"
+ "r0 = 0;"
+ "w1 = 0xfedcba09;"
"*(u32 *)(r10 - 4) = w1;"
- ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 4));
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u32 *)(r10 - 4));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(load_acquire_insn,
- BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -4))
+ BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -4))
: __clobber_all);
}
SEC("socket")
__description("load-acquire, 64-bit")
-__success __success_unpriv __retval(0x1234567890abcdef)
+__success __success_unpriv __retval(0)
__naked void load_acquire_64(void)
{
asm volatile (
- "r1 = 0x1234567890abcdef ll;"
+ "r0 = 0;"
+ "r1 = 0xfedcba0987654321 ll;"
"*(u64 *)(r10 - 8) = r1;"
- ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r10 - 8));
+ ".8byte %[load_acquire_insn];" // r2 = load_acquire((u64 *)(r10 - 8));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(load_acquire_insn,
- BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -8))
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8))
: __clobber_all);
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
index 6662d4b39969..9fe5d255ee37 100644
--- a/tools/testing/selftests/bpf/progs/verifier_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -91,8 +91,7 @@ __naked int bpf_end_bswap(void)
::: __clobber_all);
}
-#if defined(ENABLE_ATOMICS_TESTS) && \
- (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
SEC("?raw_tp")
__success __log_level(2)
@@ -138,7 +137,7 @@ __naked int bpf_store_release(void)
: __clobber_all);
}
-#endif /* load-acquire, store-release */
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
#endif /* v4 instruction */
SEC("?raw_tp")
@@ -179,4 +178,57 @@ __naked int state_loop_first_last_equal(void)
);
}
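+/* Conditional jumps with r10 (the frame pointer) as one operand:
+ * precision backtracking must still track the other operand (r2 below).
+ */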
+__used __naked static void __bpf_cond_op_r10(void)
+{
+ asm volatile (
+ "r2 = 2314885393468386424 ll;"
+ "goto +0;"
+ "if r2 <= r10 goto +3;"
+ "if r1 >= -1835016 goto +0;"
+ "if r2 <= 8 goto +0;"
+ "if r3 <= 0 goto +0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("8: (bd) if r2 <= r10 goto pc+3")
+__msg("9: (35) if r1 >= 0xffe3fff8 goto pc+0")
+__msg("10: (b5) if r2 <= 0x8 goto pc+0")
+__msg("mark_precise: frame1: last_idx 10 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame1: regs=r2 stack= before 9: (35) if r1 >= 0xffe3fff8 goto pc+0")
+__msg("mark_precise: frame1: regs=r2 stack= before 8: (bd) if r2 <= r10 goto pc+3")
+__msg("mark_precise: frame1: regs=r2 stack= before 7: (05) goto pc+0")
+__naked void bpf_cond_op_r10(void)
+{
+ asm volatile (
+ "r3 = 0 ll;"
+ "call __bpf_cond_op_r10;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("3: (bf) r3 = r10")
+__msg("4: (bd) if r3 <= r2 goto pc+1")
+__msg("5: (b5) if r2 <= 0x8 goto pc+2")
+__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 4: (bd) if r3 <= r2 goto pc+1")
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10")
+__naked void bpf_cond_op_not_r10(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r2 = 2314885393468386424 ll;"
+ "r3 = r10;"
+ "if r3 <= r2 goto +1;"
+ "if r2 <= 8 goto +2;"
+ "r0 = 2 ll;"
+ "exit;"
+ ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_store_release.c b/tools/testing/selftests/bpf/progs/verifier_store_release.c
index c0442d5bb049..72f1eb006074 100644
--- a/tools/testing/selftests/bpf/progs/verifier_store_release.c
+++ b/tools/testing/selftests/bpf/progs/verifier_store_release.c
@@ -6,18 +6,21 @@
#include "../../../include/linux/filter.h"
#include "bpf_misc.h"
-#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
- (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
SEC("socket")
__description("store-release, 8-bit")
-__success __success_unpriv __retval(0x12)
+__success __success_unpriv __retval(0)
__naked void store_release_8(void)
{
asm volatile (
+ "r0 = 0;"
"w1 = 0x12;"
".8byte %[store_release_insn];" // store_release((u8 *)(r10 - 1), w1);
- "w0 = *(u8 *)(r10 - 1);"
+ "w2 = *(u8 *)(r10 - 1);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(store_release_insn,
@@ -27,13 +30,17 @@ __naked void store_release_8(void)
SEC("socket")
__description("store-release, 16-bit")
-__success __success_unpriv __retval(0x1234)
+__success __success_unpriv __retval(0)
__naked void store_release_16(void)
{
asm volatile (
+ "r0 = 0;"
"w1 = 0x1234;"
".8byte %[store_release_insn];" // store_release((u16 *)(r10 - 2), w1);
- "w0 = *(u16 *)(r10 - 2);"
+ "w2 = *(u16 *)(r10 - 2);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(store_release_insn,
@@ -43,13 +50,17 @@ __naked void store_release_16(void)
SEC("socket")
__description("store-release, 32-bit")
-__success __success_unpriv __retval(0x12345678)
+__success __success_unpriv __retval(0)
__naked void store_release_32(void)
{
asm volatile (
+ "r0 = 0;"
"w1 = 0x12345678;"
".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 4), w1);
- "w0 = *(u32 *)(r10 - 4);"
+ "w2 = *(u32 *)(r10 - 4);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(store_release_insn,
@@ -59,13 +70,17 @@ __naked void store_release_32(void)
SEC("socket")
__description("store-release, 64-bit")
-__success __success_unpriv __retval(0x1234567890abcdef)
+__success __success_unpriv __retval(0)
__naked void store_release_64(void)
{
asm volatile (
+ "r0 = 0;"
"r1 = 0x1234567890abcdef ll;"
".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1);
- "r0 = *(u64 *)(r10 - 8);"
+ "r2 = *(u64 *)(r10 - 8);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(store_release_insn,
@@ -271,7 +286,7 @@ __naked void store_release_with_invalid_reg(void)
: __clobber_all);
}
-#else
+#else /* CAN_USE_LOAD_ACQ_STORE_REL */
SEC("socket")
__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support store-release, use a dummy test")
@@ -281,6 +296,6 @@ int dummy_test(void)
return 0;
}
-#endif
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c
index 31ca229bb3c0..09bb8a038d52 100644
--- a/tools/testing/selftests/bpf/progs/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c
@@ -19,6 +19,13 @@ struct {
__type(value, __u32);
} prog_arr SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 1);
+} dev_map SEC(".maps");
+
extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
__u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
@@ -95,4 +102,10 @@ int rx(struct xdp_md *ctx)
return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
}
+SEC("xdp")
+int redirect(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&dev_map, ctx->rx_queue_index, XDP_PASS);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index 3220f1d28697..2e54b95ad898 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -134,6 +134,10 @@ bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) {
return bpf_testmod_test_struct_arg_result;
}
+__weak noinline void bpf_testmod_looooooooooooooooooooooooooooooong_name(void)
+{
+}
+
__bpf_kfunc void
bpf_testmod_test_mod_kfunc(int i)
{
@@ -1340,7 +1344,7 @@ static int st_ops_gen_prologue_with_kfunc(struct bpf_insn *insn_buf, bool direct
*insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, offsetof(struct st_ops_args, a));
*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2);
*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
- *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id),
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id);
*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_8);
*insn++ = prog->insnsi[0];
@@ -1379,7 +1383,7 @@ static int st_ops_gen_epilogue_with_kfunc(struct bpf_insn *insn_buf, const struc
*insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(struct st_ops_args, a));
*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2);
*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
- *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id),
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id);
*insn++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_6);
*insn++ = BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2);
*insn++ = BPF_EXIT_INSN();
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 49f2fc61061f..9551d8d5f8f9 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -1042,6 +1042,14 @@ void run_subtest(struct test_loader *tester,
emit_verifier_log(tester->log_buf, false /*force*/);
validate_msgs(tester->log_buf, &subspec->expect_msgs, emit_verifier_log);
+ /* Restore capabilities because the kernel will silently ignore requests
+ * for program info (such as xlated program text) if we are not
+ * bpf-capable. Also, for some reason test_verifier executes programs
+ * with all capabilities restored. Do the same here.
+ */
+ if (restore_capabilities(&caps))
+ goto tobj_cleanup;
+
if (subspec->expect_xlated.cnt) {
err = get_xlated_program_text(bpf_program__fd(tprog),
tester->log_buf, tester->log_buf_sz);
@@ -1067,12 +1075,6 @@ void run_subtest(struct test_loader *tester,
}
if (should_do_test_run(spec, subspec)) {
- /* For some reason test_verifier executes programs
- * with all capabilities restored. Do the same here.
- */
- if (restore_capabilities(&caps))
- goto tobj_cleanup;
-
/* Do bpf_map__attach_struct_ops() for each struct_ops map.
* This should trigger bpf_struct_ops->reg callback on kernel side.
*/
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 447b68509d76..27db34ecf3f5 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -734,7 +734,7 @@ static __u32 btf_raw_types[] = {
BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr *ptr; */
};
-static char bpf_vlog[UINT_MAX >> 8];
+static char bpf_vlog[UINT_MAX >> 5];
static int load_btf_spec(__u32 *types, int types_len,
const char *strings, int strings_len)
@@ -1559,10 +1559,10 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
test->errstr_unpriv : test->errstr;
opts.expected_attach_type = test->expected_attach_type;
- if (verbose)
- opts.log_level = verif_log_level | 4; /* force stats */
- else if (expected_ret == VERBOSE_ACCEPT)
+ if (expected_ret == VERBOSE_ACCEPT)
opts.log_level = 2;
+ else if (verbose)
+ opts.log_level = verif_log_level | 4; /* force stats */
else
opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL;
opts.prog_flags = pflags;
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index a18972ffdeb6..b2bb20b00952 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -1486,7 +1486,84 @@ static bool is_preset_supported(const struct btf_type *t)
return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t);
}
-static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct btf_type *t,
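+/* Depth-first search for a named member of a composite type, descending
+ * into anonymous structs/unions. On success, returns 0 and reports the
+ * member's resolved type id and its offset in bits from the enclosing
+ * variable.
+ */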
+static int btf_find_member(const struct btf *btf,
+ const struct btf_type *parent_type,
+ __u32 parent_offset,
+ const char *member_name,
+ int *member_tid,
+ __u32 *member_offset)
+{
+ int i;
+
+ if (!btf_is_composite(parent_type))
+ return -EINVAL;
+
+ for (i = 0; i < btf_vlen(parent_type); ++i) {
+ const struct btf_member *member;
+ const struct btf_type *member_type;
+ int tid;
+
+ member = btf_members(parent_type) + i;
+ tid = btf__resolve_type(btf, member->type);
+ if (tid < 0)
+ return -EINVAL;
+
+ member_type = btf__type_by_id(btf, tid);
+ if (member->name_off) {
+ const char *name = btf__name_by_offset(btf, member->name_off);
+
+ if (strcmp(member_name, name) == 0) {
+ if (btf_member_bitfield_size(parent_type, i) != 0) {
+ fprintf(stderr, "Bitfield presets are not supported %s\n",
+ name);
+ return -EINVAL;
+ }
+ *member_offset = parent_offset + member->offset;
+ *member_tid = tid;
+ return 0;
+ }
+ } else if (btf_is_composite(member_type)) {
+ int err;
+
+ err = btf_find_member(btf, member_type, parent_offset + member->offset,
+ member_name, member_tid, member_offset);
+ if (!err)
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
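+/* Rewrite sinfo (offset/size/type) to address the nested member named by
+ * a dotted preset such as "struct1.struct2.u.var_u8"; the leading path
+ * component is the variable itself, each further component one member.
+ */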
+static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t,
+ struct btf_var_secinfo *sinfo, const char *var)
+{
+ char expr[256], *saveptr;
+ const struct btf_type *base_type, *member_type;
+ int err, member_tid;
+ char *name;
+ __u32 member_offset = 0;
+
+ base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type));
+ snprintf(expr, sizeof(expr), "%s", var);
+ strtok_r(expr, ".", &saveptr);
+
+ while ((name = strtok_r(NULL, ".", &saveptr))) {
+ err = btf_find_member(btf, base_type, 0, name, &member_tid, &member_offset);
+ if (err) {
+ fprintf(stderr, "Could not find member %s for variable %s\n", name, var);
+ return err;
+ }
+ member_type = btf__type_by_id(btf, member_tid);
+ sinfo->offset += member_offset / 8;
+ sinfo->size = member_type->size;
+ sinfo->type = member_tid;
+ base_type = member_type;
+ }
+ return 0;
+}
+
+static int set_global_var(struct bpf_object *obj, struct btf *btf,
struct bpf_map *map, struct btf_var_secinfo *sinfo,
struct var_preset *preset)
{
@@ -1495,9 +1572,9 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct
long long value = preset->ivalue;
size_t size;
- base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type));
+ base_type = btf__type_by_id(btf, btf__resolve_type(btf, sinfo->type));
if (!base_type) {
- fprintf(stderr, "Failed to resolve type %d\n", t->type);
+ fprintf(stderr, "Failed to resolve type %d\n", sinfo->type);
return -EINVAL;
}
if (!is_preset_supported(base_type)) {
@@ -1530,7 +1607,7 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct
if (value >= max_val || value < -max_val) {
fprintf(stderr,
"Variable %s value %lld is out of range [%lld; %lld]\n",
- btf__name_by_offset(btf, t->name_off), value,
+ btf__name_by_offset(btf, base_type->name_off), value,
is_signed ? -max_val : 0, max_val - 1);
return -EINVAL;
}
@@ -1583,14 +1660,20 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i
for (j = 0; j < n; ++j, ++sinfo) {
const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type);
const char *var_name;
+ int var_len;
if (!btf_is_var(var_type))
continue;
var_name = btf__name_by_offset(btf, var_type->name_off);
+ var_len = strlen(var_name);
for (k = 0; k < npresets; ++k) {
- if (strcmp(var_name, presets[k].name) != 0)
+ struct btf_var_secinfo tmp_sinfo;
+
+ if (strncmp(var_name, presets[k].name, var_len) != 0 ||
+ (presets[k].name[var_len] != '\0' &&
+ presets[k].name[var_len] != '.'))
continue;
if (presets[k].applied) {
@@ -1598,13 +1681,17 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i
var_name);
return -EINVAL;
}
+ tmp_sinfo = *sinfo;
+ err = adjust_var_secinfo(btf, var_type,
+ &tmp_sinfo, presets[k].name);
+ if (err)
+ return err;
- err = set_global_var(obj, btf, var_type, map, sinfo, presets + k);
+ err = set_global_var(obj, btf, map, &tmp_sinfo, presets + k);
if (err)
return err;
presets[k].applied = true;
- break;
}
}
}
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index efea93cf23d4..cd12b8b5ac0e 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -378,7 +378,7 @@ display net,port,proto
type_spec ipv4_addr . inet_service . inet_proto
chain_spec ip daddr . udp dport . meta l4proto
dst addr4 port proto
-src
+src
start 1
count 9
src_delta 9
@@ -419,6 +419,7 @@ table inet filter {
set test {
type ${type_spec}
+ counter
flags interval,timeout
}
@@ -1158,9 +1159,18 @@ del() {
fi
}
-# Return packet count from 'test' counter in 'inet filter' table
+# Return packet count for elem $1 from 'test' counter in 'inet filter' table
count_packets() {
found=0
+ for token in $(nft reset element inet filter test "${1}" ); do
+ [ ${found} -eq 1 ] && echo "${token}" && return
+ [ "${token}" = "packets" ] && found=1
+ done
+}
+
+# Return packet count from 'test' counter in 'inet filter' table
+count_packets_nomatch() {
+ found=0
for token in $(nft list counter inet filter test); do
[ ${found} -eq 1 ] && echo "${token}" && return
[ "${token}" = "packets" ] && found=1
@@ -1206,6 +1216,10 @@ perf() {
# Set MAC addresses, send single packet, check that it matches, reset counter
send_match() {
+ local elem="$1"
+
+ shift
+
ip link set veth_a address "$(format_mac "${1}")"
ip -n B link set veth_b address "$(format_mac "${2}")"
@@ -1216,7 +1230,7 @@ send_match() {
eval src_"$f"=\$\(format_\$f "${2}"\)
done
eval send_\$proto
- if [ "$(count_packets)" != "1" ]; then
+ if [ "$(count_packets "$elem")" != "1" ]; then
err "${proto} packet to:"
err " $(for f in ${dst}; do
eval format_\$f "${1}"; printf ' '; done)"
@@ -1242,7 +1256,7 @@ send_nomatch() {
eval src_"$f"=\$\(format_\$f "${2}"\)
done
eval send_\$proto
- if [ "$(count_packets)" != "0" ]; then
+ if [ "$(count_packets_nomatch)" != "0" ]; then
err "${proto} packet to:"
err " $(for f in ${dst}; do
eval format_\$f "${1}"; printf ' '; done)"
@@ -1255,6 +1269,42 @@ send_nomatch() {
fi
}
+maybe_send_nomatch() {
+ local elem="$1"
+ local what="$4"
+
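+	# Datapath checks are slow: sample only about 1 in 20 elements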
+ [ $((RANDOM%20)) -gt 0 ] && return
+
+ dst_addr4="$2"
+ dst_port="$3"
+ send_udp
+
+ if [ "$(count_packets_nomatch)" != "0" ]; then
+ err "Packet to $dst_addr4:$dst_port did match $what"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+}
+
+maybe_send_match() {
+ local elem="$1"
+ local what="$4"
+
+ [ $((RANDOM%20)) -gt 0 ] && return
+
+ dst_addr4="$2"
+ dst_port="$3"
+ send_udp
+
+ if [ "$(count_packets "{ $elem }")" != "1" ]; then
+ err "Packet to $dst_addr4:$dst_port did not match $what"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+ nft reset counter inet filter test >/dev/null
+ nft reset element inet filter test "{ $elem }" >/dev/null
+}
+
# Correctness test template:
# - add ranged element, check that packets match it
# - check that packets outside range don't match it
@@ -1262,6 +1312,8 @@ send_nomatch() {
test_correctness_main() {
range_size=1
for i in $(seq "${start}" $((start + count))); do
+ local elem=""
+
end=$((start + range_size))
# Avoid negative or zero-sized port ranges
@@ -1272,15 +1324,16 @@ test_correctness_main() {
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
- add "$(format)" || return 1
+ elem="$(format)"
+ add "$elem" || return 1
for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
- send_match "${j}" $((j + src_delta)) || return 1
+ send_match "$elem" "${j}" $((j + src_delta)) || return 1
done
send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
# Delete elements now and then
if [ $((i % 3)) -eq 0 ]; then
- del "$(format)" || return 1
+ del "$elem" || return 1
for j in $(seq "$start" \
$((range_size / 2 + 1)) ${end}); do
send_nomatch "${j}" $((j + src_delta)) \
@@ -1572,14 +1625,17 @@ test_timeout() {
range_size=1
for i in $(seq "$start" $((start + count))); do
+ local elem=""
+
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
- add "$(format)" || return 1
+ elem="$(format)"
+ add "$elem" || return 1
for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
- send_match "${j}" $((j + src_delta)) || return 1
+ send_match "$elem" "${j}" $((j + src_delta)) || return 1
done
range_size=$((range_size + 1))
@@ -1737,7 +1793,7 @@ test_bug_reload() {
srcend=$((end + src_delta))
for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
- send_match "${j}" $((j + src_delta)) || return 1
+ send_match "$(format)" "${j}" $((j + src_delta)) || return 1
done
range_size=$((range_size + 1))
@@ -1756,22 +1812,34 @@ test_bug_net_port_proto_match() {
range_size=1
for i in $(seq 1 10); do
for j in $(seq 1 20) ; do
- elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+ local dport=$j
+
+ elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
+
+ # too slow, do not test all addresses
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "before add" || return 1
nft "add element inet filter test { $elem }" || return 1
+
+ maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "after add" || return 1
+
nft "get element inet filter test { $elem }" | grep -q "$elem"
if [ $? -ne 0 ];then
local got=$(nft "get element inet filter test { $elem }")
err "post-add: should have returned $elem but got $got"
return 1
fi
+
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "out-of-range" || return 1
done
done
# recheck after set was filled
for i in $(seq 1 10); do
for j in $(seq 1 20) ; do
- elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+ local dport=$j
+
+ elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
nft "get element inet filter test { $elem }" | grep -q "$elem"
if [ $? -ne 0 ];then
@@ -1779,6 +1847,9 @@ test_bug_net_port_proto_match() {
err "post-fill: should have returned $elem but got $got"
return 1
fi
+
+ maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "recheck" || return 1
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "recheck out-of-range" || return 1
done
done
@@ -1786,9 +1857,10 @@ test_bug_net_port_proto_match() {
for i in $(seq 1 10); do
for j in $(seq 1 20) ; do
local rnd=$((RANDOM%10))
+ local dport=$j
local got=""
- elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+ elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
if [ $rnd -gt 0 ];then
continue
fi
@@ -1799,6 +1871,8 @@ test_bug_net_port_proto_match() {
err "post-delete: query for $elem returned $got instead of error."
return 1
fi
+
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "match after deletion" || return 1
done
done
@@ -1817,7 +1891,7 @@ test_bug_avx2_mismatch()
dst_addr6="$a2"
send_icmp6
- if [ "$(count_packets)" -gt "0" ]; then
+ if [ "$(count_packets "{ icmpv6 . $a1 }")" -gt "0" ]; then
err "False match for $a2"
return 1
fi
diff --git a/tools/testing/selftests/net/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh
index 9e39de26455f..a954754b99b3 100755
--- a/tools/testing/selftests/net/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/net/netfilter/nft_nat.sh
@@ -866,6 +866,24 @@ EOF
ip netns exec "$ns0" nft delete table $family nat
}
+file_cmp()
+{
+ local infile="$1"
+	local outfile="$2"
+	local msg="$3"
+
+ if ! cmp "$infile" "$outfile";then
+ echo -n "Infile "
+ ls -l "$infile"
+ echo -n "Outfile "
+ ls -l "$outfile"
+ echo "ERROR: in and output file mismatch when checking $msg" 1>&1
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
test_stateless_nat_ip()
{
local lret=0
@@ -966,11 +984,7 @@ EOF
wait
- if ! cmp "$INFILE" "$OUTFILE";then
- ls -l "$INFILE" "$OUTFILE"
- echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
- lret=1
- fi
+ file_cmp "$INFILE" "$OUTFILE" "udp with stateless nat" || lret=1
:> "$OUTFILE"
@@ -991,6 +1005,62 @@ EOF
return $lret
}
+test_dnat_clash()
+{
+ local lret=0
+
+ if ! socat -h > /dev/null 2>&1;then
+ echo "SKIP: Could not run dnat clash test without socat tool"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return $ksft_skip
+ fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+flush ruleset
+table ip dnat-test {
+ chain prerouting {
+ type nat hook prerouting priority dstnat; policy accept;
+ ip daddr 10.0.2.1 udp dport 1234 counter dnat to 10.0.1.1:1234
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add dnat rules"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return $ksft_skip
+ fi
+
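+	# Both flows resolve to 10.0.1.1:1234 after dnat; reusing source
+	# port 4321 makes the second flow clash with the first conntrack
+	# entry, exercising the clash-resolution path.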
+ local udpdaddr="10.0.2.1"
+ for i in 1 2;do
+ echo "PING $udpdaddr" > "$INFILE"
+ echo "PONG 10.0.1.1 step $i" | ip netns exec "$ns0" timeout 3 socat STDIO UDP4-LISTEN:1234,bind=10.0.1.1 > "$OUTFILE" 2>/dev/null &
+ local lpid=$!
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns0" 1234 "-u"
+
+ result=$(ip netns exec "$ns1" timeout 3 socat STDIO UDP4-SENDTO:"$udpdaddr:1234,sourceport=4321" < "$INFILE")
+ udpdaddr="10.0.1.1"
+
+ if [ "$result" != "PONG 10.0.1.1 step $i" ] ; then
+ echo "ERROR: failed to test udp $ns1 to $ns2 with dnat rule step $i, result: \"$result\"" 1>&2
+ lret=1
+ ret=1
+ fi
+
+ wait
+
+ file_cmp "$INFILE" "$OUTFILE" "udp dnat step $i" || lret=1
+
+ :> "$OUTFILE"
+ done
+
+ test $lret -eq 0 && echo "PASS: IP dnat clash $ns1:$ns2"
+
+ ip netns exec "$ns0" nft flush ruleset
+
+ return $lret
+}
+
# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
for i in "$ns0" "$ns1" "$ns2" ;do
ip netns exec "$i" nft -f /dev/stdin <<EOF
@@ -1147,6 +1217,7 @@ $test_inet_nat && test_redirect6 inet
test_port_shadowing
test_stateless_nat_ip
+test_dnat_clash
if [ $ret -ne 0 ];then
echo -n "FAIL: "
diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c
index de9c26f98b2e..9201f2905f2c 100644
--- a/tools/testing/selftests/net/ovpn/ovpn-cli.c
+++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c
@@ -2166,6 +2166,7 @@ static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[])
ovpn->peers_file = argv[4];
+ ovpn->sa_family = AF_INET;
if (argc > 5 && !strcmp(argv[5], "ipv6"))
ovpn->sa_family = AF_INET6;
break;
diff --git a/tools/testing/selftests/net/ovpn/test-large-mtu.sh b/tools/testing/selftests/net/ovpn/test-large-mtu.sh
new file mode 100755
index 000000000000..ce2a2cb64f72
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-large-mtu.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+MTU="1500"
+
+source test.sh