diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-18 12:34:53 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-18 12:34:53 -0700 |
commit | 81160dda9a7aad13c04e78bb2cfd3c4630e3afab (patch) | |
tree | 4bf79ffa9fc7dc5e2915ff978778c3402c491113 /kernel/bpf/xskmap.c | |
parent | 8b53c76533aa4356602aea98f98a2f3b4051464c (diff) | |
parent | 1bab8d4c488be22d57f9dd09968c90a0ddc413bf (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller:
1) Support IPV6 RA Captive Portal Identifier, from Maciej Żenczykowski.
2) Use bio_vec in the networking instead of custom skb_frag_t, from
Matthew Wilcox.
3) Make use of xmit_more in r8169 driver, from Heiner Kallweit.
4) Add devmap_hash to xdp, from Toke Høiland-Jørgensen.
5) Support all variants of 5750X bnxt_en chips, from Michael Chan.
6) More RTNL avoidance work in the core and mlx5 driver, from Vlad
Buslov.
7) Add TCP syn cookies bpf helper, from Petar Penkov.
8) Add 'nettest' to selftests and use it, from David Ahern.
9) Add extack support to drop_monitor, add packet alert mode and
support for HW drops, from Ido Schimmel.
10) Add VLAN offload to stmmac, from Jose Abreu.
11) Lots of devm_platform_ioremap_resource() conversions, from
YueHaibing.
12) Add IONIC driver, from Shannon Nelson.
13) Several kTLS cleanups, from Jakub Kicinski.
* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1930 commits)
mlxsw: spectrum_buffers: Add the ability to query the CPU port's shared buffer
mlxsw: spectrum: Register CPU port with devlink
mlxsw: spectrum_buffers: Prevent changing CPU port's configuration
net: ena: fix incorrect update of intr_delay_resolution
net: ena: fix retrieval of nonadaptive interrupt moderation intervals
net: ena: fix update of interrupt moderation register
net: ena: remove all old adaptive rx interrupt moderation code from ena_com
net: ena: remove ena_restore_ethtool_params() and relevant fields
net: ena: remove old adaptive interrupt moderation code from ena_netdev
net: ena: remove code duplication in ena_com_update_nonadaptive_moderation_interval _*()
net: ena: enable the interrupt_moderation in driver_supported_features
net: ena: reimplement set/get_coalesce()
net: ena: switch to dim algorithm for rx adaptive interrupt moderation
net: ena: add intr_moder_rx_interval to struct ena_com_dev and use it
net: phy: adin: implement Energy Detect Powerdown mode via phy-tunable
ethtool: implement Energy Detect Powerdown support via phy-tunable
xen-netfront: do not assume sk_buff_head list is empty in error handling
s390/ctcm: Delete unnecessary checks before the macro call “dev_kfree_skb”
net: ena: don't wake up tx queue when down
drop_monitor: Better sanitize notified packets
...
Diffstat (limited to 'kernel/bpf/xskmap.c')
-rw-r--r-- | kernel/bpf/xskmap.c | 133 |
1 files changed, 111 insertions, 22 deletions
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index 9bb96ace9fa1..942c662e2eed 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -13,8 +13,71 @@ struct xsk_map { struct bpf_map map; struct xdp_sock **xsk_map; struct list_head __percpu *flush_list; + spinlock_t lock; /* Synchronize map updates */ }; +int xsk_map_inc(struct xsk_map *map) +{ + struct bpf_map *m = &map->map; + + m = bpf_map_inc(m, false); + return PTR_ERR_OR_ZERO(m); +} + +void xsk_map_put(struct xsk_map *map) +{ + bpf_map_put(&map->map); +} + +static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map, + struct xdp_sock **map_entry) +{ + struct xsk_map_node *node; + int err; + + node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN); + if (!node) + return NULL; + + err = xsk_map_inc(map); + if (err) { + kfree(node); + return ERR_PTR(err); + } + + node->map = map; + node->map_entry = map_entry; + return node; +} + +static void xsk_map_node_free(struct xsk_map_node *node) +{ + xsk_map_put(node->map); + kfree(node); +} + +static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node) +{ + spin_lock_bh(&xs->map_list_lock); + list_add_tail(&node->node, &xs->map_list); + spin_unlock_bh(&xs->map_list_lock); +} + +static void xsk_map_sock_delete(struct xdp_sock *xs, + struct xdp_sock **map_entry) +{ + struct xsk_map_node *n, *tmp; + + spin_lock_bh(&xs->map_list_lock); + list_for_each_entry_safe(n, tmp, &xs->map_list, node) { + if (map_entry == n->map_entry) { + list_del(&n->node); + xsk_map_node_free(n); + } + } + spin_unlock_bh(&xs->map_list_lock); +} + static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) { struct xsk_map *m; @@ -34,6 +97,7 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&m->map, attr); + spin_lock_init(&m->lock); cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *); cost += sizeof(struct list_head) * num_possible_cpus(); @@ -71,21 +135,9 @@ free_m: static void xsk_map_free(struct bpf_map *map) { struct xsk_map *m = container_of(map, struct xsk_map, map); - int i; bpf_clear_redirect_map(map); synchronize_net(); - - for (i = 0; i < map->max_entries; i++) { - struct xdp_sock *xs; - - xs = m->xsk_map[i]; - if (!xs) - continue; - - sock_put((struct sock *)xs); - } - free_percpu(m->flush_list); bpf_map_area_free(m->xsk_map); kfree(m); @@ -164,8 +216,9 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct xsk_map *m = container_of(map, struct xsk_map, map); + struct xdp_sock *xs, *old_xs, **map_entry; u32 i = *(u32 *)key, fd = *(u32 *)value; - struct xdp_sock *xs, *old_xs; + struct xsk_map_node *node; struct socket *sock; int err; @@ -173,8 +226,6 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, return -EINVAL; if (unlikely(i >= m->map.max_entries)) return -E2BIG; - if (unlikely(map_flags == BPF_NOEXIST)) - return -EEXIST; sock = sockfd_lookup(fd, &err); if (!sock) @@ -192,32 +243,70 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, return -EOPNOTSUPP; } - sock_hold(sock->sk); + map_entry = &m->xsk_map[i]; + node = xsk_map_node_alloc(m, map_entry); + if (IS_ERR(node)) { + sockfd_put(sock); + return PTR_ERR(node); + } - old_xs = xchg(&m->xsk_map[i], xs); + spin_lock_bh(&m->lock); + old_xs = READ_ONCE(*map_entry); + if (old_xs == xs) { + err = 0; + goto out; + } else if (old_xs && map_flags == BPF_NOEXIST) { + err = -EEXIST; + goto out; + } else if (!old_xs && map_flags == BPF_EXIST) { + err = -ENOENT; + goto out; + } + xsk_map_sock_add(xs, node); + WRITE_ONCE(*map_entry, xs); if (old_xs) - sock_put((struct sock *)old_xs); - + xsk_map_sock_delete(old_xs, map_entry); + spin_unlock_bh(&m->lock); sockfd_put(sock); return 0; + +out: + spin_unlock_bh(&m->lock); + sockfd_put(sock); + xsk_map_node_free(node); + return err; } static int xsk_map_delete_elem(struct bpf_map *map, void *key) { struct xsk_map *m = container_of(map, struct xsk_map, map); - struct xdp_sock *old_xs; + struct xdp_sock *old_xs, **map_entry; int k = *(u32 *)key; if (k >= map->max_entries) return -EINVAL; - old_xs = xchg(&m->xsk_map[k], NULL); + spin_lock_bh(&m->lock); + map_entry = &m->xsk_map[k]; + old_xs = xchg(map_entry, NULL); if (old_xs) - sock_put((struct sock *)old_xs); + xsk_map_sock_delete(old_xs, map_entry); + spin_unlock_bh(&m->lock); return 0; } +void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, + struct xdp_sock **map_entry) +{ + spin_lock_bh(&map->lock); + if (READ_ONCE(*map_entry) == xs) { + WRITE_ONCE(*map_entry, NULL); + xsk_map_sock_delete(xs, map_entry); + } + spin_unlock_bh(&map->lock); +} + const struct bpf_map_ops xsk_map_ops = { .map_alloc = xsk_map_alloc, .map_free = xsk_map_free, |