diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/8021q/vlan_core.c | 10 | ||||
-rw-r--r-- | net/Kconfig | 8 | ||||
-rw-r--r-- | net/bridge/br_arp_nd_proxy.c | 4 | ||||
-rw-r--r-- | net/bridge/br_vlan_tunnel.c | 2 | ||||
-rw-r--r-- | net/core/dev.c | 272 | ||||
-rw-r--r-- | net/core/drop_monitor.c | 2 | ||||
-rw-r--r-- | net/core/net-procfs.c | 3 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 177 | ||||
-rw-r--r-- | net/decnet/dn_route.c | 21 | ||||
-rw-r--r-- | net/dsa/Kconfig | 17 | ||||
-rw-r--r-- | net/dsa/dsa_priv.h | 14 | ||||
-rw-r--r-- | net/dsa/tag_brcm.c | 107 | ||||
-rw-r--r-- | net/ethernet/eth.c | 11 | ||||
-rw-r--r-- | net/ethtool/ioctl.c | 12 | ||||
-rw-r--r-- | net/hsr/hsr_debugfs.c | 2 | ||||
-rw-r--r-- | net/l2tp/l2tp_core.c | 2 | ||||
-rw-r--r-- | net/openvswitch/vport.h | 2 | ||||
-rw-r--r-- | net/sched/cls_flower.c | 40 | ||||
-rw-r--r-- | net/sched/sch_taprio.c | 64 | ||||
-rw-r--r-- | net/tipc/addr.c | 1 | ||||
-rw-r--r-- | net/tipc/addr.h | 46 | ||||
-rw-r--r-- | net/tipc/msg.c | 23 | ||||
-rw-r--r-- | net/tipc/name_distr.c | 93 | ||||
-rw-r--r-- | net/tipc/name_table.c | 426 | ||||
-rw-r--r-- | net/tipc/name_table.h | 63 | ||||
-rw-r--r-- | net/tipc/net.c | 8 | ||||
-rw-r--r-- | net/tipc/node.c | 28 | ||||
-rw-r--r-- | net/tipc/socket.c | 319 | ||||
-rw-r--r-- | net/tipc/subscr.c | 86 | ||||
-rw-r--r-- | net/tipc/subscr.h | 14 |
30 files changed, 1042 insertions, 835 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 78ec2e1b14d1..59bc13b5f14f 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -4,6 +4,7 @@ #include <linux/if_vlan.h> #include <linux/netpoll.h> #include <linux/export.h> +#include <net/gro.h> #include "vlan.h" bool vlan_do_receive(struct sk_buff **skbp) @@ -495,7 +496,10 @@ static struct sk_buff *vlan_gro_receive(struct list_head *head, skb_gro_pull(skb, sizeof(*vhdr)); skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr)); - pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); + + pp = indirect_call_gro_receive_inet(ptype->callbacks.gro_receive, + ipv6_gro_receive, inet_gro_receive, + head, skb); out_unlock: rcu_read_unlock(); @@ -515,7 +519,9 @@ static int vlan_gro_complete(struct sk_buff *skb, int nhoff) rcu_read_lock(); ptype = gro_find_complete_by_type(type); if (ptype) - err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr)); + err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete, + ipv6_gro_complete, inet_gro_complete, + skb, nhoff + sizeof(*vhdr)); rcu_read_unlock(); return err; diff --git a/net/Kconfig b/net/Kconfig index 0ead7ec0d2bd..9c456acc379e 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -245,6 +245,14 @@ source "net/l3mdev/Kconfig" source "net/qrtr/Kconfig" source "net/ncsi/Kconfig" +config PCPU_DEV_REFCNT + bool "Use percpu variables to maintain network device refcount" + depends on SMP + default y + help + network device refcount are using per cpu variables if this option is set. + This can be forced to N to detect underflows (with a performance drop). + config RPS bool depends on SMP && SYSFS diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index dfec65eca8a6..3db1def4437b 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -160,7 +160,9 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, if (br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) { if (p && (p->flags & BR_NEIGH_SUPPRESS)) return; - if (ipv4_is_zeronet(sip) || sip == tip) { + if (parp->ar_op != htons(ARPOP_RREQUEST) && + parp->ar_op != htons(ARPOP_RREPLY) && + (ipv4_is_zeronet(sip) || sip == tip)) { /* prevent flooding to neigh suppress ports */ BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; return; diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c index 169e005fbda2..0d3a8c01552e 100644 --- a/net/bridge/br_vlan_tunnel.c +++ b/net/bridge/br_vlan_tunnel.c @@ -35,7 +35,7 @@ static const struct rhashtable_params br_vlan_tunnel_rht_params = { }; static struct net_bridge_vlan *br_vlan_tunnel_lookup(struct rhashtable *tbl, - u64 tunnel_id) + __be64 tunnel_id) { return rhashtable_lookup_fast(tbl, &tunnel_id, br_vlan_tunnel_rht_params); diff --git a/net/core/dev.c b/net/core/dev.c index 6bc20eabd2b0..c9a496f5e687 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2451,16 +2451,14 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq) EXPORT_SYMBOL(netdev_txq_to_tc); #ifdef CONFIG_XPS -struct static_key xps_needed __read_mostly; -EXPORT_SYMBOL(xps_needed); -struct static_key xps_rxqs_needed __read_mostly; -EXPORT_SYMBOL(xps_rxqs_needed); +static struct static_key xps_needed __read_mostly; +static struct static_key xps_rxqs_needed __read_mostly; static DEFINE_MUTEX(xps_map_mutex); #define xmap_dereference(P) \ rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) static bool remove_xps_queue(struct xps_dev_maps *dev_maps, - int tci, u16 index) + struct xps_dev_maps *old_maps, int tci, u16 index) { struct xps_map *map = NULL; int pos; @@ -2479,6 +2477,8 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps, break; } + if (old_maps) + RCU_INIT_POINTER(old_maps->attr_map[tci], NULL); RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL); kfree_rcu(map, rcu); return false; @@ -2491,7 +2491,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev, struct xps_dev_maps *dev_maps, int cpu, u16 offset, u16 count) { - int num_tc = dev->num_tc ? : 1; + int num_tc = dev_maps->num_tc; bool active = false; int tci; @@ -2499,7 +2499,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev, int i, j; for (i = count, j = offset; i--; j++) { - if (!remove_xps_queue(dev_maps, tci, j)) + if (!remove_xps_queue(dev_maps, NULL, tci, j)) break; } @@ -2511,74 +2511,54 @@ static bool remove_xps_queue_cpu(struct net_device *dev, static void reset_xps_maps(struct net_device *dev, struct xps_dev_maps *dev_maps, - bool is_rxqs_map) + enum xps_map_type type) { - if (is_rxqs_map) { - static_key_slow_dec_cpuslocked(&xps_rxqs_needed); - RCU_INIT_POINTER(dev->xps_rxqs_map, NULL); - } else { - RCU_INIT_POINTER(dev->xps_cpus_map, NULL); - } static_key_slow_dec_cpuslocked(&xps_needed); + if (type == XPS_RXQS) + static_key_slow_dec_cpuslocked(&xps_rxqs_needed); + + RCU_INIT_POINTER(dev->xps_maps[type], NULL); + kfree_rcu(dev_maps, rcu); } -static void clean_xps_maps(struct net_device *dev, const unsigned long *mask, - struct xps_dev_maps *dev_maps, unsigned int nr_ids, - u16 offset, u16 count, bool is_rxqs_map) +static void clean_xps_maps(struct net_device *dev, enum xps_map_type type, + u16 offset, u16 count) { + struct xps_dev_maps *dev_maps; bool active = false; int i, j; - for (j = -1; j = netif_attrmask_next(j, mask, nr_ids), - j < nr_ids;) - active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, - count); + dev_maps = xmap_dereference(dev->xps_maps[type]); + if (!dev_maps) + return; + + for (j = 0; j < dev_maps->nr_ids; j++) + active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, count); if (!active) - reset_xps_maps(dev, dev_maps, is_rxqs_map); + reset_xps_maps(dev, dev_maps, type); - if (!is_rxqs_map) { - for (i = offset + (count - 1); count--; i--) { + if (type == XPS_CPUS) { + for (i = offset + (count - 1); count--; i--) netdev_queue_numa_node_write( - netdev_get_tx_queue(dev, i), - NUMA_NO_NODE); - } + netdev_get_tx_queue(dev, i), NUMA_NO_NODE); } } static void netif_reset_xps_queues(struct net_device *dev, u16 offset, u16 count) { - const unsigned long *possible_mask = NULL; - struct xps_dev_maps *dev_maps; - unsigned int nr_ids; - if (!static_key_false(&xps_needed)) return; cpus_read_lock(); mutex_lock(&xps_map_mutex); - if (static_key_false(&xps_rxqs_needed)) { - dev_maps = xmap_dereference(dev->xps_rxqs_map); - if (dev_maps) { - nr_ids = dev->num_rx_queues; - clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, - offset, count, true); - } - } - - dev_maps = xmap_dereference(dev->xps_cpus_map); - if (!dev_maps) - goto out_no_maps; + if (static_key_false(&xps_rxqs_needed)) + clean_xps_maps(dev, XPS_RXQS, offset, count); - if (num_possible_cpus() > 1) - possible_mask = cpumask_bits(cpu_possible_mask); - nr_ids = nr_cpu_ids; - clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count, - false); + clean_xps_maps(dev, XPS_CPUS, offset, count); -out_no_maps: mutex_unlock(&xps_map_mutex); cpus_read_unlock(); } @@ -2628,16 +2608,35 @@ static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index, return new_map; } +/* Copy xps maps at a given index */ +static void xps_copy_dev_maps(struct xps_dev_maps *dev_maps, + struct xps_dev_maps *new_dev_maps, int index, + int tc, bool skip_tc) +{ + int i, tci = index * dev_maps->num_tc; + struct xps_map *map; + + /* copy maps belonging to foreign traffic classes */ + for (i = 0; i < dev_maps->num_tc; i++, tci++) { + if (i == tc && skip_tc) + continue; + + /* fill in the new device map from the old device map */ + map = xmap_dereference(dev_maps->attr_map[tci]); + RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); + } +} + /* Must be called under cpus_read_lock */ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, - u16 index, bool is_rxqs_map) + u16 index, enum xps_map_type type) { - const unsigned long *online_mask = NULL, *possible_mask = NULL; - struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; + struct xps_dev_maps *dev_maps, *new_dev_maps = NULL, *old_dev_maps = NULL; + const unsigned long *online_mask = NULL; + bool active = false, copy = false; int i, j, tci, numa_node_id = -2; int maps_sz, num_tc = 1, tc = 0; struct xps_map *map, *new_map; - bool active = false; unsigned int nr_ids; if (dev->num_tc) { @@ -2655,38 +2654,48 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, } mutex_lock(&xps_map_mutex); - if (is_rxqs_map) { + + dev_maps = xmap_dereference(dev->xps_maps[type]); + if (type == XPS_RXQS) { maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues); - dev_maps = xmap_dereference(dev->xps_rxqs_map); nr_ids = dev->num_rx_queues; } else { maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc); - if (num_possible_cpus() > 1) { + if (num_possible_cpus() > 1) online_mask = cpumask_bits(cpu_online_mask); - possible_mask = cpumask_bits(cpu_possible_mask); - } - dev_maps = xmap_dereference(dev->xps_cpus_map); nr_ids = nr_cpu_ids; } if (maps_sz < L1_CACHE_BYTES) maps_sz = L1_CACHE_BYTES; + /* The old dev_maps could be larger or smaller than the one we're + * setting up now, as dev->num_tc or nr_ids could have been updated in + * between. We could try to be smart, but let's be safe instead and only + * copy foreign traffic classes if the two map sizes match. + */ + if (dev_maps && + dev_maps->num_tc == num_tc && dev_maps->nr_ids == nr_ids) + copy = true; + /* allocate memory for queue storage */ for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids), j < nr_ids;) { - if (!new_dev_maps) - new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); if (!new_dev_maps) { - mutex_unlock(&xps_map_mutex); - return -ENOMEM; + new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); + if (!new_dev_maps) { + mutex_unlock(&xps_map_mutex); + return -ENOMEM; + } + + new_dev_maps->nr_ids = nr_ids; + new_dev_maps->num_tc = num_tc; } tci = j * num_tc + tc; - map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) : - NULL; + map = copy ? xmap_dereference(dev_maps->attr_map[tci]) : NULL; - map = expand_xps_map(map, j, index, is_rxqs_map); + map = expand_xps_map(map, j, index, type == XPS_RXQS); if (!map) goto error; @@ -2699,29 +2708,21 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, if (!dev_maps) { /* Increment static keys at most once per type */ static_key_slow_inc_cpuslocked(&xps_needed); - if (is_rxqs_map) + if (type == XPS_RXQS) static_key_slow_inc_cpuslocked(&xps_rxqs_needed); } - for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), - j < nr_ids;) { - /* copy maps belonging to foreign traffic classes */ - for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) { - /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->attr_map[tci]); - RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); - } + for (j = 0; j < nr_ids; j++) { + bool skip_tc = false; - /* We need to explicitly update tci as prevous loop - * could break out early if dev_maps is NULL. - */ tci = j * num_tc + tc; - if (netif_attr_test_mask(j, mask, nr_ids) && netif_attr_test_online(j, online_mask, nr_ids)) { /* add tx-queue to CPU/rx-queue maps */ int pos = 0; + skip_tc = true; + map = xmap_dereference(new_dev_maps->attr_map[tci]); while ((pos < map->len) && (map->queues[pos] != index)) pos++; @@ -2729,78 +2730,81 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, if (pos == map->len) map->queues[map->len++] = index; #ifdef CONFIG_NUMA - if (!is_rxqs_map) { + if (type == XPS_CPUS) { if (numa_node_id == -2) numa_node_id = cpu_to_node(j); else if (numa_node_id != cpu_to_node(j)) numa_node_id = -1; } #endif - } else if (dev_maps) { - /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->attr_map[tci]); - RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); } - /* copy maps belonging to foreign traffic classes */ - for (i = num_tc - tc, tci++; dev_maps && --i; tci++) { - /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->attr_map[tci]); - RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); - } + if (copy) + xps_copy_dev_maps(dev_maps, new_dev_maps, j, tc, + skip_tc); } - if (is_rxqs_map) - rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps); - else - rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps); + rcu_assign_pointer(dev->xps_maps[type], new_dev_maps); /* Cleanup old maps */ if (!dev_maps) goto out_no_old_maps; - for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), - j < nr_ids;) { - for (i = num_tc, tci = j * num_tc; i--; tci++) { - new_map = xmap_dereference(new_dev_maps->attr_map[tci]); + for (j = 0; j < dev_maps->nr_ids; j++) { + for (i = num_tc, tci = j * dev_maps->num_tc; i--; tci++) { map = xmap_dereference(dev_maps->attr_map[tci]); - if (map && map != new_map) - kfree_rcu(map, rcu); + if (!map) + continue; + + if (copy) { + new_map = xmap_dereference(new_dev_maps->attr_map[tci]); + if (map == new_map) + continue; + } + + RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL); + kfree_rcu(map, rcu); } } - kfree_rcu(dev_maps, rcu); + old_dev_maps = dev_maps; out_no_old_maps: dev_maps = new_dev_maps; active = true; out_no_new_maps: - if (!is_rxqs_map) { + if (type == XPS_CPUS) /* update Tx queue numa node */ netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), (numa_node_id >= 0) ? numa_node_id : NUMA_NO_NODE); - } if (!dev_maps) goto out_no_maps; /* removes tx-queue from unused CPUs/rx-queues */ - for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), - j < nr_ids;) { - for (i = tc, tci = j * num_tc; i--; tci++) - active |= remove_xps_queue(dev_maps, tci, index); - if (!netif_attr_test_mask(j, mask, nr_ids) || - !netif_attr_test_online(j, online_mask, nr_ids)) - active |= remove_xps_queue(dev_maps, tci, index); - for (i = num_tc - tc, tci++; --i; tci++) - active |= remove_xps_queue(dev_maps, tci, index); + for (j = 0; j < dev_maps->nr_ids; j++) { + tci = j * dev_maps->num_tc; + + for (i = 0; i < dev_maps->num_tc; i++, tci++) { + if (i == tc && + netif_attr_test_mask(j, mask, dev_maps->nr_ids) && + netif_attr_test_online(j, online_mask, dev_maps->nr_ids)) + continue; + + active |= remove_xps_queue(dev_maps, + copy ? old_dev_maps : NULL, + tci, index); + } } + if (old_dev_maps) + kfree_rcu(old_dev_maps, rcu); + /* free map if not active */ if (!active) - reset_xps_maps(dev, dev_maps, is_rxqs_map); + reset_xps_maps(dev, dev_maps, type); out_no_maps: mutex_unlock(&xps_map_mutex); @@ -2808,11 +2812,10 @@ out_no_maps: return 0; error: /* remove any maps that we added */ - for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), - j < nr_ids;) { + for (j = 0; j < nr_ids; j++) { for (i = num_tc, tci = j * num_tc; i--; tci++) { new_map = xmap_dereference(new_dev_maps->attr_map[tci]); - map = dev_maps ? + map = copy ? xmap_dereference(dev_maps->attr_map[tci]) : NULL; if (new_map && new_map != map) @@ -2833,7 +2836,7 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, int ret; cpus_read_lock(); - ret = __netif_set_xps_queue(dev, cpumask_bits(mask), index, false); + ret = __netif_set_xps_queue(dev, cpumask_bits(mask), index, XPS_CPUS); cpus_read_unlock(); return ret; @@ -3944,13 +3947,15 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb, struct xps_dev_maps *dev_maps, unsigned int tci) { + int tc = netdev_get_prio_tc_map(dev, skb->priority); struct xps_map *map; int queue_index = -1; - if (dev->num_tc) { - tci *= dev->num_tc; - tci += netdev_get_prio_tc_map(dev, skb->priority); - } + if (tc >= dev_maps->num_tc || tci >= dev_maps->nr_ids) + return queue_index; + + tci *= dev_maps->num_tc; + tci += tc; map = rcu_dereference(dev_maps->attr_map[tci]); if (map) { @@ -3981,18 +3986,18 @@ static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev, if (!static_key_false(&xps_rxqs_needed)) goto get_cpus_map; - dev_maps = rcu_dereference(sb_dev->xps_rxqs_map); + dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_RXQS]); if (dev_maps) { int tci = sk_rx_queue_get(sk); - if (tci >= 0 && tci < dev->num_rx_queues) + if (tci >= 0) queue_index = __get_xps_queue_idx(dev, skb, dev_maps, tci); } get_cpus_map: if (queue_index < 0) { - dev_maps = rcu_dereference(sb_dev->xps_cpus_map); + dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_CPUS]); if (dev_maps) { unsigned int tci = skb->sender_cpu - 1; @@ -10305,11 +10310,15 @@ EXPORT_SYMBOL(register_netdev); int netdev_refcnt_read(const struct net_device *dev) { +#ifdef CONFIG_PCPU_DEV_REFCNT int i, refcnt = 0; for_each_possible_cpu(i) refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i); return refcnt; +#else + return refcount_read(&dev->dev_refcnt); +#endif } EXPORT_SYMBOL(netdev_refcnt_read); @@ -10337,7 +10346,7 @@ static void netdev_wait_allrefs(struct net_device *dev) rebroadcast_time = warning_time = jiffies; refcnt = netdev_refcnt_read(dev); - while (refcnt != 0) { + while (refcnt != 1) { if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { rtnl_lock(); @@ -10374,7 +10383,7 @@ static void netdev_wait_allrefs(struct net_device *dev) refcnt = netdev_refcnt_read(dev); - if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) { + if (refcnt != 1 && time_after(jiffies, warning_time + 10 * HZ)) { pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", dev->name, refcnt); warning_time = jiffies; @@ -10450,7 +10459,7 @@ void netdev_run_todo(void) netdev_wait_allrefs(dev); /* paranoia */ - BUG_ON(netdev_refcnt_read(dev)); + BUG_ON(netdev_refcnt_read(dev) != 1); BUG_ON(!list_empty(&dev->ptype_all)); BUG_ON(!list_empty(&dev->ptype_specific)); WARN_ON(rcu_access_pointer(dev->ip_ptr)); @@ -10667,9 +10676,14 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p; +#ifdef CONFIG_PCPU_DEV_REFCNT dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) goto free_dev; + dev_hold(dev); +#else + refcount_set(&dev->dev_refcnt, 1); +#endif if (dev_addr_init(dev)) goto free_pcpu; @@ -10733,8 +10747,10 @@ free_all: return NULL; free_pcpu: +#ifdef CONFIG_PCPU_DEV_REFCNT free_percpu(dev->pcpu_refcnt); free_dev: +#endif netdev_freemem(dev); return NULL; } @@ -10776,8 +10792,10 @@ void free_netdev(struct net_device *dev) list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p); +#ifdef CONFIG_PCPU_DEV_REFCNT free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL; +#endif free_percpu(dev->xdp_bulkq); dev->xdp_bulkq = NULL; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 571f191c06d9..1eb02c2236f2 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -1731,7 +1731,7 @@ static void exit_net_drop_monitor(void) /* * Because of the module_get/put we do in the trace state change path - * we are guarnateed not to have any current users when we get here + * we are guaranteed not to have any current users when we get here */ for_each_possible_cpu(cpu) { diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index c714e6a9dad4..d8b9dbabd4a4 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -10,9 +10,6 @@ #define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) -extern struct list_head ptype_all __read_mostly; -extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; - static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) { struct net *net = seq_file_net(seq); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 307628fdf380..f6197774048b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1361,83 +1361,94 @@ static const struct attribute_group dql_group = { #endif /* CONFIG_BQL */ #ifdef CONFIG_XPS -static ssize_t xps_cpus_show(struct netdev_queue *queue, - char *buf) +static ssize_t xps_queue_show(struct net_device *dev, unsigned int index, + int tc, char *buf, enum xps_map_type type) { - int cpu, len, ret, num_tc = 1, tc = 0; - struct net_device *dev = queue->dev; struct xps_dev_maps *dev_maps; - cpumask_var_t mask; - unsigned long index; - - if (!netif_is_multiqueue(dev)) - return -ENOENT; + unsigned long *mask; + unsigned int nr_ids; + int j, len; - index = get_netdev_queue_index(queue); + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps[type]); - if (!rtnl_trylock()) - return restart_syscall(); + /* Default to nr_cpu_ids/dev->num_rx_queues and do not just return 0 + * when dev_maps hasn't been allocated yet, to be backward compatible. + */ + nr_ids = dev_maps ? dev_maps->nr_ids : + (type == XPS_CPUS ? nr_cpu_ids : dev->num_rx_queues); - if (dev->num_tc) { - /* Do not allow XPS on subordinate device directly */ - num_tc = dev->num_tc; - if (num_tc < 0) { - ret = -EINVAL; - goto err_rtnl_unlock; - } + mask = bitmap_zalloc(nr_ids, GFP_NOWAIT); + if (!mask) { + rcu_read_unlock(); + return -ENOMEM; + } - /* If queue belongs to subordinate dev use its map */ - dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev; + if (!dev_maps || tc >= dev_maps->num_tc) + goto out_no_maps; - tc = netdev_txq_to_tc(dev, index); - if (tc < 0) { - ret = -EINVAL; - goto err_rtnl_unlock; - } - } + for (j = 0; j < nr_ids; j++) { + int i, tci = j * dev_maps->num_tc + tc; + struct xps_map *map; - if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { - ret = -ENOMEM; - goto err_rtnl_unlock; - } + map = rcu_dereference(dev_maps->attr_map[tci]); + if (!map) + continue; - rcu_read_lock(); - dev_maps = rcu_dereference(dev->xps_cpus_map); - if (dev_maps) { - for_each_possible_cpu(cpu) { - int i, tci = cpu * num_tc + tc; - struct xps_map *map; - - map = rcu_dereference(dev_maps->attr_map[tci]); - if (!map) - continue; - - for (i = map->len; i--;) { - if (map->queues[i] == index) { - cpumask_set_cpu(cpu, mask); - break; - } + for (i = map->len; i--;) { + if (map->queues[i] == index) { + set_bit(j, mask); + break; } } } +out_no_maps: rcu_read_unlock(); - rtnl_unlock(); + len = bitmap_print_to_pagebuf(false, buf, mask, nr_ids); + bitmap_free(mask); - len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask)); - free_cpumask_var(mask); return len < PAGE_SIZE ? len : -EINVAL; +} + +static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf) +{ + struct net_device *dev = queue->dev; + unsigned int index; + int len, tc; + + if (!netif_is_multiqueue(dev)) + return -ENOENT; -err_rtnl_unlock: + index = get_netdev_queue_index(queue); + + if (!rtnl_trylock()) + return restart_syscall(); + + /* If queue belongs to subordinate dev use its map */ + dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev; + + tc = netdev_txq_to_tc(dev, index); + if (tc < 0) { + rtnl_unlock(); + return -EINVAL; + } + + /* Make sure the subordinate device can't be freed */ + get_device(&dev->dev); rtnl_unlock(); - return ret; + + len = xps_queue_show(dev, index, tc, buf, XPS_CPUS); + + put_device(&dev->dev); + return len; } static ssize_t xps_cpus_store(struct netdev_queue *queue, const char *buf, size_t len) { struct net_device *dev = queue->dev; - unsigned long index; + unsigned int index; cpumask_var_t mask; int err; @@ -1476,64 +1487,21 @@ static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf) { - int j, len, ret, num_tc = 1, tc = 0; struct net_device *dev = queue->dev; - struct xps_dev_maps *dev_maps; - unsigned long *mask, index; + unsigned int index; + int tc; index = get_netdev_queue_index(queue); if (!rtnl_trylock()) return restart_syscall(); - if (dev->num_tc) { - num_tc = dev->num_tc; - tc = netdev_txq_to_tc(dev, index); - if (tc < 0) { - ret = -EINVAL; - goto err_rtnl_unlock; - } - } - mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL); - if (!mask) { - ret = -ENOMEM; - goto err_rtnl_unlock; - } - - rcu_read_lock(); - dev_maps = rcu_dereference(dev->xps_rxqs_map); - if (!dev_maps) - goto out_no_maps; - - for (j = -1; j = netif_attrmask_next(j, NULL, dev->num_rx_queues), - j < dev->num_rx_queues;) { - int i, tci = j * num_tc + tc; - struct xps_map *map; - - map = rcu_dereference(dev_maps->attr_map[tci]); - if (!map) - continue; - - for (i = map->len; i--;) { - if (map->queues[i] == index) { - set_bit(j, mask); - break; - } - } - } -out_no_maps: - rcu_read_unlock(); - + tc = netdev_txq_to_tc(dev, index); rtnl_unlock(); + if (tc < 0) + return -EINVAL; - len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues); - bitmap_free(mask); - - return len < PAGE_SIZE ? len : -EINVAL; - -err_rtnl_unlock: - rtnl_unlock(); - return ret; + return xps_queue_show(dev, index, tc, buf, XPS_RXQS); } static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, @@ -1541,7 +1509,8 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, { struct net_device *dev = queue->dev; struct net *net = dev_net(dev); - unsigned long *mask, index; + unsigned long *mask; + unsigned int index; int err; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) @@ -1565,7 +1534,7 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, } cpus_read_lock(); - err = __netif_set_xps_queue(dev, mask, index, true); + err = __netif_set_xps_queue(dev, mask, index, XPS_RXQS); cpus_read_unlock(); rtnl_unlock(); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 37c90c1fdc93..2f1497797861 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -84,8 +84,7 @@ #include <net/dn_neigh.h> #include <net/dn_fib.h> -struct dn_rt_hash_bucket -{ +struct dn_rt_hash_bucket { struct dn_route __rcu *chain; spinlock_t lock; }; @@ -359,7 +358,8 @@ static void dn_run_flush(struct timer_list *unused) for (i = 0; i < dn_rt_hash_mask; i++) { spin_lock_bh(&dn_rt_hash_table[i].lock); - if ((rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL)) == NULL) + rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL); + if (!rt) goto nothing_to_declare; for(; rt; rt = next) { @@ -425,7 +425,8 @@ static int dn_return_short(struct sk_buff *skb) /* Add back headers */ skb_push(skb, skb->data - skb_network_header(skb)); - if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL) + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) return NET_RX_DROP; cb = DN_SKB_CB(skb); @@ -461,7 +462,8 @@ static int dn_return_long(struct sk_buff *skb) /* Add back all headers */ skb_push(skb, skb->data - skb_network_header(skb)); - if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL) + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) return NET_RX_DROP; cb = DN_SKB_CB(skb); @@ -505,7 +507,8 @@ static int dn_route_rx_packet(struct net *net, struct sock *sk, struct sk_buff * struct dn_skb_cb *cb; int err; - if ((err = dn_route_input(skb)) == 0) + err = dn_route_input(skb); + if (err == 0) return dst_input(skb); cb = DN_SKB_CB(skb); @@ -629,7 +632,8 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type if (dn == NULL) goto dump_it; - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) goto out; if (!pskb_may_pull(skb, 3)) @@ -1324,7 +1328,8 @@ static int dn_route_input_slow(struct sk_buff *skb) dev_hold(in_dev); - if ((dn_db = rcu_dereference(in_dev->dn_ptr)) == NULL) + dn_db = rcu_dereference(in_dev->dn_ptr); + if (!dn_db) goto out; /* Zero source addresses are not allowed */ diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 58b8fc82cd3c..8746b07668ae 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -1,15 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only -config HAVE_NET_DSA - def_bool y - depends on INET && NETDEVICES && !S390 - -# Drivers must select NET_DSA and the appropriate tagging format menuconfig NET_DSA tristate "Distributed Switch Architecture" - depends on HAVE_NET_DSA depends on BRIDGE || BRIDGE=n depends on HSR || HSR=n + depends on INET && NETDEVICES select GRO_CELLS select NET_SWITCHDEV select PHYLINK @@ -20,7 +15,8 @@ menuconfig NET_DSA if NET_DSA -# tagging formats +# Drivers must select the appropriate tagging format(s) + config NET_DSA_TAG_8021Q tristate select VLAN_8021Q @@ -48,6 +44,13 @@ config NET_DSA_TAG_BRCM Say Y if you want to enable support for tagging frames for the Broadcom switches which place the tag after the MAC source address. +config NET_DSA_TAG_BRCM_LEGACY + tristate "Tag driver for Broadcom legacy switches using in-frame headers" + select NET_DSA_TAG_BRCM_COMMON + help + Say Y if you want to enable support for tagging frames for the + Broadcom legacy switches which place the tag after the MAC source + address. config NET_DSA_TAG_BRCM_PREPEND tristate "Tag driver for Broadcom switches using prepended headers" diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 9d4b0e9b1aa1..4c43c5406834 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -233,19 +233,7 @@ extern const struct phylink_mac_ops dsa_port_phylink_mac_ops; static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp, struct net_device *dev) { - /* Switchdev offloading can be configured on: */ - - if (dev == dp->slave) - /* DSA ports directly connected to a bridge, and event - * was emitted for the ports themselves. - */ - return true; - - if (dp->lag_dev == dev) - /* DSA ports connected to a bridge via a LAG */ - return true; - - return false; + return dsa_port_to_bridge_port(dp) == dev; } static inline bool dsa_port_offloads_bridge(struct dsa_port *dp, diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index e2577a7dcbca..40e9f3098c8d 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -12,9 +12,26 @@ #include "dsa_priv.h" -/* This tag length is 4 bytes, older ones were 6 bytes, we do not - * handle them - */ +/* Legacy Broadcom tag (6 bytes) */ +#define BRCM_LEG_TAG_LEN 6 + +/* Type fields */ +/* 1st byte in the tag */ +#define BRCM_LEG_TYPE_HI 0x88 +/* 2nd byte in the tag */ +#define BRCM_LEG_TYPE_LO 0x74 + +/* Tag fields */ +/* 3rd byte in the tag */ +#define BRCM_LEG_UNICAST (0 << 5) +#define BRCM_LEG_MULTICAST (1 << 5) +#define BRCM_LEG_EGRESS (2 << 5) +#define BRCM_LEG_INGRESS (3 << 5) + +/* 6th byte in the tag */ +#define BRCM_LEG_PORT_ID (0xf) + +/* Newer Broadcom tag (4 bytes) */ #define BRCM_TAG_LEN 4 /* Tag is constructed and desconstructed using byte by byte access @@ -195,6 +212,87 @@ DSA_TAG_DRIVER(brcm_netdev_ops); MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM); #endif +#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY) +static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + u8 *brcm_tag; + + /* The Ethernet switch we are interfaced with needs packets to be at + * least 64 bytes (including FCS) otherwise they will be discarded when + * they enter the switch port logic. When Broadcom tags are enabled, we + * need to make sure that packets are at least 70 bytes + * (including FCS and tag) because the length verification is done after + * the Broadcom tag is stripped off the ingress packet. + * + * Let dsa_slave_xmit() free the SKB + */ + if (__skb_put_padto(skb, ETH_ZLEN + BRCM_LEG_TAG_LEN, false)) + return NULL; + + skb_push(skb, BRCM_LEG_TAG_LEN); + + memmove(skb->data, skb->data + BRCM_LEG_TAG_LEN, 2 * ETH_ALEN); + + brcm_tag = skb->data + 2 * ETH_ALEN; + + /* Broadcom tag type */ + brcm_tag[0] = BRCM_LEG_TYPE_HI; + brcm_tag[1] = BRCM_LEG_TYPE_LO; + + /* Broadcom tag value */ + brcm_tag[2] = BRCM_LEG_EGRESS; + brcm_tag[3] = 0; + brcm_tag[4] = 0; + brcm_tag[5] = dp->index & BRCM_LEG_PORT_ID; + + return skb; +} + +static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, + struct net_device *dev, + struct packet_type *pt) +{ + int source_port; + u8 *brcm_tag; + + if (unlikely(!pskb_may_pull(skb, BRCM_LEG_PORT_ID))) + return NULL; + + brcm_tag = skb->data - 2; + + source_port = brcm_tag[5] & BRCM_LEG_PORT_ID; + + skb->dev = dsa_master_find_slave(dev, 0, source_port); + if (!skb->dev) + return NULL; + + /* Remove Broadcom tag and update checksum */ + skb_pull_rcsum(skb, BRCM_LEG_TAG_LEN); + + skb->offload_fwd_mark = 1; + + /* Move the Ethernet DA and SA */ + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - BRCM_LEG_TAG_LEN, + 2 * ETH_ALEN); + + return skb; +} + +static const struct dsa_device_ops brcm_legacy_netdev_ops = { + .name = "brcm-legacy", + .proto = DSA_TAG_PROTO_BRCM_LEGACY, + .xmit = brcm_leg_tag_xmit, + .rcv = brcm_leg_tag_rcv, + .overhead = BRCM_LEG_TAG_LEN, +}; + +DSA_TAG_DRIVER(brcm_legacy_netdev_ops); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM_LEGACY); +#endif /* CONFIG_NET_DSA_TAG_BRCM_LEGACY */ + #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND) static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb, struct net_device *dev) @@ -227,6 +325,9 @@ static struct dsa_tag_driver *dsa_tag_driver_array[] = { #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM) &DSA_TAG_DRIVER_NAME(brcm_netdev_ops), #endif +#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY) + &DSA_TAG_DRIVER_NAME(brcm_legacy_netdev_ops), +#endif #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND) &DSA_TAG_DRIVER_NAME(brcm_prepend_netdev_ops), #endif diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index e01cf766d2c5..933b427122be 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -58,6 +58,7 @@ #include <net/ip.h> #include <net/dsa.h> #include <net/flow_dissector.h> +#include <net/gro.h> #include <linux/uaccess.h> #include <net/pkt_sched.h> @@ -449,7 +450,10 @@ struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb) skb_gro_pull(skb, sizeof(*eh)); skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); - pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); + + pp = indirect_call_gro_receive_inet(ptype->callbacks.gro_receive, + ipv6_gro_receive, inet_gro_receive, + head, skb); out_unlock: rcu_read_unlock(); @@ -473,8 +477,9 @@ int eth_gro_complete(struct sk_buff *skb, int nhoff) rcu_read_lock(); ptype = gro_find_complete_by_type(type); if (ptype != NULL) - err = ptype->callbacks.gro_complete(skb, nhoff + - sizeof(struct ethhdr)); + err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete, + ipv6_gro_complete, inet_gro_complete, + skb, nhoff + sizeof(*eh)); rcu_read_unlock(); return err; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 24783b71c584..0788cc3b3114 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1844,6 +1844,18 @@ out: return ret; } +__printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vsnprintf(*data, ETH_GSTRING_LEN, fmt, args); + va_end(args); + + *data += ETH_GSTRING_LEN; +} +EXPORT_SYMBOL(ethtool_sprintf); + static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) { struct ethtool_value id; diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c index 4cfd9e829c7b..99f3af1a9d4d 100644 --- a/net/hsr/hsr_debugfs.c +++ b/net/hsr/hsr_debugfs.c @@ -108,7 +108,7 @@ void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev) /* hsr_debugfs_term - Tear down debugfs intrastructure * * Description: - * When Debufs is configured this routine removes debugfs file system + * When Debugfs is configured this routine removes debugfs file system * elements that are specific to hsr */ void diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 203890e378cb..2ee20743cb41 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -802,7 +802,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) u16 version; int length; - /* UDP has verifed checksum */ + /* UDP has verified checksum */ /* UDP always verifies the packet length. */ __skb_pull(skb, sizeof(struct udphdr)); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 1eb7495ac5b4..8a930ca6d6b1 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -20,7 +20,7 @@ struct vport; struct vport_parms; -/* The following definitions are for users of the vport subsytem: */ +/* The following definitions are for users of the vport subsystem: */ int ovs_vport_init(void); void ovs_vport_exit(void); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index d097b5c15faa..9736df97e04d 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -209,16 +209,16 @@ static bool fl_range_port_dst_cmp(struct cls_fl_filter *filter, struct fl_flow_key *key, struct fl_flow_key *mkey) { - __be16 min_mask, max_mask, min_val, max_val; + u16 min_mask, max_mask, min_val, max_val; - min_mask = htons(filter->mask->key.tp_range.tp_min.dst); - max_mask = htons(filter->mask->key.tp_range.tp_max.dst); - min_val = htons(filter->key.tp_range.tp_min.dst); - max_val = htons(filter->key.tp_range.tp_max.dst); + min_mask = ntohs(filter->mask->key.tp_range.tp_min.dst); + max_mask = ntohs(filter->mask->key.tp_range.tp_max.dst); + min_val = ntohs(filter->key.tp_range.tp_min.dst); + max_val = ntohs(filter->key.tp_range.tp_max.dst); if (min_mask && max_mask) { - if (htons(key->tp_range.tp.dst) < min_val || - htons(key->tp_range.tp.dst) > max_val) + if (ntohs(key->tp_range.tp.dst) < min_val || + ntohs(key->tp_range.tp.dst) > max_val) return false; /* skb does not have min and max values */ @@ -232,16 +232,16 @@ static bool fl_range_port_src_cmp(struct cls_fl_filter *filter, struct fl_flow_key *key, struct fl_flow_key *mkey) { - __be16 min_mask, max_mask, min_val, max_val; + u16 min_mask, max_mask, min_val, max_val; - min_mask = htons(filter->mask->key.tp_range.tp_min.src); - max_mask = htons(filter->mask->key.tp_range.tp_max.src); - min_val = htons(filter->key.tp_range.tp_min.src); - max_val = htons(filter->key.tp_range.tp_max.src); + min_mask = ntohs(filter->mask->key.tp_range.tp_min.src); + max_mask = ntohs(filter->mask->key.tp_range.tp_max.src); + min_val = ntohs(filter->key.tp_range.tp_min.src); + max_val = ntohs(filter->key.tp_range.tp_max.src); if (min_mask && max_mask) { - if (htons(key->tp_range.tp.src) < min_val || - htons(key->tp_range.tp.src) > max_val) + if (ntohs(key->tp_range.tp.src) < min_val || + ntohs(key->tp_range.tp.src) > max_val) return false; /* skb does not have min and max values */ @@ -783,16 +783,16 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key, TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src)); if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst && - htons(key->tp_range.tp_max.dst) <= - htons(key->tp_range.tp_min.dst)) { + ntohs(key->tp_range.tp_max.dst) <= + ntohs(key->tp_range.tp_min.dst)) { NL_SET_ERR_MSG_ATTR(extack, tb[TCA_FLOWER_KEY_PORT_DST_MIN], "Invalid destination port range (min must be strictly smaller than max)"); return -EINVAL; } if (mask->tp_range.tp_min.src && mask->tp_range.tp_max.src && - htons(key->tp_range.tp_max.src) <= - htons(key->tp_range.tp_min.src)) { + ntohs(key->tp_range.tp_max.src) <= + ntohs(key->tp_range.tp_min.src)) { NL_SET_ERR_MSG_ATTR(extack, tb[TCA_FLOWER_KEY_PORT_SRC_MIN], "Invalid source port range (min must be strictly smaller than max)"); @@ -1044,8 +1044,8 @@ static int fl_set_key_flags(struct nlattr **tb, u32 *flags_key, return -EINVAL; } - key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS])); - mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK])); + key = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS])); + mask = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS_MASK])); *flags_key = 0; *flags_mask = 0; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 8287894541e3..922ed6b91abb 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -411,18 +411,10 @@ done: return txtime; } -static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff **to_free) +static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch, + struct Qdisc *child, struct sk_buff **to_free) { struct taprio_sched *q = qdisc_priv(sch); - struct Qdisc *child; - int queue; - - queue = skb_get_queue_mapping(skb); - - child = q->qdiscs[queue]; - if (unlikely(!child)) - return qdisc_drop(skb, sch, to_free); if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) { if (!is_valid_interval(skb, sch)) @@ -439,6 +431,58 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, return qdisc_enqueue(skb, child, to_free); } +static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct Qdisc *child; + int queue; + + queue = skb_get_queue_mapping(skb); + + child = q->qdiscs[queue]; + if (unlikely(!child)) + return qdisc_drop(skb, sch, to_free); + + /* Large packets might not be transmitted when the transmission duration + * exceeds any configured interval. Therefore, segment the skb into + * smaller chunks. Skip it for the full offload case, as the driver + * and/or the hardware is expected to handle this. + */ + if (skb_is_gso(skb) && !FULL_OFFLOAD_IS_ENABLED(q->flags)) { + unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb); + netdev_features_t features = netif_skb_features(skb); + struct sk_buff *segs, *nskb; + int ret; + + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + if (IS_ERR_OR_NULL(segs)) + return qdisc_drop(skb, sch, to_free); + + skb_list_walk_safe(segs, segs, nskb) { + skb_mark_not_on_list(segs); + qdisc_skb_cb(segs)->pkt_len = segs->len; + slen += segs->len; + + ret = taprio_enqueue_one(segs, sch, child, to_free); + if (ret != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(ret)) + qdisc_qstats_drop(sch); + } else { + numsegs++; + } + } + + if (numsegs > 1) + qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen); + consume_skb(skb); + + return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; + } + + return taprio_enqueue_one(skb, sch, child, to_free); +} + static struct sk_buff *taprio_peek_soft(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); diff --git a/net/tipc/addr.c b/net/tipc/addr.c index abe29d1aa23a..fd0796269eed 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -3,6 +3,7 @@ * * Copyright (c) 2000-2006, 2018, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 1a11831bef62..0772cfadaa0d 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -3,7 +3,7 @@ * * Copyright (c) 2000-2006, 2018, Ericsson AB * Copyright (c) 2004-2005, Wind River Systems - * Copyright (c) 2020, Red Hat Inc + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -44,6 +44,50 @@ #include <net/netns/generic.h> #include "core.h" +/* Struct tipc_uaddr: internal version of struct sockaddr_tipc. + * Must be kept aligned both regarding field positions and size. + */ +struct tipc_uaddr { + unsigned short family; + unsigned char addrtype; + signed char scope; + union { + struct { + struct tipc_service_addr sa; + u32 lookup_node; + }; + struct tipc_service_range sr; + struct tipc_socket_addr sk; + }; +}; + +static inline void tipc_uaddr(struct tipc_uaddr *ua, u32 atype, u32 scope, + u32 type, u32 lower, u32 upper) +{ + ua->family = AF_TIPC; + ua->addrtype = atype; + ua->scope = scope; + ua->sr.type = type; + ua->sr.lower = lower; + ua->sr.upper = upper; +} + +static inline bool tipc_uaddr_valid(struct tipc_uaddr *ua, int len) +{ + u32 atype; + + if (len < sizeof(struct sockaddr_tipc)) + return false; + atype = ua->addrtype; + if (ua->family != AF_TIPC) + return false; + if (atype == TIPC_SERVICE_ADDR || atype == TIPC_SOCKET_ADDR) + return true; + if (atype == TIPC_SERVICE_RANGE) + return ua->sr.upper >= ua->sr.lower; + return false; +} + static inline u32 tipc_own_addr(struct net *net) { return tipc_net(net)->node_addr; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index e9263280a2d4..3f0a25345a7c 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -707,8 +707,11 @@ bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy) bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) { struct tipc_msg *msg = buf_msg(skb); - u32 dport, dnode; - u32 onode = tipc_own_addr(net); + u32 scope = msg_lookup_scope(msg); + u32 self = tipc_own_addr(net); + u32 inst = msg_nameinst(msg); + struct tipc_socket_addr sk; + struct tipc_uaddr ua; if (!msg_isdata(msg)) return false; @@ -722,16 +725,16 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) msg = buf_msg(skb); if (msg_reroute_cnt(msg)) return false; - dnode = tipc_scope2node(net, msg_lookup_scope(msg)); - dport = tipc_nametbl_translate(net, msg_nametype(msg), - msg_nameinst(msg), &dnode); - if (!dport) + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, scope, + msg_nametype(msg), inst, inst); + sk.node = tipc_scope2node(net, scope); + if (!tipc_nametbl_lookup_anycast(net, &ua, &sk)) return false; msg_incr_reroute_cnt(msg); - if (dnode != onode) - msg_set_prevnode(msg, onode); - msg_set_destnode(msg, dnode); - msg_set_destport(msg, dport); + if (sk.node != self) + msg_set_prevnode(msg, self); + msg_set_destnode(msg, sk.node); + msg_set_destport(msg, sk.ref); *err = TIPC_OK; return true; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 6cf57c3bfa27..bda902caa814 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -1,8 +1,9 @@ /* * net/tipc/name_distr.c: TIPC name distribution code * - * Copyright (c) 2000-2006, 2014, Ericsson AB + * Copyright (c) 2000-2006, 2014-2019, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -55,10 +56,10 @@ struct distr_queue_item { */ static void publ_to_item(struct distr_item *i, struct publication *p) { - i->type = htonl(p->type); - i->lower = htonl(p->lower); - i->upper = htonl(p->upper); - i->port = htonl(p->port); + i->type = htonl(p->sr.type); + i->lower = htonl(p->sr.lower); + i->upper = htonl(p->sr.upper); + i->port = htonl(p->sk.ref); i->key = htonl(p->key); } @@ -90,20 +91,20 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, /** * tipc_named_publish - tell other nodes about a new publication by this node * @net: the associated network namespace - * @publ: the new publication + * @p: the new publication */ -struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) +struct sk_buff *tipc_named_publish(struct net *net, struct publication *p) { struct name_table *nt = tipc_name_table(net); struct distr_item *item; struct sk_buff *skb; - if (publ->scope == TIPC_NODE_SCOPE) { - list_add_tail_rcu(&publ->binding_node, &nt->node_scope); + if (p->scope == TIPC_NODE_SCOPE) { + list_add_tail_rcu(&p->binding_node, &nt->node_scope); return NULL; } write_lock_bh(&nt->cluster_scope_lock); - list_add_tail(&publ->binding_node, &nt->cluster_scope); + list_add_tail(&p->binding_node, &nt->cluster_scope); write_unlock_bh(&nt->cluster_scope_lock); skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0); if (!skb) { @@ -113,25 +114,25 @@ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++); msg_set_non_legacy(buf_msg(skb)); item = (struct distr_item *)msg_data(buf_msg(skb)); - publ_to_item(item, publ); + publ_to_item(item, p); return skb; } /** * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node * @net: the associated network namespace - * @publ: the withdrawn publication + * @p: the withdrawn publication */ -struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) +struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *p) { struct name_table *nt = tipc_name_table(net); struct distr_item *item; struct sk_buff *skb; write_lock_bh(&nt->cluster_scope_lock); - list_del(&publ->binding_node); + list_del(&p->binding_node); write_unlock_bh(&nt->cluster_scope_lock); - if (publ->scope == TIPC_NODE_SCOPE) + if (p->scope == TIPC_NODE_SCOPE) return NULL; skb = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0); @@ -142,7 +143,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++); msg_set_non_legacy(buf_msg(skb)); item = (struct distr_item *)msg_data(buf_msg(skb)); - publ_to_item(item, publ); + publ_to_item(item, p); return skb; } @@ -233,33 +234,27 @@ void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities) /** * tipc_publ_purge - remove publication associated with a failed node * @net: the associated network namespace - * @publ: the publication to remove + * @p: the publication to remove * @addr: failed node's address * * Invoked for each publication issued by a newly failed node. * Removes publication structure from name table & deletes it. */ -static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) +static void tipc_publ_purge(struct net *net, struct publication *p, u32 addr) { struct tipc_net *tn = tipc_net(net); - struct publication *p; + struct publication *_p; + struct tipc_uaddr ua; + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, p->scope, p->sr.type, + p->sr.lower, p->sr.upper); spin_lock_bh(&tn->nametbl_lock); - p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, publ->upper, - publ->node, publ->key); - if (p) - tipc_node_unsubscribe(net, &p->binding_node, addr); + _p = tipc_nametbl_remove_publ(net, &ua, &p->sk, p->key); + if (_p) + tipc_node_unsubscribe(net, &_p->binding_node, addr); spin_unlock_bh(&tn->nametbl_lock); - - if (p != publ) { - pr_err("Unable to remove publication from failed node\n" - " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n", - publ->type, publ->lower, publ->node, publ->port, - publ->key); - } - - if (p) - kfree_rcu(p, rcu); + if (_p) + kfree_rcu(_p, rcu); } void tipc_publ_notify(struct net *net, struct list_head *nsub_list, @@ -293,30 +288,30 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, u32 node, u32 dtype) { struct publication *p = NULL; - u32 lower = ntohl(i->lower); - u32 upper = ntohl(i->upper); - u32 type = ntohl(i->type); - u32 port = ntohl(i->port); + struct tipc_socket_addr sk; + struct tipc_uaddr ua; u32 key = ntohl(i->key); + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE, + ntohl(i->type), ntohl(i->lower), ntohl(i->upper)); + sk.ref = ntohl(i->port); + sk.node = node; + if (dtype == PUBLICATION) { - p = tipc_nametbl_insert_publ(net, type, lower, upper, - TIPC_CLUSTER_SCOPE, node, - port, key); + p = tipc_nametbl_insert_publ(net, &ua, &sk, key); if (p) { tipc_node_subscribe(net, &p->binding_node, node); return true; } } else if (dtype == WITHDRAWAL) { - p = tipc_nametbl_remove_publ(net, type, lower, - upper, node, key); + p = tipc_nametbl_remove_publ(net, &ua, &sk, key); if (p) { tipc_node_unsubscribe(net, &p->binding_node, node); kfree_rcu(p, rcu); return true; } - pr_warn_ratelimited("Failed to remove binding %u,%u from %x\n", - type, lower, node); + pr_warn_ratelimited("Failed to remove binding %u,%u from %u\n", + ua.sr.type, ua.sr.lower, node); } else { pr_warn("Unrecognized name table message received\n"); } @@ -410,15 +405,15 @@ void tipc_named_reinit(struct net *net) { struct name_table *nt = tipc_name_table(net); struct tipc_net *tn = tipc_net(net); - struct publication *publ; + struct publication *p; u32 self = tipc_own_addr(net); spin_lock_bh(&tn->nametbl_lock); - list_for_each_entry_rcu(publ, &nt->node_scope, binding_node) - publ->node = self; - list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node) - publ->node = self; + list_for_each_entry_rcu(p, &nt->node_scope, binding_node) + p->sk.node = self; + list_for_each_entry_rcu(p, &nt->cluster_scope, binding_node) + p->sk.node = self; nt->rc_dests = 0; spin_unlock_bh(&tn->nametbl_lock); } diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index ee5ac40ea2b6..6db9f9e7c0ac 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -3,7 +3,7 @@ * * Copyright (c) 2000-2006, 2014-2018, Ericsson AB * Copyright (c) 2004-2008, 2010-2014, Wind River Systems - * Copyright (c) 2020, Red Hat Inc + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -222,59 +222,57 @@ static int hash(int x) /** * tipc_publ_create - create a publication structure - * @type: name sequence type - * @lower: name sequence lower bound - * @upper: name sequence upper bound - * @scope: publication scope - * @node: network address of publishing socket - * @port: publishing port + * @ua: the service range the user is binding to + * @sk: the address of the socket that is bound * @key: publication key */ -static struct publication *tipc_publ_create(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 port, +static struct publication *tipc_publ_create(struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, u32 key) { - struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC); + struct publication *p = kzalloc(sizeof(*p), GFP_ATOMIC); - if (!publ) + if (!p) return NULL; - publ->type = type; - publ->lower = lower; - publ->upper = upper; - publ->scope = scope; - publ->node = node; - publ->port = port; - publ->key = key; - INIT_LIST_HEAD(&publ->binding_sock); - INIT_LIST_HEAD(&publ->binding_node); - INIT_LIST_HEAD(&publ->local_publ); - INIT_LIST_HEAD(&publ->all_publ); - INIT_LIST_HEAD(&publ->list); - return publ; + p->sr = ua->sr; + p->sk = *sk; + p->scope = ua->scope; + p->key = key; + INIT_LIST_HEAD(&p->binding_sock); + INIT_LIST_HEAD(&p->binding_node); + INIT_LIST_HEAD(&p->local_publ); + INIT_LIST_HEAD(&p->all_publ); + INIT_LIST_HEAD(&p->list); + return p; } /** * tipc_service_create - create a service structure for the specified 'type' - * @type: service type - * @hd: name_table services list + * @net: network namespace + * @ua: address representing the service to be bound * * Allocates a single range structure and sets it to all 0's. */ -static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd) +static struct tipc_service *tipc_service_create(struct net *net, + struct tipc_uaddr *ua) { - struct tipc_service *service = kzalloc(sizeof(*service), GFP_ATOMIC); + struct name_table *nt = tipc_name_table(net); + struct tipc_service *service; + struct hlist_head *hd; + service = kzalloc(sizeof(*service), GFP_ATOMIC); if (!service) { pr_warn("Service creation failed, no memory\n"); return NULL; } spin_lock_init(&service->lock); - service->type = type; + service->type = ua->sr.type; service->ranges = RB_ROOT; INIT_HLIST_NODE(&service->service_list); INIT_LIST_HEAD(&service->subscriptions); + hd = &nt->services[hash(ua->sr.type)]; hlist_add_head_rcu(&service->service_list, hd); return service; } @@ -282,13 +280,13 @@ static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd) /* tipc_service_find_range - find service range matching publication parameters */ static struct service_range *tipc_service_find_range(struct tipc_service *sc, - u32 lower, u32 upper) + struct tipc_uaddr *ua) { struct service_range *sr; - service_range_foreach_match(sr, sc, lower, upper) { + service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) { /* Look for exact match */ - if (sr->lower == lower && sr->upper == upper) + if (sr->lower == ua->sr.lower && sr->upper == ua->sr.upper) return sr; } @@ -296,10 +294,12 @@ static struct service_range *tipc_service_find_range(struct tipc_service *sc, } static struct service_range *tipc_service_create_range(struct tipc_service *sc, - u32 lower, u32 upper) + struct publication *p) { struct rb_node **n, *parent = NULL; struct service_range *sr; + u32 lower = p->sr.lower; + u32 upper = p->sr.upper; n = &sc->ranges.rb_node; while (*n) { @@ -327,64 +327,68 @@ static struct service_range *tipc_service_create_range(struct tipc_service *sc, return sr; } -static struct publication *tipc_service_insert_publ(struct net *net, - struct tipc_service *sc, - u32 type, u32 lower, - u32 upper, u32 scope, - u32 node, u32 port, - u32 key) +static bool tipc_service_insert_publ(struct net *net, + struct tipc_service *sc, + struct publication *p) { struct tipc_subscription *sub, *tmp; struct service_range *sr; - struct publication *p; + struct publication *_p; + u32 node = p->sk.node; bool first = false; + bool res = false; + u32 key = p->key; - sr = tipc_service_create_range(sc, lower, upper); + spin_lock_bh(&sc->lock); + sr = tipc_service_create_range(sc, p); if (!sr) - goto err; + goto exit; first = list_empty(&sr->all_publ); /* Return if the publication already exists */ - list_for_each_entry(p, &sr->all_publ, all_publ) { - if (p->key == key && (!p->node || p->node == node)) - return NULL; + list_for_each_entry(_p, &sr->all_publ, all_publ) { + if (_p->key == key && (!_p->sk.node || _p->sk.node == node)) { + pr_debug("Failed to bind duplicate %u,%u,%u/%u:%u/%u\n", + p->sr.type, p->sr.lower, p->sr.upper, + node, p->sk.ref, key); + goto exit; + } } - /* Create and insert publication */ - p = tipc_publ_create(type, lower, upper, scope, node, port, key); - if (!p) - goto err; - /* Suppose there shouldn't be a huge gap btw publs i.e. >INT_MAX */ - p->id = sc->publ_cnt++; - if (in_own_node(net, node)) + if (in_own_node(net, p->sk.node)) list_add(&p->local_publ, &sr->local_publ); list_add(&p->all_publ, &sr->all_publ); + p->id = sc->publ_cnt++; /* Any subscriptions waiting for notification? */ list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) { - tipc_sub_report_overlap(sub, p->lower, p->upper, TIPC_PUBLISHED, - p->port, p->node, p->scope, first); + tipc_sub_report_overlap(sub, p, TIPC_PUBLISHED, first); } - return p; -err: - pr_warn("Failed to bind to %u,%u,%u, no memory\n", type, lower, upper); - return NULL; + res = true; +exit: + if (!res) + pr_warn("Failed to bind to %u,%u,%u\n", + p->sr.type, p->sr.lower, p->sr.upper); + spin_unlock_bh(&sc->lock); + return res; } /** * tipc_service_remove_publ - remove a publication from a service - * @sr: service_range to remove publication from - * @node: target node + * @r: service_range to remove publication from + * @sk: address publishing socket * @key: target publication key */ -static struct publication *tipc_service_remove_publ(struct service_range *sr, - u32 node, u32 key) +static struct publication *tipc_service_remove_publ(struct service_range *r, + struct tipc_socket_addr *sk, + u32 key) { struct publication *p; + u32 node = sk->node; - list_for_each_entry(p, &sr->all_publ, all_publ) { - if (p->key != key || (node && node != p->node)) + list_for_each_entry(p, &r->all_publ, all_publ) { + if (p->key != key || (node && node != p->sk.node)) continue; list_del(&p->all_publ); list_del(&p->local_publ); @@ -417,17 +421,14 @@ static int tipc_publ_sort(void *priv, struct list_head *a, static void tipc_service_subscribe(struct tipc_service *service, struct tipc_subscription *sub) { - struct tipc_subscr *sb = &sub->evt.s; struct publication *p, *first, *tmp; struct list_head publ_list; struct service_range *sr; - struct tipc_service_range r; - u32 filter; + u32 filter, lower, upper; - r.type = tipc_sub_read(sb, seq.type); - r.lower = tipc_sub_read(sb, seq.lower); - r.upper = tipc_sub_read(sb, seq.upper); - filter = tipc_sub_read(sb, filter); + filter = sub->s.filter; + lower = sub->s.seq.lower; + upper = sub->s.seq.upper; tipc_sub_get(sub); list_add(&sub->service_list, &service->subscriptions); @@ -436,7 +437,7 @@ static void tipc_service_subscribe(struct tipc_service *service, return; INIT_LIST_HEAD(&publ_list); - service_range_foreach_match(sr, service, r.lower, r.upper) { + service_range_foreach_match(sr, service, lower, upper) { first = NULL; list_for_each_entry(p, &sr->all_publ, all_publ) { if (filter & TIPC_SUB_PORTS) @@ -452,80 +453,74 @@ static void tipc_service_subscribe(struct tipc_service *service, /* Sort the publications before reporting */ list_sort(NULL, &publ_list, tipc_publ_sort); list_for_each_entry_safe(p, tmp, &publ_list, list) { - tipc_sub_report_overlap(sub, p->lower, p->upper, - TIPC_PUBLISHED, p->port, p->node, - p->scope, true); + tipc_sub_report_overlap(sub, p, TIPC_PUBLISHED, true); list_del_init(&p->list); } } -static struct tipc_service *tipc_service_find(struct net *net, u32 type) +static struct tipc_service *tipc_service_find(struct net *net, + struct tipc_uaddr *ua) { struct name_table *nt = tipc_name_table(net); struct hlist_head *service_head; struct tipc_service *service; - service_head = &nt->services[hash(type)]; + service_head = &nt->services[hash(ua->sr.type)]; hlist_for_each_entry_rcu(service, service_head, service_list) { - if (service->type == type) + if (service->type == ua->sr.type) return service; } return NULL; }; -struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, - u32 lower, u32 upper, - u32 scope, u32 node, - u32 port, u32 key) +struct publication *tipc_nametbl_insert_publ(struct net *net, + struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, + u32 key) { - struct name_table *nt = tipc_name_table(net); struct tipc_service *sc; struct publication *p; - if (scope > TIPC_NODE_SCOPE || lower > upper) { - pr_debug("Failed to bind illegal {%u,%u,%u} with scope %u\n", - type, lower, upper, scope); - return NULL; - } - sc = tipc_service_find(net, type); - if (!sc) - sc = tipc_service_create(type, &nt->services[hash(type)]); - if (!sc) + p = tipc_publ_create(ua, sk, key); + if (!p) return NULL; - spin_lock_bh(&sc->lock); - p = tipc_service_insert_publ(net, sc, type, lower, upper, - scope, node, port, key); - spin_unlock_bh(&sc->lock); - return p; + sc = tipc_service_find(net, ua); + if (!sc) + sc = tipc_service_create(net, ua); + if (sc && tipc_service_insert_publ(net, sc, p)) + return p; + kfree(p); + return NULL; } -struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, - u32 lower, u32 upper, - u32 node, u32 key) +struct publication *tipc_nametbl_remove_publ(struct net *net, + struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, + u32 key) { - struct tipc_service *sc = tipc_service_find(net, type); struct tipc_subscription *sub, *tmp; - struct service_range *sr = NULL; struct publication *p = NULL; + struct service_range *sr; + struct tipc_service *sc; bool last; + sc = tipc_service_find(net, ua); if (!sc) - return NULL; + goto exit; spin_lock_bh(&sc->lock); - sr = tipc_service_find_range(sc, lower, upper); + sr = tipc_service_find_range(sc, ua); if (!sr) - goto exit; - p = tipc_service_remove_publ(sr, node, key); + goto unlock; + p = tipc_service_remove_publ(sr, sk, key); if (!p) - goto exit; + goto unlock; /* Notify any waiting subscriptions */ last = list_empty(&sr->all_publ); list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) { - tipc_sub_report_overlap(sub, lower, upper, TIPC_WITHDRAWN, - p->port, node, p->scope, last); + tipc_sub_report_overlap(sub, p, TIPC_WITHDRAWN, last); } /* Remove service range item if this was its last publication */ @@ -534,77 +529,83 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, kfree(sr); } - /* Delete service item if this no more publications and subscriptions */ + /* Delete service item if no more publications and subscriptions */ if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) { hlist_del_init_rcu(&sc->service_list); kfree_rcu(sc, rcu); } -exit: +unlock: spin_unlock_bh(&sc->lock); +exit: + if (!p) { + pr_err("Failed to remove unknown binding: %u,%u,%u/%u:%u/%u\n", + ua->sr.type, ua->sr.lower, ua->sr.upper, + sk->node, sk->ref, key); + } return p; } /** - * tipc_nametbl_translate - perform service instance to socket translation + * tipc_nametbl_lookup_anycast - perform service instance to socket translation * @net: network namespace - * @type: message type - * @instance: message instance - * @dnode: the search domain used during translation + * @ua: service address to look up + * @sk: address to socket we want to find * + * On entry, a non-zero 'sk->node' indicates the node where we want lookup to be + * performed, which may not be this one. * On exit: - * - if translation is deferred to another node, leave 'dnode' unchanged and - * return 0 - * - if translation is attempted and succeeds, set 'dnode' to the publishing - * node and return the published (non-zero) port number - * - if translation is attempted and fails, set 'dnode' to 0 and return 0 + * - If lookup is deferred to another node, leave 'sk->node' unchanged and + * return 'true'. + * - If lookup is successful, set the 'sk->node' and 'sk->ref' (== portid) which + * represent the bound socket and return 'true'. + * - If lookup fails, return 'false' * * Note that for legacy users (node configured with Z.C.N address format) the - * 'closest-first' lookup algorithm must be maintained, i.e., if dnode is 0 + * 'closest-first' lookup algorithm must be maintained, i.e., if sk.node is 0 * we must look in the local binding list first */ -u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode) +bool tipc_nametbl_lookup_anycast(struct net *net, + struct tipc_uaddr *ua, + struct tipc_socket_addr *sk) { struct tipc_net *tn = tipc_net(net); bool legacy = tn->legacy_addr_format; u32 self = tipc_own_addr(net); - struct service_range *sr; + u32 inst = ua->sa.instance; + struct service_range *r; struct tipc_service *sc; - struct list_head *list; struct publication *p; - u32 port = 0; - u32 node = 0; + struct list_head *l; + bool res = false; - if (!tipc_in_scope(legacy, *dnode, self)) - return 0; + if (!tipc_in_scope(legacy, sk->node, self)) + return true; rcu_read_lock(); - sc = tipc_service_find(net, type); + sc = tipc_service_find(net, ua); if (unlikely(!sc)) goto exit; spin_lock_bh(&sc->lock); - service_range_foreach_match(sr, sc, instance, instance) { + service_range_foreach_match(r, sc, inst, inst) { /* Select lookup algo: local, closest-first or round-robin */ - if (*dnode == self) { - list = &sr->local_publ; - if (list_empty(list)) + if (sk->node == self) { + l = &r->local_publ; + if (list_empty(l)) continue; - p = list_first_entry(list, struct publication, - local_publ); - list_move_tail(&p->local_publ, &sr->local_publ); - } else if (legacy && !*dnode && !list_empty(&sr->local_publ)) { - list = &sr->local_publ; - p = list_first_entry(list, struct publication, - local_publ); - list_move_tail(&p->local_publ, &sr->local_publ); + p = list_first_entry(l, struct publication, local_publ); + list_move_tail(&p->local_publ, &r->local_publ); + } else if (legacy && !sk->node && !list_empty(&r->local_publ)) { + l = &r->local_publ; + p = list_first_entry(l, struct publication, local_publ); + list_move_tail(&p->local_publ, &r->local_publ); } else { - list = &sr->all_publ; - p = list_first_entry(list, struct publication, - all_publ); - list_move_tail(&p->all_publ, &sr->all_publ); + l = &r->all_publ; + p = list_first_entry(l, struct publication, all_publ); + list_move_tail(&p->all_publ, &r->all_publ); } - port = p->port; - node = p->node; + *sk = p->sk; + res = true; /* Todo: as for legacy, pick the first matching range only, a * "true" round-robin will be performed as needed. */ @@ -614,40 +615,45 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode) exit: rcu_read_unlock(); - *dnode = node; - return port; + return res; } -bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope, - struct list_head *dsts, int *dstcnt, u32 exclude, - bool all) +/* tipc_nametbl_lookup_group(): lookup destinaton(s) in a communication group + * Returns a list of one (== group anycast) or more (== group multicast) + * destination socket/node pairs matching the given address. + * The requester may or may not want to exclude himself from the list. + */ +bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua, + struct list_head *dsts, int *dstcnt, + u32 exclude, bool mcast) { u32 self = tipc_own_addr(net); + u32 inst = ua->sa.instance; struct service_range *sr; struct tipc_service *sc; struct publication *p; *dstcnt = 0; rcu_read_lock(); - sc = tipc_service_find(net, type); + sc = tipc_service_find(net, ua); if (unlikely(!sc)) goto exit; spin_lock_bh(&sc->lock); /* Todo: a full search i.e. service_range_foreach_match() instead? */ - sr = service_range_match_first(sc->ranges.rb_node, instance, instance); + sr = service_range_match_first(sc->ranges.rb_node, inst, inst); if (!sr) goto no_match; list_for_each_entry(p, &sr->all_publ, all_publ) { - if (p->scope != scope) + if (p->scope != ua->scope) continue; - if (p->port == exclude && p->node == self) + if (p->sk.ref == exclude && p->sk.node == self) continue; - tipc_dest_push(dsts, p->node, p->port); + tipc_dest_push(dsts, p->sk.node, p->sk.ref); (*dstcnt)++; - if (all) + if (mcast) continue; list_move_tail(&p->all_publ, &sr->all_publ); break; @@ -659,23 +665,29 @@ exit: return !list_empty(dsts); } -void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, - u32 scope, bool exact, struct list_head *dports) +/* tipc_nametbl_lookup_mcast_sockets(): look up node local destinaton sockets + * matching the given address + * Used on nodes which have received a multicast/broadcast message + * Returns a list of local sockets + */ +void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua, + bool exact, struct list_head *dports) { struct service_range *sr; struct tipc_service *sc; struct publication *p; + u32 scope = ua->scope; rcu_read_lock(); - sc = tipc_service_find(net, type); + sc = tipc_service_find(net, ua); if (!sc) goto exit; spin_lock_bh(&sc->lock); - service_range_foreach_match(sr, sc, lower, upper) { + service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) { list_for_each_entry(p, &sr->local_publ, local_publ) { if (p->scope == scope || (!exact && p->scope < scope)) - tipc_dest_push(dports, 0, p->port); + tipc_dest_push(dports, 0, p->sk.ref); } } spin_unlock_bh(&sc->lock); @@ -683,26 +695,27 @@ exit: rcu_read_unlock(); } -/* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes - * - Creates list of nodes that overlap the given multicast address - * - Determines if any node local destinations overlap +/* tipc_nametbl_lookup_mcast_nodes(): look up all destination nodes matching + * the given address. Used in sending node. + * Used on nodes which are sending out a multicast/broadcast message + * Returns a list of nodes, including own node if applicable */ -void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, - u32 upper, struct tipc_nlist *nodes) +void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua, + struct tipc_nlist *nodes) { struct service_range *sr; struct tipc_service *sc; struct publication *p; rcu_read_lock(); - sc = tipc_service_find(net, type); + sc = tipc_service_find(net, ua); if (!sc) goto exit; spin_lock_bh(&sc->lock); - service_range_foreach_match(sr, sc, lower, upper) { + service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) { list_for_each_entry(p, &sr->all_publ, all_publ) { - tipc_nlist_add(nodes, p->node); + tipc_nlist_add(nodes, p->sk.node); } } spin_unlock_bh(&sc->lock); @@ -713,7 +726,7 @@ exit: /* tipc_nametbl_build_group - build list of communication group members */ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, - u32 type, u32 scope) + struct tipc_uaddr *ua) { struct service_range *sr; struct tipc_service *sc; @@ -721,7 +734,7 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, struct rb_node *n; rcu_read_lock(); - sc = tipc_service_find(net, type); + sc = tipc_service_find(net, ua); if (!sc) goto exit; @@ -729,9 +742,10 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, for (n = rb_first(&sc->ranges); n; n = rb_next(n)) { sr = container_of(n, struct service_range, tree_node); list_for_each_entry(p, &sr->all_publ, all_publ) { - if (p->scope != scope) + if (p->scope != ua->scope) continue; - tipc_group_add_member(grp, p->node, p->port, p->lower); + tipc_group_add_member(grp, p->sk.node, p->sk.ref, + p->sr.lower); } } spin_unlock_bh(&sc->lock); @@ -741,9 +755,8 @@ exit: /* tipc_nametbl_publish - add service binding to name table */ -struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, - u32 upper, u32 scope, u32 port, - u32 key) +struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, u32 key) { struct name_table *nt = tipc_name_table(net); struct tipc_net *tn = tipc_net(net); @@ -758,8 +771,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, goto exit; } - p = tipc_nametbl_insert_publ(net, type, lower, upper, scope, - tipc_own_addr(net), port, key); + p = tipc_nametbl_insert_publ(net, ua, sk, key); if (p) { nt->local_publ_count++; skb = tipc_named_publish(net, p); @@ -777,41 +789,33 @@ exit: /** * tipc_nametbl_withdraw - withdraw a service binding * @net: network namespace - * @type: service type - * @lower: service range lower bound - * @upper: service range upper bound + * @ua: service address/range being unbound + * @sk: address of the socket being unbound from * @key: target publication key */ -int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, - u32 upper, u32 key) +void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, u32 key) { struct name_table *nt = tipc_name_table(net); struct tipc_net *tn = tipc_net(net); - u32 self = tipc_own_addr(net); struct sk_buff *skb = NULL; struct publication *p; u32 rc_dests; spin_lock_bh(&tn->nametbl_lock); - p = tipc_nametbl_remove_publ(net, type, lower, upper, self, key); + p = tipc_nametbl_remove_publ(net, ua, sk, key); if (p) { nt->local_publ_count--; skb = tipc_named_withdraw(net, p); list_del_init(&p->binding_sock); kfree_rcu(p, rcu); - } else { - pr_err("Failed to remove local publication {%u,%u,%u}/%u\n", - type, lower, upper, key); } rc_dests = nt->rc_dests; spin_unlock_bh(&tn->nametbl_lock); - if (skb) { + if (skb) tipc_node_broadcast(net, skb, rc_dests); - return 1; - } - return 0; } /** @@ -820,25 +824,25 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, */ bool tipc_nametbl_subscribe(struct tipc_subscription *sub) { - struct name_table *nt = tipc_name_table(sub->net); struct tipc_net *tn = tipc_net(sub->net); - struct tipc_subscr *s = &sub->evt.s; - u32 type = tipc_sub_read(s, seq.type); + u32 type = sub->s.seq.type; struct tipc_service *sc; + struct tipc_uaddr ua; bool res = true; + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type, + sub->s.seq.lower, sub->s.seq.upper); spin_lock_bh(&tn->nametbl_lock); - sc = tipc_service_find(sub->net, type); + sc = tipc_service_find(sub->net, &ua); if (!sc) - sc = tipc_service_create(type, &nt->services[hash(type)]); + sc = tipc_service_create(sub->net, &ua); if (sc) { spin_lock_bh(&sc->lock); tipc_service_subscribe(sc, sub); spin_unlock_bh(&sc->lock); } else { - pr_warn("Failed to subscribe for {%u,%u,%u}\n", type, - tipc_sub_read(s, seq.lower), - tipc_sub_read(s, seq.upper)); + pr_warn("Failed to subscribe for {%u,%u,%u}\n", + type, sub->s.seq.lower, sub->s.seq.upper); res = false; } spin_unlock_bh(&tn->nametbl_lock); @@ -852,12 +856,13 @@ bool tipc_nametbl_subscribe(struct tipc_subscription *sub) void tipc_nametbl_unsubscribe(struct tipc_subscription *sub) { struct tipc_net *tn = tipc_net(sub->net); - struct tipc_subscr *s = &sub->evt.s; - u32 type = tipc_sub_read(s, seq.type); struct tipc_service *sc; + struct tipc_uaddr ua; + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, + sub->s.seq.type, sub->s.seq.lower, sub->s.seq.upper); spin_lock_bh(&tn->nametbl_lock); - sc = tipc_service_find(sub->net, type); + sc = tipc_service_find(sub->net, &ua); if (!sc) goto exit; @@ -909,7 +914,7 @@ static void tipc_service_delete(struct net *net, struct tipc_service *sc) spin_lock_bh(&sc->lock); rbtree_postorder_for_each_entry_safe(sr, tmpr, &sc->ranges, tree_node) { list_for_each_entry_safe(p, tmp, &sr->all_publ, all_publ) { - tipc_service_remove_publ(sr, p->node, p->key); + tipc_service_remove_publ(sr, &p->sk, p->key); kfree_rcu(p, rcu); } rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks); @@ -993,9 +998,9 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, goto publ_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_SCOPE, p->scope)) goto publ_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node)) + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->sk.node)) goto publ_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->port)) + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->sk.ref)) goto publ_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key)) goto publ_msg_full; @@ -1046,6 +1051,7 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg, struct tipc_net *tn = tipc_net(net); struct tipc_service *service = NULL; struct hlist_head *head; + struct tipc_uaddr ua; int err; int i; @@ -1059,7 +1065,9 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg, if (*last_type || (!i && *last_key && (*last_lower == *last_key))) { - service = tipc_service_find(net, *last_type); + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, + *last_type, *last_lower, *last_lower); + service = tipc_service_find(net, &ua); if (!service) return -EPIPE; } else { diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 5a82a01369d6..c7c9a3ddd420 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -3,6 +3,7 @@ * * Copyright (c) 2000-2006, 2014-2018, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,6 +42,7 @@ struct tipc_subscription; struct tipc_plist; struct tipc_nlist; struct tipc_group; +struct tipc_uaddr; /* * TIPC name types reserved for internal TIPC use (both current and planned) @@ -50,13 +52,10 @@ struct tipc_group; #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ /** - * struct publication - info about a published (name or) name sequence - * @type: name sequence type - * @lower: name sequence lower bound - * @upper: name sequence upper bound + * struct publication - info about a published service address or range + * @sr: service range represented by this publication + * @sk: address of socket bound to this publication * @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE - * @node: network address of publishing socket's node - * @port: publishing port * @key: publication key, unique across the cluster * @id: publication id * @binding_node: all publications from the same node which bound this one @@ -74,12 +73,9 @@ struct tipc_group; * @rcu: RCU callback head used for deferred freeing */ struct publication { - u32 type; - u32 lower; - u32 upper; - u32 scope; - u32 node; - u32 port; + struct tipc_service_range sr; + struct tipc_socket_addr sk; + u16 scope; u32 key; u32 id; struct list_head binding_node; @@ -114,28 +110,29 @@ struct name_table { }; int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); - -u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); -void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, - u32 scope, bool exact, struct list_head *dports); +bool tipc_nametbl_lookup_anycast(struct net *net, struct tipc_uaddr *ua, + struct tipc_socket_addr *sk); +void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua, + bool exact, struct list_head *dports); +void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua, + struct tipc_nlist *nodes); +bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua, + struct list_head *dsts, int *dstcnt, + u32 exclude, bool mcast); void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, - u32 type, u32 domain); -void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, - u32 upper, struct tipc_nlist *nodes); -bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, - struct list_head *dsts, int *dstcnt, u32 exclude, - bool all); -struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, - u32 upper, u32 scope, u32 port, - u32 key); -int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 upper, - u32 key); -struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, - u32 lower, u32 upper, u32 scope, - u32 node, u32 ref, u32 key); -struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, - u32 lower, u32 upper, - u32 node, u32 key); + struct tipc_uaddr *ua); +struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, u32 key); +void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, u32 key); +struct publication *tipc_nametbl_insert_publ(struct net *net, + struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, + u32 key); +struct publication *tipc_nametbl_remove_publ(struct net *net, + struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, + u32 key); bool tipc_nametbl_subscribe(struct tipc_subscription *s); void tipc_nametbl_unsubscribe(struct tipc_subscription *s); int tipc_nametbl_init(struct net *net); diff --git a/net/tipc/net.c b/net/tipc/net.c index a129f661bee3..3f927949bb23 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -125,6 +125,11 @@ int tipc_net_init(struct net *net, u8 *node_id, u32 addr) static void tipc_net_finalize(struct net *net, u32 addr) { struct tipc_net *tn = tipc_net(net); + struct tipc_socket_addr sk = {0, addr}; + struct tipc_uaddr ua; + + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE, + TIPC_NODE_STATE, addr, addr); if (cmpxchg(&tn->node_addr, 0, addr)) return; @@ -132,8 +137,7 @@ static void tipc_net_finalize(struct net *net, u32 addr) tipc_named_reinit(net); tipc_sk_reinit(net); tipc_mon_reinit_self(net); - tipc_nametbl_publish(net, TIPC_NODE_STATE, addr, addr, - TIPC_CLUSTER_SCOPE, 0, addr); + tipc_nametbl_publish(net, &ua, &sk, addr); } void tipc_net_finalize_work(struct work_struct *work) diff --git a/net/tipc/node.c b/net/tipc/node.c index 9c95ef4b6326..0daf3be11ed1 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -398,21 +398,23 @@ static void tipc_node_write_unlock_fast(struct tipc_node *n) static void tipc_node_write_unlock(struct tipc_node *n) __releases(n->lock) { + struct tipc_socket_addr sk; struct net *net = n->net; - u32 addr = 0; u32 flags = n->action_flags; - u32 link_id = 0; - u32 bearer_id; struct list_head *publ_list; + struct tipc_uaddr ua; + u32 bearer_id; if (likely(!flags)) { write_unlock_bh(&n->lock); return; } - addr = n->addr; - link_id = n->link_id; - bearer_id = link_id & 0xffff; + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, + TIPC_LINK_STATE, n->addr, n->addr); + sk.ref = n->link_id; + sk.node = n->addr; + bearer_id = n->link_id & 0xffff; publ_list = &n->publ_list; n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | @@ -421,20 +423,18 @@ static void tipc_node_write_unlock(struct tipc_node *n) write_unlock_bh(&n->lock); if (flags & TIPC_NOTIFY_NODE_DOWN) - tipc_publ_notify(net, publ_list, addr, n->capabilities); + tipc_publ_notify(net, publ_list, n->addr, n->capabilities); if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(net, addr, n->capabilities); + tipc_named_node_up(net, n->addr, n->capabilities); if (flags & TIPC_NOTIFY_LINK_UP) { - tipc_mon_peer_up(net, addr, bearer_id); - tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, - TIPC_NODE_SCOPE, link_id, link_id); + tipc_mon_peer_up(net, n->addr, bearer_id); + tipc_nametbl_publish(net, &ua, &sk, n->link_id); } if (flags & TIPC_NOTIFY_LINK_DOWN) { - tipc_mon_peer_down(net, addr, bearer_id); - tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, - addr, link_id); + tipc_mon_peer_down(net, n->addr, bearer_id); + tipc_nametbl_withdraw(net, &ua, &sk, n->link_id); } } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index cebcc104dc70..117a472a8e61 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3,7 +3,7 @@ * * Copyright (c) 2001-2007, 2012-2019, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems - * Copyright (c) 2020, Red Hat Inc + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -111,7 +111,6 @@ struct tipc_sock { struct sock sk; u32 conn_type; u32 conn_instance; - int published; u32 max_pkt; u32 maxnagle; u32 portid; @@ -141,6 +140,7 @@ struct tipc_sock { bool expect_ack; bool nodelay; bool group_is_open; + bool published; }; static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); @@ -151,10 +151,8 @@ static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, bool kern); static void tipc_sk_timeout(struct timer_list *t); -static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, - struct tipc_service_range const *seq); -static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, - struct tipc_service_range const *seq); +static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua); +static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua); static int tipc_sk_leave(struct tipc_sock *tsk); static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); static int tipc_sk_insert(struct tipc_sock *tsk); @@ -644,7 +642,7 @@ static int tipc_release(struct socket *sock) __tipc_shutdown(sock, TIPC_ERR_NO_PORT); sk->sk_shutdown = SHUTDOWN_MASK; tipc_sk_leave(tsk); - tipc_sk_withdraw(tsk, 0, NULL); + tipc_sk_withdraw(tsk, NULL); __skb_queue_purge(&tsk->mc_method.deferredq); sk_stop_timer(sk, &sk->sk_timer); tipc_sk_remove(tsk); @@ -677,22 +675,31 @@ static int tipc_release(struct socket *sock) */ static int __tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen) { - struct sockaddr_tipc *addr = (struct sockaddr_tipc *)skaddr; + struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr; struct tipc_sock *tsk = tipc_sk(sock->sk); + bool unbind = false; if (unlikely(!alen)) - return tipc_sk_withdraw(tsk, 0, NULL); + return tipc_sk_withdraw(tsk, NULL); - if (addr->addrtype == TIPC_SERVICE_ADDR) - addr->addr.nameseq.upper = addr->addr.nameseq.lower; + if (ua->addrtype == TIPC_SERVICE_ADDR) { + ua->addrtype = TIPC_SERVICE_RANGE; + ua->sr.upper = ua->sr.lower; + } + if (ua->scope < 0) { + unbind = true; + ua->scope = -ua->scope; + } + /* Users may still use deprecated TIPC_ZONE_SCOPE */ + if (ua->scope != TIPC_NODE_SCOPE) + ua->scope = TIPC_CLUSTER_SCOPE; if (tsk->group) return -EACCES; - if (addr->scope >= 0) - return tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq); - else - return tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq); + if (unbind) + return tipc_sk_withdraw(tsk, ua); + return tipc_sk_publish(tsk, ua); } int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen) @@ -707,18 +714,17 @@ int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen) static int tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen) { - struct sockaddr_tipc *addr = (struct sockaddr_tipc *)skaddr; + struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr; + u32 atype = ua->addrtype; if (alen) { - if (alen < sizeof(struct sockaddr_tipc)) + if (!tipc_uaddr_valid(ua, alen)) return -EINVAL; - if (addr->family != AF_TIPC) + if (atype == TIPC_SOCKET_ADDR) return -EAFNOSUPPORT; - if (addr->addrtype > TIPC_SERVICE_ADDR) - return -EAFNOSUPPORT; - if (addr->addr.nameseq.type < TIPC_RESERVED_TYPES) { + if (ua->sr.type < TIPC_RESERVED_TYPES) { pr_warn_once("Can't bind to reserved service type %u\n", - addr->addr.nameseq.type); + ua->sr.type); return -EACCES; } } @@ -826,7 +832,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock, /** * tipc_sendmcast - send multicast message * @sock: socket structure - * @seq: destination address + * @ua: destination address struct * @msg: message to send * @dlen: length of data to send * @timeout: timeout to wait for wakeup @@ -834,7 +840,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock, * Called from function tipc_sendmsg(), which has done all sanity checks * Return: the number of bytes sent on success, or errno */ -static int tipc_sendmcast(struct socket *sock, struct tipc_service_range *seq, +static int tipc_sendmcast(struct socket *sock, struct tipc_uaddr *ua, struct msghdr *msg, size_t dlen, long timeout) { struct sock *sk = sock->sk; @@ -842,7 +848,6 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_service_range *seq, struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); int mtu = tipc_bcast_get_mtu(net); - struct tipc_mc_method *method = &tsk->mc_method; struct sk_buff_head pkts; struct tipc_nlist dsts; int rc; @@ -857,8 +862,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_service_range *seq, /* Lookup destination nodes */ tipc_nlist_init(&dsts, tipc_own_addr(net)); - tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower, - seq->upper, &dsts); + tipc_nametbl_lookup_mcast_nodes(net, ua, &dsts); if (!dsts.local && !dsts.remote) return -EHOSTUNREACH; @@ -868,9 +872,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_service_range *seq, msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE); msg_set_destport(hdr, 0); msg_set_destnode(hdr, 0); - msg_set_nametype(hdr, seq->type); - msg_set_namelower(hdr, seq->lower); - msg_set_nameupper(hdr, seq->upper); + msg_set_nametype(hdr, ua->sr.type); + msg_set_namelower(hdr, ua->sr.lower); + msg_set_nameupper(hdr, ua->sr.upper); /* Build message as chain of buffers */ __skb_queue_head_init(&pkts); @@ -880,7 +884,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_service_range *seq, if (unlikely(rc == dlen)) { trace_tipc_sk_sendmcast(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " "); - rc = tipc_mcast_xmit(net, &pkts, method, &dsts, + rc = tipc_mcast_xmit(net, &pkts, &tsk->mc_method, &dsts, &tsk->cong_link_cnt); } @@ -954,7 +958,7 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m, int dlen, long timeout) { struct sock *sk = sock->sk; - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; int blks = tsk_blocks(GROUP_H_SIZE + dlen); struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); @@ -962,8 +966,8 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m, u32 node, port; int rc; - node = dest->addr.id.node; - port = dest->addr.id.ref; + node = ua->sk.node; + port = ua->sk.ref; if (!port && !node) return -EHOSTUNREACH; @@ -997,7 +1001,7 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m, static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, int dlen, long timeout) { - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); struct list_head *cong_links = &tsk->cong_links; @@ -1008,16 +1012,13 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, struct net *net = sock_net(sk); u32 node, port, exclude; struct list_head dsts; - u32 type, inst, scope; int lookups = 0; int dstcnt, rc; bool cong; INIT_LIST_HEAD(&dsts); - - type = msg_nametype(hdr); - inst = dest->addr.name.name.instance; - scope = msg_lookup_scope(hdr); + ua->sa.type = msg_nametype(hdr); + ua->scope = msg_lookup_scope(hdr); while (++lookups < 4) { exclude = tipc_group_exclude(tsk->group); @@ -1026,8 +1027,8 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, /* Look for a non-congested destination member, if any */ while (1) { - if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, - &dstcnt, exclude, false)) + if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt, + exclude, false)) return -EHOSTUNREACH; tipc_dest_pop(&dsts, &node, &port); cong = tipc_group_cong(tsk->group, node, port, blks, @@ -1082,7 +1083,7 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, int dlen, long timeout) { - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); @@ -1107,9 +1108,9 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, return -EHOSTUNREACH; /* Complete message header */ - if (dest) { + if (ua) { msg_set_type(hdr, TIPC_GRP_MCAST_MSG); - msg_set_nameinst(hdr, dest->addr.name.name.instance); + msg_set_nameinst(hdr, ua->sa.instance); } else { msg_set_type(hdr, TIPC_GRP_BCAST_MSG); msg_set_nameinst(hdr, 0); @@ -1156,29 +1157,25 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, int dlen, long timeout) { + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; struct sock *sk = sock->sk; - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); - u32 type, inst, scope, exclude; struct list_head dsts; - u32 dstcnt; + u32 dstcnt, exclude; INIT_LIST_HEAD(&dsts); - - type = msg_nametype(hdr); - inst = dest->addr.name.name.instance; - scope = msg_lookup_scope(hdr); + ua->sa.type = msg_nametype(hdr); + ua->scope = msg_lookup_scope(hdr); exclude = tipc_group_exclude(grp); - if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, - &dstcnt, exclude, true)) + if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt, exclude, true)) return -EHOSTUNREACH; if (dstcnt == 1) { - tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref); + tipc_dest_pop(&dsts, &ua->sk.node, &ua->sk.ref); return tipc_send_group_unicast(sock, m, dlen, timeout); } @@ -1198,17 +1195,18 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, struct sk_buff_head *inputq) { u32 self = tipc_own_addr(net); - u32 type, lower, upper, scope; struct sk_buff *skb, *_skb; u32 portid, onode; struct sk_buff_head tmpq; struct list_head dports; struct tipc_msg *hdr; + struct tipc_uaddr ua; int user, mtyp, hlen; bool exact; __skb_queue_head_init(&tmpq); INIT_LIST_HEAD(&dports); + ua.addrtype = TIPC_SERVICE_RANGE; skb = tipc_skb_peek(arrvq, &inputq->lock); for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { @@ -1217,7 +1215,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, mtyp = msg_type(hdr); hlen = skb_headroom(skb) + msg_hdr_sz(hdr); onode = msg_orignode(hdr); - type = msg_nametype(hdr); + ua.sr.type = msg_nametype(hdr); if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) { spin_lock_bh(&inputq->lock); @@ -1232,24 +1230,23 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, /* Group messages require exact scope match */ if (msg_in_group(hdr)) { - lower = 0; - upper = ~0; - scope = msg_lookup_scope(hdr); + ua.sr.lower = 0; + ua.sr.upper = ~0; + ua.scope = msg_lookup_scope(hdr); exact = true; } else { /* TIPC_NODE_SCOPE means "any scope" in this context */ if (onode == self) - scope = TIPC_NODE_SCOPE; + ua.scope = TIPC_NODE_SCOPE; else - scope = TIPC_CLUSTER_SCOPE; + ua.scope = TIPC_CLUSTER_SCOPE; exact = false; - lower = msg_namelower(hdr); - upper = msg_nameupper(hdr); + ua.sr.lower = msg_namelower(hdr); + ua.sr.upper = msg_nameupper(hdr); } /* Create destination port list: */ - tipc_nametbl_mc_lookup(net, type, lower, upper, - scope, exact, &dports); + tipc_nametbl_lookup_mcast_sockets(net, &ua, exact, &dports); /* Clone message per destination */ while (tipc_dest_pop(&dports, NULL, &portid)) { @@ -1417,44 +1414,43 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); struct list_head *clinks = &tsk->cong_links; bool syn = !tipc_sk_type_connectionless(sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; - struct tipc_service_range *seq; + struct tipc_socket_addr skaddr; struct sk_buff_head pkts; - u32 dport = 0, dnode = 0; - u32 type = 0, inst = 0; - int mtu, rc; + int atype, mtu, rc; if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) return -EMSGSIZE; - if (likely(dest)) { - if (unlikely(m->msg_namelen < sizeof(*dest))) - return -EINVAL; - if (unlikely(dest->family != AF_TIPC)) + if (ua) { + if (!tipc_uaddr_valid(ua, m->msg_namelen)) return -EINVAL; + atype = ua->addrtype; } + /* If socket belongs to a communication group follow other paths */ if (grp) { - if (!dest) + if (!ua) return tipc_send_group_bcast(sock, m, dlen, timeout); - if (dest->addrtype == TIPC_SERVICE_ADDR) + if (atype == TIPC_SERVICE_ADDR) return tipc_send_group_anycast(sock, m, dlen, timeout); - if (dest->addrtype == TIPC_SOCKET_ADDR) + if (atype == TIPC_SOCKET_ADDR) return tipc_send_group_unicast(sock, m, dlen, timeout); - if (dest->addrtype == TIPC_ADDR_MCAST) + if (atype == TIPC_SERVICE_RANGE) return tipc_send_group_mcast(sock, m, dlen, timeout); return -EINVAL; } - if (unlikely(!dest)) { - dest = &tsk->peer; - if (!syn && dest->family != AF_TIPC) + if (!ua) { + ua = (struct tipc_uaddr *)&tsk->peer; + if (!syn && ua->family != AF_TIPC) return -EDESTADDRREQ; + atype = ua->addrtype; } if (unlikely(syn)) { @@ -1464,54 +1460,51 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) return -EISCONN; if (tsk->published) return -EOPNOTSUPP; - if (dest->addrtype == TIPC_SERVICE_ADDR) { - tsk->conn_type = dest->addr.name.name.type; - tsk->conn_instance = dest->addr.name.name.instance; + if (atype == TIPC_SERVICE_ADDR) { + tsk->conn_type = ua->sa.type; + tsk->conn_instance = ua->sa.instance; } msg_set_syn(hdr, 1); } - seq = &dest->addr.nameseq; - if (dest->addrtype == TIPC_ADDR_MCAST) - return tipc_sendmcast(sock, seq, m, dlen, timeout); - - if (dest->addrtype == TIPC_SERVICE_ADDR) { - type = dest->addr.name.name.type; - inst = dest->addr.name.name.instance; - dnode = dest->addr.name.domain; - dport = tipc_nametbl_translate(net, type, inst, &dnode); - if (unlikely(!dport && !dnode)) + /* Determine destination */ + if (atype == TIPC_SERVICE_RANGE) { + return tipc_sendmcast(sock, ua, m, dlen, timeout); + } else if (atype == TIPC_SERVICE_ADDR) { + skaddr.node = ua->lookup_node; + ua->scope = tipc_node2scope(skaddr.node); + if (!tipc_nametbl_lookup_anycast(net, ua, &skaddr)) return -EHOSTUNREACH; - } else if (dest->addrtype == TIPC_SOCKET_ADDR) { - dnode = dest->addr.id.node; + } else if (atype == TIPC_SOCKET_ADDR) { + skaddr = ua->sk; } else { return -EINVAL; } /* Block or return if destination link is congested */ rc = tipc_wait_for_cond(sock, &timeout, - !tipc_dest_find(clinks, dnode, 0)); + !tipc_dest_find(clinks, skaddr.node, 0)); if (unlikely(rc)) return rc; - if (dest->addrtype == TIPC_SERVICE_ADDR) { + /* Finally build message header */ + msg_set_destnode(hdr, skaddr.node); + msg_set_destport(hdr, skaddr.ref); + if (atype == TIPC_SERVICE_ADDR) { msg_set_type(hdr, TIPC_NAMED_MSG); msg_set_hdr_sz(hdr, NAMED_H_SIZE); - msg_set_nametype(hdr, type); - msg_set_nameinst(hdr, inst); - msg_set_lookup_scope(hdr, tipc_node2scope(dnode)); - msg_set_destnode(hdr, dnode); - msg_set_destport(hdr, dport); + msg_set_nametype(hdr, ua->sa.type); + msg_set_nameinst(hdr, ua->sa.instance); + msg_set_lookup_scope(hdr, ua->scope); } else { /* TIPC_SOCKET_ADDR */ msg_set_type(hdr, TIPC_DIRECT_MSG); msg_set_lookup_scope(hdr, 0); - msg_set_destnode(hdr, dnode); - msg_set_destport(hdr, dest->addr.id.ref); msg_set_hdr_sz(hdr, BASIC_H_SIZE); } + /* Add message body */ __skb_queue_head_init(&pkts); - mtu = tipc_node_get_mtu(net, dnode, tsk->portid, true); + mtu = tipc_node_get_mtu(net, skaddr.node, tsk->portid, true); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); if (unlikely(rc != dlen)) return rc; @@ -1520,10 +1513,11 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) return -ENOMEM; } + /* Send message */ trace_tipc_sk_sendmsg(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " "); - rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); + rc = tipc_node_xmit(net, &pkts, skaddr.node, tsk->portid); if (unlikely(rc == -ELINKCONG)) { - tipc_dest_push(clinks, dnode, 0); + tipc_dest_push(clinks, skaddr.node, 0); tsk->cong_link_cnt++; rc = 0; } @@ -2891,66 +2885,62 @@ static void tipc_sk_timeout(struct timer_list *t) sock_put(sk); } -static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, - struct tipc_service_range const *seq) +static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); - struct publication *publ; + struct tipc_socket_addr skaddr; + struct publication *p; u32 key; - if (scope != TIPC_NODE_SCOPE) - scope = TIPC_CLUSTER_SCOPE; - if (tipc_sk_connected(sk)) return -EINVAL; key = tsk->portid + tsk->pub_count + 1; if (key == tsk->portid) return -EADDRINUSE; - - publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper, - scope, tsk->portid, key); - if (unlikely(!publ)) + skaddr.ref = tsk->portid; + skaddr.node = tipc_own_addr(net); + p = tipc_nametbl_publish(net, ua, &skaddr, key); + if (unlikely(!p)) return -EINVAL; - list_add(&publ->binding_sock, &tsk->publications); + list_add(&p->binding_sock, &tsk->publications); tsk->pub_count++; - tsk->published = 1; + tsk->published = true; return 0; } -static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, - struct tipc_service_range const *seq) +static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua) { struct net *net = sock_net(&tsk->sk); - struct publication *publ; - struct publication *safe; + struct publication *safe, *p; + struct tipc_uaddr _ua; int rc = -EINVAL; - if (scope != TIPC_NODE_SCOPE) - scope = TIPC_CLUSTER_SCOPE; - - list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) { - if (seq) { - if (publ->scope != scope) - continue; - if (publ->type != seq->type) - continue; - if (publ->lower != seq->lower) - continue; - if (publ->upper != seq->upper) - break; - tipc_nametbl_withdraw(net, publ->type, publ->lower, - publ->upper, publ->key); - rc = 0; - break; + list_for_each_entry_safe(p, safe, &tsk->publications, binding_sock) { + if (!ua) { + tipc_uaddr(&_ua, TIPC_SERVICE_RANGE, p->scope, + p->sr.type, p->sr.lower, p->sr.upper); + tipc_nametbl_withdraw(net, &_ua, &p->sk, p->key); + continue; } - tipc_nametbl_withdraw(net, publ->type, publ->lower, - publ->upper, publ->key); + /* Unbind specific publication */ + if (p->scope != ua->scope) + continue; + if (p->sr.type != ua->sr.type) + continue; + if (p->sr.lower != ua->sr.lower) + continue; + if (p->sr.upper != ua->sr.upper) + break; + tipc_nametbl_withdraw(net, ua, &p->sk, p->key); rc = 0; + break; } - if (list_empty(&tsk->publications)) + if (list_empty(&tsk->publications)) { tsk->published = 0; + rc = 0; + } return rc; } @@ -3067,13 +3057,15 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) struct net *net = sock_net(&tsk->sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; - struct tipc_service_range seq; + struct tipc_uaddr ua; int rc; if (mreq->type < TIPC_RESERVED_TYPES) return -EACCES; if (mreq->scope > TIPC_NODE_SCOPE) return -EINVAL; + if (mreq->scope != TIPC_NODE_SCOPE) + mreq->scope = TIPC_CLUSTER_SCOPE; if (grp) return -EACCES; grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open); @@ -3083,11 +3075,10 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) msg_set_lookup_scope(hdr, mreq->scope); msg_set_nametype(hdr, mreq->type); msg_set_dest_droppable(hdr, true); - seq.type = mreq->type; - seq.lower = mreq->instance; - seq.upper = seq.lower; - tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope); - rc = tipc_sk_publish(tsk, mreq->scope, &seq); + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, mreq->scope, + mreq->type, mreq->instance, mreq->instance); + tipc_nametbl_build_group(net, grp, &ua); + rc = tipc_sk_publish(tsk, &ua); if (rc) { tipc_group_delete(net, grp); tsk->group = NULL; @@ -3104,15 +3095,17 @@ static int tipc_sk_leave(struct tipc_sock *tsk) { struct net *net = sock_net(&tsk->sk); struct tipc_group *grp = tsk->group; - struct tipc_service_range seq; + struct tipc_uaddr ua; int scope; if (!grp) return -EINVAL; - tipc_group_self(grp, &seq, &scope); + ua.addrtype = TIPC_SERVICE_RANGE; + tipc_group_self(grp, &ua.sr, &scope); + ua.scope = scope; tipc_group_delete(net, grp); tsk->group = NULL; - tipc_sk_withdraw(tsk, scope, &seq); + tipc_sk_withdraw(tsk, &ua); return 0; } @@ -3711,11 +3704,11 @@ static int __tipc_nl_add_sk_publ(struct sk_buff *skb, if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key)) goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type)) + if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->sr.type)) goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower)) + if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->sr.lower)) goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper)) + if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->sr.upper)) goto attr_msg_cancel; nla_nest_end(skb, attrs); @@ -3863,9 +3856,9 @@ bool tipc_sk_filtering(struct sock *sk) p = list_first_entry_or_null(&tsk->publications, struct publication, binding_sock); if (p) { - type = p->type; - lower = p->lower; - upper = p->upper; + type = p->sr.type; + lower = p->sr.lower; + upper = p->sr.upper; } } @@ -3964,9 +3957,9 @@ int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf) if (tsk->published) { p = list_first_entry_or_null(&tsk->publications, struct publication, binding_sock); - i += scnprintf(buf + i, sz - i, " %u", (p) ? p->type : 0); - i += scnprintf(buf + i, sz - i, " %u", (p) ? p->lower : 0); - i += scnprintf(buf + i, sz - i, " %u", (p) ? p->upper : 0); + i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.type : 0); + i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.lower : 0); + i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.upper : 0); } i += scnprintf(buf + i, sz - i, " | %u", tsk->snd_win); i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_win); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index f6ad0005218c..8e00d739f03a 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -3,7 +3,7 @@ * * Copyright (c) 2000-2017, Ericsson AB * Copyright (c) 2005-2007, 2010-2013, Wind River Systems - * Copyright (c) 2020, Red Hat Inc + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -40,77 +40,75 @@ #include "subscr.h" static void tipc_sub_send_event(struct tipc_subscription *sub, - u32 found_lower, u32 found_upper, - u32 event, u32 port, u32 node) + struct publication *p, + u32 event) { + struct tipc_subscr *s = &sub->evt.s; struct tipc_event *evt = &sub->evt; if (sub->inactive) return; tipc_evt_write(evt, event, event); - tipc_evt_write(evt, found_lower, found_lower); - tipc_evt_write(evt, found_upper, found_upper); - tipc_evt_write(evt, port.ref, port); - tipc_evt_write(evt, port.node, node); + if (p) { + tipc_evt_write(evt, found_lower, p->sr.lower); + tipc_evt_write(evt, found_upper, p->sr.upper); + tipc_evt_write(evt, port.ref, p->sk.ref); + tipc_evt_write(evt, port.node, p->sk.node); + } else { + tipc_evt_write(evt, found_lower, s->seq.lower); + tipc_evt_write(evt, found_upper, s->seq.upper); + tipc_evt_write(evt, port.ref, 0); + tipc_evt_write(evt, port.node, 0); + } tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt); } /** * tipc_sub_check_overlap - test for subscription overlap with the given values - * @seq: tipc_name_seq to check - * @found_lower: lower value to test - * @found_upper: upper value to test + * @subscribed: the service range subscribed for + * @found: the service range we are checning for match * - * Return: 1 if there is overlap, otherwise 0. + * Returns true if there is overlap, otherwise false. */ -int tipc_sub_check_overlap(struct tipc_service_range *seq, u32 found_lower, - u32 found_upper) +static bool tipc_sub_check_overlap(struct tipc_service_range *subscribed, + struct tipc_service_range *found) { - if (found_lower < seq->lower) - found_lower = seq->lower; - if (found_upper > seq->upper) - found_upper = seq->upper; - if (found_lower > found_upper) - return 0; - return 1; + u32 found_lower = found->lower; + u32 found_upper = found->upper; + + if (found_lower < subscribed->lower) + found_lower = subscribed->lower; + if (found_upper > subscribed->upper) + found_upper = subscribed->upper; + return found_lower <= found_upper; } void tipc_sub_report_overlap(struct tipc_subscription *sub, - u32 found_lower, u32 found_upper, - u32 event, u32 port, u32 node, - u32 scope, int must) + struct publication *p, + u32 event, bool must) { - struct tipc_subscr *s = &sub->evt.s; - u32 filter = tipc_sub_read(s, filter); - struct tipc_service_range seq; + struct tipc_service_range *sr = &sub->s.seq; + u32 filter = sub->s.filter; - seq.type = tipc_sub_read(s, seq.type); - seq.lower = tipc_sub_read(s, seq.lower); - seq.upper = tipc_sub_read(s, seq.upper); - - if (!tipc_sub_check_overlap(&seq, found_lower, found_upper)) + if (!tipc_sub_check_overlap(sr, &p->sr)) return; - if (!must && !(filter & TIPC_SUB_PORTS)) return; - if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE) + if (filter & TIPC_SUB_CLUSTER_SCOPE && p->scope == TIPC_NODE_SCOPE) return; - if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE) + if (filter & TIPC_SUB_NODE_SCOPE && p->scope != TIPC_NODE_SCOPE) return; spin_lock(&sub->lock); - tipc_sub_send_event(sub, found_lower, found_upper, - event, port, node); + tipc_sub_send_event(sub, p, event); spin_unlock(&sub->lock); } static void tipc_sub_timeout(struct timer_list *t) { struct tipc_subscription *sub = from_timer(sub, t, timer); - struct tipc_subscr *s = &sub->evt.s; spin_lock(&sub->lock); - tipc_sub_send_event(sub, s->seq.lower, s->seq.upper, - TIPC_SUBSCR_TIMEOUT, 0, 0); + tipc_sub_send_event(sub, NULL, TIPC_SUBSCR_TIMEOUT); sub->inactive = true; spin_unlock(&sub->lock); } @@ -134,12 +132,14 @@ struct tipc_subscription *tipc_sub_subscribe(struct net *net, struct tipc_subscr *s, int conid) { + u32 lower = tipc_sub_read(s, seq.lower); + u32 upper = tipc_sub_read(s, seq.upper); u32 filter = tipc_sub_read(s, filter); struct tipc_subscription *sub; u32 timeout; if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) || - (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))) { + lower > upper) { pr_warn("Subscription rejected, illegal request\n"); return NULL; } @@ -154,6 +154,12 @@ struct tipc_subscription *tipc_sub_subscribe(struct net *net, sub->conid = conid; sub->inactive = false; memcpy(&sub->evt.s, s, sizeof(*s)); + sub->s.seq.type = tipc_sub_read(s, seq.type); + sub->s.seq.lower = lower; + sub->s.seq.upper = upper; + sub->s.filter = filter; + sub->s.timeout = tipc_sub_read(s, timeout); + memcpy(sub->s.usr_handle, s->usr_handle, 8); spin_lock_init(&sub->lock); kref_init(&sub->kref); if (!tipc_nametbl_subscribe(sub)) { diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 3ded27391d54..ddea6554ec46 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -3,7 +3,7 @@ * * Copyright (c) 2003-2017, Ericsson AB * Copyright (c) 2005-2007, 2012-2013, Wind River Systems - * Copyright (c) 2020, Red Hat Inc + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,6 +43,7 @@ #define TIPC_MAX_SUBSCR 65535 #define TIPC_MAX_PUBL 65535 +struct publication; struct tipc_subscription; struct tipc_conn; @@ -59,12 +60,13 @@ struct tipc_conn; * @lock: serialize up/down and timer events */ struct tipc_subscription { + struct tipc_subscr s; + struct tipc_event evt; struct kref kref; struct net *net; struct timer_list timer; struct list_head service_list; struct list_head sub_list; - struct tipc_event evt; int conid; bool inactive; spinlock_t lock; @@ -74,13 +76,9 @@ struct tipc_subscription *tipc_sub_subscribe(struct net *net, struct tipc_subscr *s, int conid); void tipc_sub_unsubscribe(struct tipc_subscription *sub); - -int tipc_sub_check_overlap(struct tipc_service_range *seq, - u32 found_lower, u32 found_upper); void tipc_sub_report_overlap(struct tipc_subscription *sub, - u32 found_lower, u32 found_upper, - u32 event, u32 port, u32 node, - u32 scope, int must); + struct publication *p, + u32 event, bool must); int __net_init tipc_topsrv_init_net(struct net *net); void __net_exit tipc_topsrv_exit_net(struct net *net); |