80 files changed, 1206 insertions, 772 deletions
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 04892b821157..e26c607468a6 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -120,10 +120,14 @@ seconds. warnings -------- -This controls console messages from the networking stack that can occur because -of problems on the network like duplicate address or bad checksums. Normally, -this should be enabled, but if the problem persists the messages can be -disabled. +This sysctl is now unused. + +This was used to control console messages from the networking stack that +occur because of problems on the network like duplicate address or bad +checksums. + +These messages are now emitted at KERN_DEBUG and can generally be enabled +and controlled by the dynamic_debug facility. netdev_budget ------------- diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 3de1394bcab8..e2fe0700b3b4 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -87,4 +87,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_INCOMING_CPU 49 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 6e6cd159924b..92121b0f5b98 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -80,4 +80,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_INCOMING_CPU 49 + #endif /* _UAPI__ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h index ed94e5ed0a23..60f60f5b9b35 100644 --- a/arch/cris/include/uapi/asm/socket.h +++ b/arch/cris/include/uapi/asm/socket.h @@ -82,6 +82,8 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_INCOMING_CPU 49 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index ca2c6e6f31c6..2c6890209ea6 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -80,5 +80,7 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_INCOMING_CPU 49 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index a1b49bac7951..09a93fb566f6 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -89,4 +89,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_INCOMING_CPU 49 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 6c9a24b3aefa..e8589819c274 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -80,4 +80,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_INCOMING_CPU 49 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index a14baa218c76..2e9ee8c55a10 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -98,4 +98,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_INCOMING_CPU 49 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index 6aa3ce1854aa..f3492e8c9f70 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -80,4 +80,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_INCOMING_CPU 49 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index fe35ceacf0e7..7984a1cab3da 100644 --- 
a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -79,4 +79,6 @@ #define SO_BPF_EXTENSIONS 0x4029 +#define SO_INCOMING_CPU 0x402A + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index a9c3e2e18c05..3474e4ef166d 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -87,4 +87,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_INCOMING_CPU 49 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index d110e288d7ac..d3fa80d04e6b 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -361,6 +361,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, protocol)); break; case BPF_ANC | SKF_AD_IFINDEX: + case BPF_ANC | SKF_AD_HATYPE: + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, + ifindex) != 4); + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, + type) != 2); PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, dev)); PPC_CMPDI(r_scratch1, 0); @@ -368,14 +373,18 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); } else { /* Exit, returning 0; first pass hits here. */ - PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12); + PPC_BCC_SHORT(COND_NE, ctx->idx * 4 + 12); PPC_LI(r_ret, 0); PPC_JMP(exit_addr); } - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, - ifindex) != 4); - PPC_LWZ_OFFS(r_A, r_scratch1, + if (code == (BPF_ANC | SKF_AD_IFINDEX)) { + PPC_LWZ_OFFS(r_A, r_scratch1, offsetof(struct net_device, ifindex)); + } else { + PPC_LHZ_OFFS(r_A, r_scratch1, + offsetof(struct net_device, type)); + } + break; case BPF_ANC | SKF_AD_MARK: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index e031332096d7..8457636c33e1 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -86,4 +86,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_INCOMING_CPU 49 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 54d9608681b6..4a8003a94163 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -76,6 +76,8 @@ #define SO_BPF_EXTENSIONS 0x0032 +#define SO_INCOMING_CPU 0x0033 + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index 39acec0cf0b1..c46f6a696849 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -91,4 +91,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_INCOMING_CPU 49 + #endif /* _XTENSA_SOCKET_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 8ea4d5be7376..6c643230a5ed 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -115,7 +115,7 @@ static const char main_strings[][ETH_GSTRING_LEN] = { "tso_packets", "xmit_more", "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_failed", - "rx_csum_good", "rx_csum_none", "tx_chksum_offload", + "rx_csum_good", "rx_csum_none", "rx_csum_complete", "tx_chksum_offload", /* packet statistics */ "broadcast", "rx_prio_0", "rx_prio_1", "rx_prio_2", "rx_prio_3", 
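
Note on SO_INCOMING_CPU: the per-arch socket.h additions above define the new option number (49 in the asm-generic numbering; parisc and sparc use their own ranges), and the include/net/sock.h and net/core/sock.c hunks further down expose it as a getsockopt that reports the CPU which last did receive processing for the socket. Below is a minimal userspace sketch of querying it; the fallback #define and the choice of a UDP socket are illustrative assumptions, not part of this series.

#include <stdio.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

#ifndef SO_INCOMING_CPU
#define SO_INCOMING_CPU 49	/* asm-generic value from this series; parisc/sparc differ */
#endif

int main(void)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(12345),
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	char buf[2048];
	int fd, cpu;
	socklen_t len = sizeof(cpu);

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0 || bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("socket/bind");
		return 1;
	}

	/* The series updates sk_incoming_cpu on the TCP and connected-UDP
	 * receive paths, so connect() the socket (or use TCP) for a value
	 * that tracks the most recent packet; this sketch just receives.
	 */
	if (recv(fd, buf, sizeof(buf), 0) < 0) {
		perror("recv");
		return 1;
	}

	if (getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len) == 0)
		printf("softirq processing for this socket last ran on CPU %d\n", cpu);
	else
		perror("getsockopt(SO_INCOMING_CPU)");
	return 0;
}

A typical use would pair this with per-CPU SO_REUSEPORT listeners, letting an application check that packets for a socket are being processed on the same CPU its owning thread is pinned to.
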
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 0efbae90f1ba..d1eb25dbff56 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1893,6 +1893,7 @@ static void mlx4_en_clear_stats(struct net_device *dev) priv->rx_ring[i]->packets = 0; priv->rx_ring[i]->csum_ok = 0; priv->rx_ring[i]->csum_none = 0; + priv->rx_ring[i]->csum_complete = 0; } } @@ -2503,6 +2504,10 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, /* Query for default mac and max mtu */ priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port]; + if (mdev->dev->caps.rx_checksum_flags_port[priv->port] & + MLX4_RX_CSUM_MODE_VAL_NON_TCP_UDP) + priv->flags |= MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP; + /* Set default MAC */ dev->addr_len = ETH_ALEN; mlx4_en_u64_to_mac(dev->dev_addr, mdev->dev->caps.def_mac[priv->port]); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 134b12e17da5..6cb80072af6c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -155,11 +155,13 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) stats->rx_bytes = 0; priv->port_stats.rx_chksum_good = 0; priv->port_stats.rx_chksum_none = 0; + priv->port_stats.rx_chksum_complete = 0; for (i = 0; i < priv->rx_ring_num; i++) { stats->rx_packets += priv->rx_ring[i]->packets; stats->rx_bytes += priv->rx_ring[i]->bytes; priv->port_stats.rx_chksum_good += priv->rx_ring[i]->csum_ok; priv->port_stats.rx_chksum_none += priv->rx_ring[i]->csum_none; + priv->port_stats.rx_chksum_complete += priv->rx_ring[i]->csum_complete; } stats->tx_packets = 0; stats->tx_bytes = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 5a193f40a14c..ccd95177ea7c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -42,6 +42,10 @@ #include <linux/vmalloc.h> #include <linux/irq.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ip6_checksum.h> +#endif + #include "mlx4_en.h" static int mlx4_alloc_pages(struct mlx4_en_priv *priv, @@ -643,6 +647,86 @@ static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, } } +/* When hardware doesn't strip the vlan, we need to calculate the checksum + * over it and add it to the hardware's checksum calculation + */ +static inline __wsum get_fixed_vlan_csum(__wsum hw_checksum, + struct vlan_hdr *vlanh) +{ + return csum_add(hw_checksum, *(__wsum *)vlanh); +} + +/* Although the stack expects checksum which doesn't include the pseudo + * header, the HW adds it. To address that, we are subtracting the pseudo + * header checksum from the checksum value provided by the HW. + */ +static void get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb, + struct iphdr *iph) +{ + __u16 length_for_csum = 0; + __wsum csum_pseudo_header = 0; + + length_for_csum = (be16_to_cpu(iph->tot_len) - (iph->ihl << 2)); + csum_pseudo_header = csum_tcpudp_nofold(iph->saddr, iph->daddr, + length_for_csum, iph->protocol, 0); + skb->csum = csum_sub(hw_checksum, csum_pseudo_header); +} + +#if IS_ENABLED(CONFIG_IPV6) +/* In IPv6 packets, besides subtracting the pseudo header checksum, + * we also compute/add the IP header checksum which + * is not added by the HW. 
+ */ +static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, + struct ipv6hdr *ipv6h) +{ + __wsum csum_pseudo_hdr = 0; + + if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS) + return -1; + hw_checksum = csum_add(hw_checksum, (__force __wsum)(ipv6h->nexthdr << 8)); + + csum_pseudo_hdr = csum_partial(&ipv6h->saddr, + sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0); + csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ipv6h->payload_len); + csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ntohs(ipv6h->nexthdr)); + + skb->csum = csum_sub(hw_checksum, csum_pseudo_hdr); + skb->csum = csum_add(skb->csum, csum_partial(ipv6h, sizeof(struct ipv6hdr), 0)); + return 0; +} +#endif +static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, + int hwtstamp_rx_filter) +{ + __wsum hw_checksum = 0; + + void *hdr = (u8 *)va + sizeof(struct ethhdr); + + hw_checksum = csum_unfold((__force __sum16)cqe->checksum); + + if (((struct ethhdr *)va)->h_proto == htons(ETH_P_8021Q) && + hwtstamp_rx_filter != HWTSTAMP_FILTER_NONE) { + /* next protocol non IPv4 or IPv6 */ + if (((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto + != htons(ETH_P_IP) && + ((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto + != htons(ETH_P_IPV6)) + return -1; + hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr); + hdr += sizeof(struct vlan_hdr); + } + + if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4)) + get_fixed_ipv4_csum(hw_checksum, skb, hdr); +#if IS_ENABLED(CONFIG_IPV6) + else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) + if (get_fixed_ipv6_csum(hw_checksum, skb, hdr)) + return -1; +#endif + return 0; +} + int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -744,73 +828,95 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); if (likely(dev->features & NETIF_F_RXCSUM)) { - if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && - (cqe->checksum == cpu_to_be16(0xffff))) { - ring->csum_ok++; - /* This packet is eligible for GRO if it is: - * - DIX Ethernet (type interpretation) - * - TCP/IP (v4) - * - without IP options - * - not an IP fragment - * - no LLS polling in progress - */ - if (!mlx4_en_cq_busy_polling(cq) && - (dev->features & NETIF_F_GRO)) { - struct sk_buff *gro_skb = napi_get_frags(&cq->napi); - if (!gro_skb) - goto next; - - nr = mlx4_en_complete_rx_desc(priv, - rx_desc, frags, gro_skb, - length); - if (!nr) - goto next; - - skb_shinfo(gro_skb)->nr_frags = nr; - gro_skb->len = length; - gro_skb->data_len = length; - gro_skb->ip_summed = CHECKSUM_UNNECESSARY; + if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | + MLX4_CQE_STATUS_UDP)) { + if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && + cqe->checksum == cpu_to_be16(0xffff)) { + ip_summed = CHECKSUM_UNNECESSARY; + ring->csum_ok++; + } else { + ip_summed = CHECKSUM_NONE; + ring->csum_none++; + } + } else { + if (priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP && + (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPV6))) { + ip_summed = CHECKSUM_COMPLETE; + ring->csum_complete++; + } else { + ip_summed = CHECKSUM_NONE; + ring->csum_none++; + } + } + } else { + ip_summed = CHECKSUM_NONE; + ring->csum_none++; + } - if (l2_tunnel) - gro_skb->csum_level = 1; - if ((cqe->vlan_my_qpn & - cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) && - (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { - u16 vid = 
be16_to_cpu(cqe->sl_vid); + /* This packet is eligible for GRO if it is: + * - DIX Ethernet (type interpretation) + * - TCP/IP (v4) + * - without IP options + * - not an IP fragment + * - no LLS polling in progress + */ + if (!mlx4_en_cq_busy_polling(cq) && + (dev->features & NETIF_F_GRO)) { + struct sk_buff *gro_skb = napi_get_frags(&cq->napi); + if (!gro_skb) + goto next; + + nr = mlx4_en_complete_rx_desc(priv, + rx_desc, frags, gro_skb, + length); + if (!nr) + goto next; + + if (ip_summed == CHECKSUM_COMPLETE) { + void *va = skb_frag_address(skb_shinfo(gro_skb)->frags); + if (check_csum(cqe, gro_skb, va, ring->hwtstamp_rx_filter)) { + ip_summed = CHECKSUM_NONE; + ring->csum_none++; + ring->csum_complete--; + } + } - __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid); - } + skb_shinfo(gro_skb)->nr_frags = nr; + gro_skb->len = length; + gro_skb->data_len = length; + gro_skb->ip_summed = ip_summed; - if (dev->features & NETIF_F_RXHASH) - skb_set_hash(gro_skb, - be32_to_cpu(cqe->immed_rss_invalid), - PKT_HASH_TYPE_L3); + if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY) + gro_skb->encapsulation = 1; + if ((cqe->vlan_my_qpn & + cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) && + (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { + u16 vid = be16_to_cpu(cqe->sl_vid); - skb_record_rx_queue(gro_skb, cq->ring); - skb_mark_napi_id(gro_skb, &cq->napi); + __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid); + } - if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { - timestamp = mlx4_en_get_cqe_ts(cqe); - mlx4_en_fill_hwtstamps(mdev, - skb_hwtstamps(gro_skb), - timestamp); - } + if (dev->features & NETIF_F_RXHASH) + skb_set_hash(gro_skb, + be32_to_cpu(cqe->immed_rss_invalid), + PKT_HASH_TYPE_L3); - napi_gro_frags(&cq->napi); - goto next; - } + skb_record_rx_queue(gro_skb, cq->ring); + skb_mark_napi_id(gro_skb, &cq->napi); - /* GRO not possible, complete processing here */ - ip_summed = CHECKSUM_UNNECESSARY; - } else { - ip_summed = CHECKSUM_NONE; - ring->csum_none++; + if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { + timestamp = mlx4_en_get_cqe_ts(cqe); + mlx4_en_fill_hwtstamps(mdev, + skb_hwtstamps(gro_skb), + timestamp); } - } else { - ip_summed = CHECKSUM_NONE; - ring->csum_none++; + + napi_gro_frags(&cq->napi); + goto next; } + /* GRO not possible, complete processing here */ skb = mlx4_en_rx_skb(priv, rx_desc, frags, length); if (!skb) { priv->stats.rx_dropped++; @@ -822,6 +928,14 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud goto next; } + if (ip_summed == CHECKSUM_COMPLETE) { + if (check_csum(cqe, skb, skb->data, ring->hwtstamp_rx_filter)) { + ip_summed = CHECKSUM_NONE; + ring->csum_complete--; + ring->csum_none++; + } + } + skb->ip_summed = ip_summed; skb->protocol = eth_type_trans(skb, dev); skb_record_rx_queue(skb, cq->ring); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 9f821964a1b9..2f6ba420ac03 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1629,6 +1629,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev) struct mlx4_init_hca_param init_hca; u64 icm_size; int err; + struct mlx4_config_dev_params params; if (!mlx4_is_slave(dev)) { err = mlx4_QUERY_FW(dev); @@ -1762,6 +1763,14 @@ static int mlx4_init_hca(struct mlx4_dev *dev) goto unmap_bf; } + /* Query CONFIG_DEV parameters */ + err = mlx4_config_dev_retrieval(dev, ¶ms); + if (err && err != -ENOTSUPP) { + mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n"); + } else 
if (!err) { + dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1; + dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2; + } priv->eq_table.inta_pin = adapter.inta_pin; memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index ef83d127f406..de456749ffae 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -326,6 +326,7 @@ struct mlx4_en_rx_ring { #endif unsigned long csum_ok; unsigned long csum_none; + unsigned long csum_complete; int hwtstamp_rx_filter; cpumask_var_t affinity_mask; }; @@ -449,6 +450,7 @@ struct mlx4_en_port_stats { unsigned long rx_alloc_failed; unsigned long rx_chksum_good; unsigned long rx_chksum_none; + unsigned long rx_chksum_complete; unsigned long tx_chksum_offload; #define NUM_PORT_STATS 9 }; @@ -507,7 +509,8 @@ enum { MLX4_EN_FLAG_ENABLE_HW_LOOPBACK = (1 << 2), /* whether we need to drop packets that hardware loopback-ed */ MLX4_EN_FLAG_RX_FILTER_NEEDED = (1 << 3), - MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4) + MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4), + MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP = (1 << 5), }; #define MLX4_EN_MAC_HASH_SIZE (1 << BITS_PER_BYTE) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 33b85bae6c21..7d3af190be55 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -14,7 +14,7 @@ config STMMAC_ETH if STMMAC_ETH config STMMAC_PLATFORM - bool "STMMAC Platform bus support" + tristate "STMMAC Platform bus support" depends on STMMAC_ETH default y ---help--- @@ -27,7 +27,7 @@ config STMMAC_PLATFORM If unsure, say N. 
config STMMAC_PCI - bool "STMMAC PCI bus support" + tristate "STMMAC PCI bus support" depends on STMMAC_ETH && PCI ---help--- This is to select the Synopsys DWMAC available on PCI devices, diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 034da704f22f..ac4d5629d905 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -1,9 +1,12 @@ obj-$(CONFIG_STMMAC_ETH) += stmmac.o -stmmac-$(CONFIG_STMMAC_PCI) += stmmac_pci.o -stmmac-$(CONFIG_STMMAC_PLATFORM) += stmmac_platform.o dwmac-meson.o \ - dwmac-sunxi.o dwmac-sti.o \ - dwmac-socfpga.o stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \ - chain_mode.o dwmac_lib.o dwmac1000_core.o dwmac1000_dma.o \ - dwmac100_core.o dwmac100_dma.o enh_desc.o norm_desc.o \ + chain_mode.o dwmac_lib.o dwmac1000_core.o dwmac1000_dma.o \ + dwmac100_core.o dwmac100_dma.o enh_desc.o norm_desc.o \ mmc_core.o stmmac_hwtstamp.o stmmac_ptp.o $(stmmac-y) + +obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o +stmmac-platform-objs:= stmmac_platform.o dwmac-meson.o dwmac-sunxi.o \ + dwmac-sti.o dwmac-socfpga.o + +obj-$(CONFIG_STMMAC_PCI) += stmmac-pci.o +stmmac-pci-objs:= stmmac_pci.o diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index bd75ee8b2757..c0a391983372 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -134,65 +134,4 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device, void stmmac_disable_eee_mode(struct stmmac_priv *priv); bool stmmac_eee_init(struct stmmac_priv *priv); -#ifdef CONFIG_STMMAC_PLATFORM -extern struct platform_driver stmmac_pltfr_driver; - -static inline int stmmac_register_platform(void) -{ - int err; - - err = platform_driver_register(&stmmac_pltfr_driver); - if (err) - pr_err("stmmac: failed to register the platform driver\n"); - - return err; -} - -static inline void stmmac_unregister_platform(void) -{ - platform_driver_unregister(&stmmac_pltfr_driver); -} -#else -static inline int stmmac_register_platform(void) -{ - pr_debug("stmmac: do not register the platf driver\n"); - - return 0; -} - -static inline void stmmac_unregister_platform(void) -{ -} -#endif /* CONFIG_STMMAC_PLATFORM */ - -#ifdef CONFIG_STMMAC_PCI -extern struct pci_driver stmmac_pci_driver; -static inline int stmmac_register_pci(void) -{ - int err; - - err = pci_register_driver(&stmmac_pci_driver); - if (err) - pr_err("stmmac: failed to register the PCI driver\n"); - - return err; -} - -static inline void stmmac_unregister_pci(void) -{ - pci_unregister_driver(&stmmac_pci_driver); -} -#else -static inline int stmmac_register_pci(void) -{ - pr_debug("stmmac: do not register the PCI driver\n"); - - return 0; -} - -static inline void stmmac_unregister_pci(void) -{ -} -#endif /* CONFIG_STMMAC_PCI */ - #endif /* __STMMAC_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 53db11b29e61..0f1c146fcce1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2885,6 +2885,7 @@ error_clk_get: return ERR_PTR(ret); } +EXPORT_SYMBOL_GPL(stmmac_dvr_probe); /** * stmmac_dvr_remove @@ -2914,8 +2915,8 @@ int stmmac_dvr_remove(struct net_device *ndev) return 0; } +EXPORT_SYMBOL_GPL(stmmac_dvr_remove); -#ifdef CONFIG_PM int stmmac_suspend(struct net_device *ndev) { struct stmmac_priv *priv = 
netdev_priv(ndev); @@ -2957,6 +2958,7 @@ int stmmac_suspend(struct net_device *ndev) priv->oldduplex = -1; return 0; } +EXPORT_SYMBOL_GPL(stmmac_suspend); int stmmac_resume(struct net_device *ndev) { @@ -3003,37 +3005,7 @@ int stmmac_resume(struct net_device *ndev) return 0; } -#endif /* CONFIG_PM */ - -/* Driver can be configured w/ and w/ both PCI and Platf drivers - * depending on the configuration selected. - */ -static int __init stmmac_init(void) -{ - int ret; - - ret = stmmac_register_platform(); - if (ret) - goto err; - ret = stmmac_register_pci(); - if (ret) - goto err_pci; - return 0; -err_pci: - stmmac_unregister_platform(); -err: - pr_err("stmmac: driver registration failed\n"); - return ret; -} - -static void __exit stmmac_exit(void) -{ - stmmac_unregister_platform(); - stmmac_unregister_pci(); -} - -module_init(stmmac_init); -module_exit(stmmac_exit); +EXPORT_SYMBOL_GPL(stmmac_resume); #ifndef MODULE static int __init stmmac_cmdline_opt(char *str) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 5084699baeab..77a6d68f7189 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -158,7 +158,7 @@ static const struct pci_device_id stmmac_id_table[] = { MODULE_DEVICE_TABLE(pci, stmmac_id_table); -struct pci_driver stmmac_pci_driver = { +static struct pci_driver stmmac_pci_driver = { .name = STMMAC_RESOURCE_NAME, .id_table = stmmac_id_table, .probe = stmmac_pci_probe, @@ -168,6 +168,8 @@ struct pci_driver stmmac_pci_driver = { }, }; +module_pci_driver(stmmac_pci_driver); + MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet PCI driver"); MODULE_AUTHOR("Rayagond Kokatanur <rayagond.kokatanur@vayavyalabs.com>"); MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 9f18401022e1..e22a960f8bd7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -362,7 +362,7 @@ static int stmmac_pltfr_remove(struct platform_device *pdev) return ret; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int stmmac_pltfr_suspend(struct device *dev) { int ret; @@ -388,13 +388,12 @@ static int stmmac_pltfr_resume(struct device *dev) return stmmac_resume(ndev); } - -#endif /* CONFIG_PM */ +#endif /* CONFIG_PM_SLEEP */ static SIMPLE_DEV_PM_OPS(stmmac_pltfr_pm_ops, - stmmac_pltfr_suspend, stmmac_pltfr_resume); + stmmac_pltfr_suspend, stmmac_pltfr_resume); -struct platform_driver stmmac_pltfr_driver = { +static struct platform_driver stmmac_pltfr_driver = { .probe = stmmac_pltfr_probe, .remove = stmmac_pltfr_remove, .driver = { @@ -402,9 +401,11 @@ struct platform_driver stmmac_pltfr_driver = { .owner = THIS_MODULE, .pm = &stmmac_pltfr_pm_ops, .of_match_table = of_match_ptr(stmmac_dt_ids), - }, + }, }; +module_platform_driver(stmmac_pltfr_driver); + MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet PLATFORM driver"); MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index d07bf4cb893f..26423adc35ee 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1670,12 +1670,14 @@ done: static int smsc95xx_resume(struct usb_interface *intf) { struct usbnet *dev = usb_get_intfdata(intf); - struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); - u8 
suspend_flags = pdata->suspend_flags; + struct smsc95xx_priv *pdata; + u8 suspend_flags; int ret; u32 val; BUG_ON(!dev); + pdata = (struct smsc95xx_priv *)(dev->data[0]); + suspend_flags = pdata->suspend_flags; netdev_dbg(dev->net, "resume suspend_flags=0x%02x\n", suspend_flags); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 5cc5eac47d1b..3d9bff00f24a 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -497,6 +497,7 @@ struct mlx4_caps { u16 hca_core_clock; u64 phys_port_id[MLX4_MAX_PORTS + 1]; int tunnel_offload_mode; + u8 rx_checksum_flags_port[MLX4_MAX_PORTS + 1]; }; struct mlx4_buf_list { diff --git a/include/net/irda/irda.h b/include/net/irda/irda.h index a059465101ff..42aa054aa67f 100644 --- a/include/net/irda/irda.h +++ b/include/net/irda/irda.h @@ -77,9 +77,9 @@ do { if(!(expr)) { \ #define IRDA_ASSERT_LABEL(label) #endif /* CONFIG_IRDA_DEBUG */ -#define IRDA_WARNING(args...) do { if (net_ratelimit()) printk(KERN_WARNING args); } while (0) -#define IRDA_MESSAGE(args...) do { if (net_ratelimit()) printk(KERN_INFO args); } while (0) -#define IRDA_ERROR(args...) do { if (net_ratelimit()) printk(KERN_ERR args); } while (0) +#define IRDA_ERROR net_err_ratelimited +#define IRDA_WARNING net_warn_ratelimited +#define IRDA_MESSAGE net_info_ratelimited /* * Magic numbers used by Linux-IrDA. Random numbers which must be unique to diff --git a/include/net/neighbour.h b/include/net/neighbour.h index dedfb188b1a7..eb070b3674a1 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -220,6 +220,13 @@ struct neigh_table { struct pneigh_entry **phash_buckets; }; +enum { + NEIGH_ARP_TABLE = 0, + NEIGH_ND_TABLE = 1, + NEIGH_DN_TABLE = 2, + NEIGH_NR_TABLES, +}; + static inline int neigh_parms_family(struct neigh_parms *p) { return p->tbl->family; @@ -240,8 +247,8 @@ static inline void *neighbour_priv(const struct neighbour *n) #define NEIGH_UPDATE_F_ISROUTER 0x40000000 #define NEIGH_UPDATE_F_ADMIN 0x80000000 -void neigh_table_init(struct neigh_table *tbl); -int neigh_table_clear(struct neigh_table *tbl); +void neigh_table_init(int index, struct neigh_table *tbl); +int neigh_table_clear(int index, struct neigh_table *tbl); struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev); struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, diff --git a/include/net/sock.h b/include/net/sock.h index 6767d75ecb17..83a669f83bae 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -273,6 +273,7 @@ struct cg_proto; * @sk_rcvtimeo: %SO_RCVTIMEO setting * @sk_sndtimeo: %SO_SNDTIMEO setting * @sk_rxhash: flow hash received from netif layer + * @sk_incoming_cpu: record cpu processing incoming packets * @sk_txhash: computed flow hash for use on transmit * @sk_filter: socket filtering instructions * @sk_protinfo: private area, net family specific, when not using slab @@ -350,6 +351,12 @@ struct sock { #ifdef CONFIG_RPS __u32 sk_rxhash; #endif + u16 sk_incoming_cpu; + /* 16bit hole + * Warned : sk_incoming_cpu can be set from softirq, + * Do not use this hole without fully understanding possible issues. 
+ */ + __u32 sk_txhash; #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sk_napi_id; @@ -833,6 +840,11 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) return sk->sk_backlog_rcv(sk, skb); } +static inline void sk_incoming_cpu_update(struct sock *sk) +{ + sk->sk_incoming_cpu = raw_smp_processor_id(); +} + static inline void sock_rps_record_flow_hash(__u32 hash) { #ifdef CONFIG_RPS @@ -2276,13 +2288,6 @@ bool sk_ns_capable(const struct sock *sk, bool sk_capable(const struct sock *sk, int cap); bool sk_net_capable(const struct sock *sk, int cap); -/* - * Enable debug/info messages - */ -extern int net_msg_warn; -#define LIMIT_NETDEBUG(fmt, args...) \ - do { if (net_msg_warn && net_ratelimit()) printk(fmt,##args); } while(0) - extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; diff --git a/include/net/udplite.h b/include/net/udplite.h index 2caadabcd07b..9a28a5179400 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -40,7 +40,7 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) * checksum. UDP-Lite (like IPv6) mandates checksums, hence packets * with a zero checksum field are illegal. */ if (uh->check == 0) { - LIMIT_NETDEBUG(KERN_DEBUG "UDPLite: zeroed checksum field\n"); + net_dbg_ratelimited("UDPLite: zeroed checksum field\n"); return 1; } @@ -52,8 +52,8 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) /* * Coverage length violates RFC 3828: log and discard silently. */ - LIMIT_NETDEBUG(KERN_DEBUG "UDPLite: bad csum coverage %d/%d\n", - cscov, skb->len); + net_dbg_ratelimited("UDPLite: bad csum coverage %d/%d\n", + cscov, skb->len); return 1; } else if (cscov < skb->len) { diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index ea0796bdcf88..f541ccefd4ac 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -82,4 +82,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_INCOMING_CPU 49 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 26c36c4cf7e2..3a6dcaa359b7 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -157,6 +157,11 @@ enum ovs_packet_cmd { * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content * specified there. + * @OVS_PACKET_ATTR_EGRESS_TUN_KEY: Present for an %OVS_PACKET_CMD_ACTION + * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an + * %OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute, which is sent only if the + * output port is actually a tunnel port. Contains the output tunnel key + * extracted from the packet as nested %OVS_TUNNEL_KEY_ATTR_* attributes. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_PACKET_* commands. @@ -167,6 +172,8 @@ enum ovs_packet_attr { OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */ OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */ + OVS_PACKET_ATTR_EGRESS_TUN_KEY, /* Nested OVS_TUNNEL_KEY_ATTR_* + attributes. */ __OVS_PACKET_ATTR_MAX }; @@ -315,6 +322,8 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ OVS_TUNNEL_KEY_ATTR_OAM, /* No argument. OAM frame. */ OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */ + OVS_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. 
*/ + OVS_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. */ __OVS_TUNNEL_KEY_ATTR_MAX }; @@ -448,6 +457,8 @@ enum ovs_flow_attr { OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */ OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */ OVS_FLOW_ATTR_MASK, /* Sequence of OVS_KEY_ATTR_* attributes. */ + OVS_FLOW_ATTR_PROBE, /* Flow operation is a feature probe, error + * logging should be suppressed. */ __OVS_FLOW_ATTR_MAX }; @@ -480,11 +491,15 @@ enum ovs_sample_attr { * message should be sent. Required. * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA. + * @OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: If present, u32 output port to get + * tunnel info. */ enum ovs_userspace_attr { OVS_USERSPACE_ATTR_UNSPEC, OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */ OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */ + OVS_USERSPACE_ATTR_EGRESS_TUN_PORT, /* Optional, u32 output port + * to get tunnel info. */ __OVS_USERSPACE_ATTR_MAX }; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index edd04116ecb7..8e38f17288d3 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -56,7 +56,6 @@ static void __neigh_notify(struct neighbour *n, int type, int flags); static void neigh_update_notify(struct neighbour *neigh); static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); -static struct neigh_table *neigh_tables; #ifdef CONFIG_PROC_FS static const struct file_operations neigh_stat_seq_fops; #endif @@ -87,13 +86,8 @@ static const struct file_operations neigh_stat_seq_fops; the most complicated procedure, which we allow is dev->hard_header. It is supposed, that dev->hard_header is simplistic and does not make callbacks to neighbour tables. - - The last lock is neigh_tbl_lock. It is pure SMP lock, protecting - list of neighbour tables. This list is used only in process context, */ -static DEFINE_RWLOCK(neigh_tbl_lock); - static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb) { kfree_skb(skb); @@ -1520,7 +1514,9 @@ static void neigh_parms_destroy(struct neigh_parms *parms) static struct lock_class_key neigh_table_proxy_queue_class; -static void neigh_table_init_no_netlink(struct neigh_table *tbl) +static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly; + +void neigh_table_init(int index, struct neigh_table *tbl) { unsigned long now = jiffies; unsigned long phsize; @@ -1566,34 +1562,14 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl) tbl->last_flush = now; tbl->last_rand = now + tbl->parms.reachable_time * 20; -} - -void neigh_table_init(struct neigh_table *tbl) -{ - struct neigh_table *tmp; - neigh_table_init_no_netlink(tbl); - write_lock(&neigh_tbl_lock); - for (tmp = neigh_tables; tmp; tmp = tmp->next) { - if (tmp->family == tbl->family) - break; - } - tbl->next = neigh_tables; - neigh_tables = tbl; - write_unlock(&neigh_tbl_lock); - - if (unlikely(tmp)) { - pr_err("Registering multiple tables for family %d\n", - tbl->family); - dump_stack(); - } + neigh_tables[index] = tbl; } EXPORT_SYMBOL(neigh_table_init); -int neigh_table_clear(struct neigh_table *tbl) +int neigh_table_clear(int index, struct neigh_table *tbl) { - struct neigh_table **tp; - + neigh_tables[index] = NULL; /* It is not clean... 
Fix it to unload IPv6 module safely */ cancel_delayed_work_sync(&tbl->gc_work); del_timer_sync(&tbl->proxy_timer); @@ -1601,14 +1577,6 @@ int neigh_table_clear(struct neigh_table *tbl) neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) pr_crit("neighbour leakage\n"); - write_lock(&neigh_tbl_lock); - for (tp = &neigh_tables; *tp; tp = &(*tp)->next) { - if (*tp == tbl) { - *tp = tbl->next; - break; - } - } - write_unlock(&neigh_tbl_lock); call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu, neigh_hash_free_rcu); @@ -1626,12 +1594,32 @@ int neigh_table_clear(struct neigh_table *tbl) } EXPORT_SYMBOL(neigh_table_clear); +static struct neigh_table *neigh_find_table(int family) +{ + struct neigh_table *tbl = NULL; + + switch (family) { + case AF_INET: + tbl = neigh_tables[NEIGH_ARP_TABLE]; + break; + case AF_INET6: + tbl = neigh_tables[NEIGH_ND_TABLE]; + break; + case AF_DECnet: + tbl = neigh_tables[NEIGH_DN_TABLE]; + break; + } + + return tbl; +} + static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *dst_attr; struct neigh_table *tbl; + struct neighbour *neigh; struct net_device *dev = NULL; int err = -EINVAL; @@ -1652,39 +1640,31 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh) } } - read_lock(&neigh_tbl_lock); - for (tbl = neigh_tables; tbl; tbl = tbl->next) { - struct neighbour *neigh; - - if (tbl->family != ndm->ndm_family) - continue; - read_unlock(&neigh_tbl_lock); - - if (nla_len(dst_attr) < tbl->key_len) - goto out; + tbl = neigh_find_table(ndm->ndm_family); + if (tbl == NULL) + return -EAFNOSUPPORT; - if (ndm->ndm_flags & NTF_PROXY) { - err = pneigh_delete(tbl, net, nla_data(dst_attr), dev); - goto out; - } + if (nla_len(dst_attr) < tbl->key_len) + goto out; - if (dev == NULL) - goto out; + if (ndm->ndm_flags & NTF_PROXY) { + err = pneigh_delete(tbl, net, nla_data(dst_attr), dev); + goto out; + } - neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); - if (neigh == NULL) { - err = -ENOENT; - goto out; - } + if (dev == NULL) + goto out; - err = neigh_update(neigh, NULL, NUD_FAILED, - NEIGH_UPDATE_F_OVERRIDE | - NEIGH_UPDATE_F_ADMIN); - neigh_release(neigh); + neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); + if (neigh == NULL) { + err = -ENOENT; goto out; } - read_unlock(&neigh_tbl_lock); - err = -EAFNOSUPPORT; + + err = neigh_update(neigh, NULL, NUD_FAILED, + NEIGH_UPDATE_F_OVERRIDE | + NEIGH_UPDATE_F_ADMIN); + neigh_release(neigh); out: return err; @@ -1692,11 +1672,14 @@ out: static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh) { + int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE; struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; struct net_device *dev = NULL; + struct neighbour *neigh; + void *dst, *lladdr; int err; ASSERT_RTNL(); @@ -1720,70 +1703,60 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh) goto out; } - read_lock(&neigh_tbl_lock); - for (tbl = neigh_tables; tbl; tbl = tbl->next) { - int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE; - struct neighbour *neigh; - void *dst, *lladdr; + tbl = neigh_find_table(ndm->ndm_family); + if (tbl == NULL) + return -EAFNOSUPPORT; - if (tbl->family != ndm->ndm_family) - continue; - read_unlock(&neigh_tbl_lock); + if (nla_len(tb[NDA_DST]) < tbl->key_len) + goto out; + dst = nla_data(tb[NDA_DST]); + lladdr = tb[NDA_LLADDR] ? 
nla_data(tb[NDA_LLADDR]) : NULL; - if (nla_len(tb[NDA_DST]) < tbl->key_len) - goto out; - dst = nla_data(tb[NDA_DST]); - lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; + if (ndm->ndm_flags & NTF_PROXY) { + struct pneigh_entry *pn; + + err = -ENOBUFS; + pn = pneigh_lookup(tbl, net, dst, dev, 1); + if (pn) { + pn->flags = ndm->ndm_flags; + err = 0; + } + goto out; + } - if (ndm->ndm_flags & NTF_PROXY) { - struct pneigh_entry *pn; + if (dev == NULL) + goto out; - err = -ENOBUFS; - pn = pneigh_lookup(tbl, net, dst, dev, 1); - if (pn) { - pn->flags = ndm->ndm_flags; - err = 0; - } + neigh = neigh_lookup(tbl, dst, dev); + if (neigh == NULL) { + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { + err = -ENOENT; goto out; } - if (dev == NULL) + neigh = __neigh_lookup_errno(tbl, dst, dev); + if (IS_ERR(neigh)) { + err = PTR_ERR(neigh); + goto out; + } + } else { + if (nlh->nlmsg_flags & NLM_F_EXCL) { + err = -EEXIST; + neigh_release(neigh); goto out; - - neigh = neigh_lookup(tbl, dst, dev); - if (neigh == NULL) { - if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { - err = -ENOENT; - goto out; - } - - neigh = __neigh_lookup_errno(tbl, dst, dev); - if (IS_ERR(neigh)) { - err = PTR_ERR(neigh); - goto out; - } - } else { - if (nlh->nlmsg_flags & NLM_F_EXCL) { - err = -EEXIST; - neigh_release(neigh); - goto out; - } - - if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) - flags &= ~NEIGH_UPDATE_F_OVERRIDE; } - if (ndm->ndm_flags & NTF_USE) { - neigh_event_send(neigh, NULL); - err = 0; - } else - err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); - neigh_release(neigh); - goto out; + if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) + flags &= ~NEIGH_UPDATE_F_OVERRIDE; } - read_unlock(&neigh_tbl_lock); - err = -EAFNOSUPPORT; + if (ndm->ndm_flags & NTF_USE) { + neigh_event_send(neigh, NULL); + err = 0; + } else + err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); + neigh_release(neigh); + out: return err; } @@ -1982,7 +1955,8 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) struct neigh_table *tbl; struct ndtmsg *ndtmsg; struct nlattr *tb[NDTA_MAX+1]; - int err; + bool found = false; + int err, tidx; err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, nl_neightbl_policy); @@ -1995,19 +1969,21 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) } ndtmsg = nlmsg_data(nlh); - read_lock(&neigh_tbl_lock); - for (tbl = neigh_tables; tbl; tbl = tbl->next) { + + for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { + tbl = neigh_tables[tidx]; + if (!tbl) + continue; if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) continue; - - if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) + if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) { + found = true; break; + } } - if (tbl == NULL) { - err = -ENOENT; - goto errout_locked; - } + if (!found) + return -ENOENT; /* * We acquire tbl->lock to be nice to the periodic timers and @@ -2118,8 +2094,6 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) errout_tbl_lock: write_unlock_bh(&tbl->lock); -errout_locked: - read_unlock(&neigh_tbl_lock); errout: return err; } @@ -2134,10 +2108,13 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; - read_lock(&neigh_tbl_lock); - for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) { + for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { struct neigh_parms *p; + tbl = neigh_tables[tidx]; + if (!tbl) + continue; + if (tidx < tbl_skip || (family && tbl->family != family)) continue; @@ -2168,7 
+2145,6 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) neigh_skip = 0; } out: - read_unlock(&neigh_tbl_lock); cb->args[0] = tidx; cb->args[1] = nidx; @@ -2351,7 +2327,6 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) int proxy = 0; int err; - read_lock(&neigh_tbl_lock); family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; /* check for full ndmsg structure presence, family member is @@ -2363,8 +2338,11 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; - for (tbl = neigh_tables, t = 0; tbl; - tbl = tbl->next, t++) { + for (t = 0; t < NEIGH_NR_TABLES; t++) { + tbl = neigh_tables[t]; + + if (!tbl) + continue; if (t < s_t || (family && tbl->family != family)) continue; if (t > s_t) @@ -2377,7 +2355,6 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) break; } - read_unlock(&neigh_tbl_lock); cb->args[0] = t; return skb->len; diff --git a/net/core/sock.c b/net/core/sock.c index ac56dd06c306..0725cf0cb685 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1213,6 +1213,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_max_pacing_rate; break; + case SO_INCOMING_CPU: + v.val = sk->sk_incoming_cpu; + break; + default: return -ENOPROTOOPT; } @@ -1517,6 +1521,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_err = 0; newsk->sk_priority = 0; + newsk->sk_incoming_cpu = raw_smp_processor_id(); /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index cf9cd13509a7..f93f092fe226 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -26,6 +26,8 @@ static int zero = 0; static int one = 1; static int ushort_max = USHRT_MAX; +static int net_msg_warn; /* Unused, but still a sysctl */ + #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) diff --git a/net/core/utils.c b/net/core/utils.c index efc76dd9dcd1..7b803884c162 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -33,9 +33,6 @@ #include <asm/byteorder.h> #include <asm/uaccess.h> -int net_msg_warn __read_mostly = 1; -EXPORT_SYMBOL(net_msg_warn); - DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10); /* * All net warning printk()s should be guarded by this function. 
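
Note on the mlx4 CHECKSUM_COMPLETE conversion earlier in this diff: get_fixed_ipv4_csum()/get_fixed_ipv6_csum() work because IP checksums are plain one's-complement sums, so the pseudo-header contribution the hardware folds in can be removed again by adding its complement, which is how the kernel's csum_sub() is expressed in terms of csum_add(). The standalone sketch below reimplements that arithmetic with local helpers purely for illustration; none of these names come from the kernel.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* 32-bit accumulator one's-complement sum over big-endian 16-bit words (RFC 1071) */
static uint32_t ocsum(const uint8_t *p, size_t len, uint32_t sum)
{
	while (len > 1) {
		sum += (uint32_t)p[0] << 8 | p[1];
		p += 2;
		len -= 2;
	}
	if (len)
		sum += (uint32_t)p[0] << 8;
	return sum;
}

static uint16_t fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/* analogues of csum_add()/csum_sub(): subtraction is addition of the complement */
static uint16_t oc_add(uint16_t a, uint16_t b) { return fold((uint32_t)a + b); }
static uint16_t oc_sub(uint16_t a, uint16_t b) { return oc_add(a, (uint16_t)~b); }

int main(void)
{
	uint8_t payload[] = "example L4 payload";
	uint32_t saddr = inet_addr("192.0.2.1");	/* network byte order */
	uint32_t daddr = inet_addr("192.0.2.2");
	uint8_t pseudo[12];
	uint16_t payload_sum, pseudo_sum, hw_sum;

	/* IPv4 pseudo-header: saddr, daddr, zero byte, protocol, L4 length */
	memcpy(pseudo, &saddr, 4);
	memcpy(pseudo + 4, &daddr, 4);
	pseudo[8] = 0;
	pseudo[9] = 17;					/* IPPROTO_UDP */
	pseudo[10] = sizeof(payload) >> 8;
	pseudo[11] = sizeof(payload) & 0xff;

	payload_sum = fold(ocsum(payload, sizeof(payload), 0));
	pseudo_sum  = fold(ocsum(pseudo, sizeof(pseudo), 0));

	/* what the NIC reports: payload and pseudo-header summed together */
	hw_sum = oc_add(payload_sum, pseudo_sum);

	/* what the driver does before setting skb->csum: strip the pseudo-header again */
	printf("payload sum 0x%04x, recovered 0x%04x\n",
	       payload_sum, oc_sub(hw_sum, pseudo_sum));
	return 0;
}

The same cancellation is what lets check_csum() in en_rx.c add a non-stripped VLAN header into the sum or subtract the IPv6 pseudo-header on demand.
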
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index c8121ceddb9e..7ca7c3143da3 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -591,7 +591,7 @@ static const struct file_operations dn_neigh_seq_fops = { void __init dn_neigh_init(void) { - neigh_table_init(&dn_neigh_table); + neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table); proc_create("decnet_neigh", S_IRUGO, init_net.proc_net, &dn_neigh_seq_fops); } @@ -599,5 +599,5 @@ void __init dn_neigh_init(void) void __exit dn_neigh_cleanup(void) { remove_proc_entry("decnet_neigh", init_net.proc_net); - neigh_table_clear(&dn_neigh_table); + neigh_table_clear(NEIGH_DN_TABLE, &dn_neigh_table); } diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index dd646a8025cb..4648f12098ad 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -192,12 +192,12 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, */ drv = dsa_switch_probe(host_dev, pd->sw_addr, &name); if (drv == NULL) { - printk(KERN_ERR "%s[%d]: could not detect attached switch\n", - dst->master_netdev->name, index); + netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", + index); return ERR_PTR(-EINVAL); } - printk(KERN_INFO "%s[%d]: detected a %s switch\n", - dst->master_netdev->name, index, name); + netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n", + index, name); /* @@ -225,7 +225,8 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, if (!strcmp(name, "cpu")) { if (dst->cpu_switch != -1) { - printk(KERN_ERR "multiple cpu ports?!\n"); + netdev_err(dst->master_netdev, + "multiple cpu ports?!\n"); ret = -EINVAL; goto out; } @@ -320,10 +321,8 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, slave_dev = dsa_slave_create(ds, parent, i, pd->port_names[i]); if (slave_dev == NULL) { - printk(KERN_ERR "%s[%d]: can't create dsa " - "slave device for port %d(%s)\n", - dst->master_netdev->name, - index, i, pd->port_names[i]); + netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s)\n", + index, i, pd->port_names[i]); continue; } @@ -701,15 +700,13 @@ static inline void dsa_of_remove(struct platform_device *pdev) static int dsa_probe(struct platform_device *pdev) { - static int dsa_version_printed; struct dsa_platform_data *pd = pdev->dev.platform_data; struct net_device *dev; struct dsa_switch_tree *dst; int i, ret; - if (!dsa_version_printed++) - printk(KERN_NOTICE "Distributed Switch Architecture " - "driver version %s\n", dsa_driver_version); + pr_notice_once("Distributed Switch Architecture driver version %s\n", + dsa_driver_version); if (pdev->dev.of_node) { ret = dsa_of_probe(pdev); @@ -753,9 +750,8 @@ static int dsa_probe(struct platform_device *pdev) ds = dsa_switch_setup(dst, i, &pdev->dev, pd->chip[i].host_dev); if (IS_ERR(ds)) { - printk(KERN_ERR "%s[%d]: couldn't create dsa switch " - "instance (error %ld)\n", dev->name, i, - PTR_ERR(ds)); + netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n", + i, PTR_ERR(ds)); continue; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 0ea466dad818..528380a3e296 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -532,7 +532,7 @@ static void dsa_slave_phy_setup(struct dsa_slave_priv *p, */ ret = of_phy_register_fixed_link(port_dn); if (ret) { - pr_err("failed to register fixed PHY\n"); + netdev_err(slave_dev, "failed to register fixed PHY\n"); return; } phy_is_fixed = true; @@ -558,8 +558,8 @@ static void dsa_slave_phy_setup(struct dsa_slave_priv *p, phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, 
p->phy_interface); } else { - pr_info("attached PHY at address %d [%s]\n", - p->phy->addr, p->phy->drv->name); + netdev_info(slave_dev, "attached PHY at address %d [%s]\n", + p->phy->addr, p->phy->drv->name); } } @@ -657,8 +657,8 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, ret = register_netdev(slave_dev); if (ret) { - printk(KERN_ERR "%s: error %d registering interface %s\n", - master->name, ret, slave_dev->name); + netdev_err(master, "error %d registering interface %s\n", + ret, slave_dev->name); free_netdev(slave_dev); return NULL; } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 16acb59d665e..205e1472aa78 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1292,7 +1292,7 @@ static int arp_proc_init(void); void __init arp_init(void) { - neigh_table_init(&arp_tbl); + neigh_table_init(NEIGH_ARP_TABLE, &arp_tbl); dev_add_pack(&arp_packet_type); arp_proc_init(); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5882f584910e..36b7bfa609d6 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -784,8 +784,8 @@ static void icmp_unreach(struct sk_buff *skb) */ switch (net->ipv4.sysctl_ip_no_pmtu_disc) { default: - LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"), - &iph->daddr); + net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n", + &iph->daddr); break; case 2: goto out; @@ -798,8 +798,8 @@ static void icmp_unreach(struct sk_buff *skb) } break; case ICMP_SR_FAILED: - LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: Source Route Failed\n"), - &iph->daddr); + net_dbg_ratelimited("%pI4: Source Route Failed\n", + &iph->daddr); break; default: break; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 19419b60cb37..e7920352646a 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -458,6 +458,6 @@ void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, ". 
Dropping fragment.\n"; if (PTR_ERR(q) == -ENOBUFS) - LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg); + net_dbg_ratelimited("%s%s", prefix, msg); } EXPORT_SYMBOL(inet_frag_maybe_warn_overflow); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 4d964dadd655..e5b6d0ddcb58 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -618,8 +618,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, return 0; out_nomem: - LIMIT_NETDEBUG(KERN_ERR pr_fmt("queue_glue: no memory for gluing queue %p\n"), - qp); + net_dbg_ratelimited("queue_glue: no memory for gluing queue %p\n", qp); err = -ENOMEM; goto out_fail; out_oversize: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5f979c7f5135..d91436ba17ea 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5854,12 +5854,12 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) struct inet_request_sock *ireq = inet_rsk(req); if (family == AF_INET) - LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"), - &ireq->ir_rmt_addr, port); + net_dbg_ratelimited("drop open request from %pI4/%u\n", + &ireq->ir_rmt_addr, port); #if IS_ENABLED(CONFIG_IPV6) else if (family == AF_INET6) - LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI6/%u\n"), - &ireq->ir_v6_rmt_addr, port); + net_dbg_ratelimited("drop open request from %pI6/%u\n", + &ireq->ir_v6_rmt_addr, port); #endif } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9c7d7621466b..2c6a955fd5c3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1429,6 +1429,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst = sk->sk_rx_dst; sock_rps_save_rxhash(sk, skb); + sk_mark_napi_id(sk, skb); if (dst) { if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || dst->ops->check(dst, 0) == NULL) { @@ -1450,6 +1451,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (nsk != sk) { sock_rps_save_rxhash(nsk, skb); + sk_mark_napi_id(sk, skb); if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; goto reset; @@ -1661,7 +1663,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; - sk_mark_napi_id(sk, skb); + sk_incoming_cpu_update(sk); skb->dev = NULL; bh_lock_sock_nested(sk); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 9b21ae8b2e31..1829c7fbc77e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -374,17 +374,19 @@ void tcp_retransmit_timer(struct sock *sk) */ struct inet_sock *inet = inet_sk(sk); if (sk->sk_family == AF_INET) { - LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"), - &inet->inet_daddr, - ntohs(inet->inet_dport), inet->inet_num, - tp->snd_una, tp->snd_nxt); + net_dbg_ratelimited("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", + &inet->inet_daddr, + ntohs(inet->inet_dport), + inet->inet_num, + tp->snd_una, tp->snd_nxt); } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { - LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"), - &sk->sk_v6_daddr, - ntohs(inet->inet_dport), inet->inet_num, - tp->snd_una, tp->snd_nxt); + net_dbg_ratelimited("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", + &sk->sk_v6_daddr, + ntohs(inet->inet_dport), + inet->inet_num, + tp->snd_una, tp->snd_nxt); } #endif if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5d0fdca8e965..1b6e9d5fadef 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ 
-1051,7 +1051,7 @@ back_from_confirm: /* ... which is an evident application bug. --ANK */ release_sock(sk); - LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("cork app bug 2\n")); + net_dbg_ratelimited("cork app bug 2\n"); err = -EINVAL; goto out; } @@ -1133,7 +1133,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, if (unlikely(!up->pending)) { release_sock(sk); - LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("udp cork app bug 3\n")); + net_dbg_ratelimited("udp cork app bug 3\n"); return -EINVAL; } @@ -1445,6 +1445,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (inet_sk(sk)->inet_daddr) { sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); + sk_incoming_cpu_update(sk); } rc = sock_queue_rcv_skb(sk, skb); @@ -1546,8 +1547,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) * provided by the application." */ if (up->pcrlen == 0) { /* full coverage was set */ - LIMIT_NETDEBUG(KERN_WARNING "UDPLite: partial coverage %d while full coverage %d requested\n", - UDP_SKB_CB(skb)->cscov, skb->len); + net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n", + UDP_SKB_CB(skb)->cscov, skb->len); goto drop; } /* The next case involves violating the min. coverage requested @@ -1557,8 +1558,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) * Therefore the above ...()->partial_cov statement is essential. */ if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { - LIMIT_NETDEBUG(KERN_WARNING "UDPLite: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); + net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n", + UDP_SKB_CB(skb)->cscov, up->pcrlen); goto drop; } } @@ -1827,11 +1828,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, return 0; short_packet: - LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", - proto == IPPROTO_UDPLITE ? "Lite" : "", - &saddr, ntohs(uh->source), - ulen, skb->len, - &daddr, ntohs(uh->dest)); + net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", + proto == IPPROTO_UDPLITE ? "Lite" : "", + &saddr, ntohs(uh->source), + ulen, skb->len, + &daddr, ntohs(uh->dest)); goto drop; csum_error: @@ -1839,10 +1840,10 @@ csum_error: * RFC1122: OK. Discards the bad packet silently (as far as * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ - LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", - proto == IPPROTO_UDPLITE ? "Lite" : "", - &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), - ulen); + net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", + proto == IPPROTO_UDPLITE ? 
"Lite" : "", + &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), + ulen); UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); drop: UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 06e897832a7a..251fcb48b216 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1411,10 +1411,8 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, if (unlikely(score->addr_type == IPV6_ADDR_ANY || score->addr_type & IPV6_ADDR_MULTICAST)) { - LIMIT_NETDEBUG(KERN_DEBUG - "ADDRCONF: unspecified / multicast address " - "assigned as unicast address on %s", - dev->name); + net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s", + dev->name); continue; } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 6d16eb0e0c7f..8ab1989198f6 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -272,10 +272,9 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) ipv6_rearrange_destopt(iph, exthdr.opth); case NEXTHDR_HOP: if (!zero_out_mutable_opts(exthdr.opth)) { - LIMIT_NETDEBUG( - KERN_WARNING "overrun %sopts\n", - nexthdr == NEXTHDR_HOP ? - "hop" : "dest"); + net_dbg_ratelimited("overrun %sopts\n", + nexthdr == NEXTHDR_HOP ? + "hop" : "dest"); return -EINVAL; } break; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 5c6996e44b14..cc1139687fd7 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -893,8 +893,8 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, break; } default: - LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n", - cmsg->cmsg_type); + net_dbg_ratelimited("invalid cmsg type: %d\n", + cmsg->cmsg_type); err = -EINVAL; goto exit_f; } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index d21d7b22eebc..d2c2d749b6db 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -286,8 +286,8 @@ static int esp_input_done2(struct sk_buff *skb, int err) err = -EINVAL; padlen = nexthdr[0]; if (padlen + 2 + alen >= elen) { - LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage " - "padlen=%d, elen=%d\n", padlen + 2, elen - alen); + net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n", + padlen + 2, elen - alen); goto out; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 601d896f22d0..a7bbbe45570b 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -184,7 +184,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) int ret; if (opt->dsthao) { - LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n"); + net_dbg_ratelimited("hao duplicated\n"); goto discard; } opt->dsthao = opt->dst1; @@ -193,14 +193,14 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff); if (hao->length != 16) { - LIMIT_NETDEBUG( - KERN_DEBUG "hao invalid option length = %d\n", hao->length); + net_dbg_ratelimited("hao invalid option length = %d\n", + hao->length); goto discard; } if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { - LIMIT_NETDEBUG( - KERN_DEBUG "hao is not an unicast addr: %pI6\n", &hao->addr); + net_dbg_ratelimited("hao is not an unicast addr: %pI6\n", + &hao->addr); goto discard; } @@ -551,8 +551,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra)); return true; } - LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", - nh[optoff + 1]); + net_dbg_ratelimited("ipv6_hop_ra: wrong RA length %d\n", + nh[optoff + 1]); 
kfree_skb(skb); return false; } @@ -566,8 +566,8 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) u32 pkt_len; if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { - LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", - nh[optoff+1]); + net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", + nh[optoff+1]); IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); goto drop; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 62c1037d9e83..092934032077 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -338,7 +338,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, * anycast. */ if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) { - LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n"); + net_dbg_ratelimited("icmp6_send: acast source\n"); dst_release(dst); return ERR_PTR(-EINVAL); } @@ -452,7 +452,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * and anycast addresses will be checked later. */ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { - LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n"); + net_dbg_ratelimited("icmp6_send: addr_any/mcast source\n"); return; } @@ -460,7 +460,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * Never answer to a ICMP packet. */ if (is_ineligible(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n"); + net_dbg_ratelimited("icmp6_send: no reply to icmp error\n"); return; } @@ -509,7 +509,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) len = skb->len - msg.offset; len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr)); if (len < 0) { - LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n"); + net_dbg_ratelimited("icmp: len problem\n"); goto out_dst_release; } @@ -706,9 +706,8 @@ static int icmpv6_rcv(struct sk_buff *skb) daddr = &ipv6_hdr(skb)->daddr; if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) { - LIMIT_NETDEBUG(KERN_DEBUG - "ICMPv6 checksum failed [%pI6c > %pI6c]\n", - saddr, daddr); + net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n", + saddr, daddr); goto csum_error; } @@ -781,7 +780,7 @@ static int icmpv6_rcv(struct sk_buff *skb) if (type & ICMPV6_INFOMSG_MASK) break; - LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n"); + net_dbg_ratelimited("icmpv6: msg of unknown type\n"); /* * error of unknown type. 
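
The LIMIT_NETDEBUG() conversions in the hunks above all follow one pattern: the format string is kept, but the message is now emitted through net_dbg_ratelimited(), i.e. at debug level, rate-limited, and (when CONFIG_DYNAMIC_DEBUG is set) generally controllable through the dynamic_debug facility. A minimal sketch of what a converted call site looks like, assuming only net_dbg_ratelimited() from <linux/net.h>; the function and message below are illustrative and not taken from this series:

#include <linux/net.h>

/* Illustrative only -- not a hunk from this series. */
static void example_drop_notice(int reason)
{
	/*
	 * Printed at debug level and rate-limited; with
	 * CONFIG_DYNAMIC_DEBUG it stays silent until enabled via
	 * /sys/kernel/debug/dynamic_debug/control.
	 */
	net_dbg_ratelimited("example: dropping packet, reason %d\n", reason);
}
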
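The tcp_ipv4.c, tcp_ipv6.c and udp.c hunks also move sk_mark_napi_id() to the point where a flow is established and add sk_incoming_cpu_update(), which records in the socket the CPU that performed receive processing; that recorded value is what the new SO_INCOMING_CPU socket option is meant to report back to userspace. A hedged userspace sketch, assuming SO_INCOMING_CPU is defined by the installed kernel/libc headers (the helper name is illustrative):

/* Illustrative userspace sketch, not part of the patch: query the CPU
 * that last handled receive processing for a connected socket, assuming
 * SO_INCOMING_CPU is defined by the installed headers.
 */
#include <stdio.h>
#include <sys/socket.h>

static int print_incoming_cpu(int fd)
{
	int cpu;
	socklen_t len = sizeof(cpu);

	if (getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len) < 0) {
		perror("getsockopt(SO_INCOMING_CPU)");
		return -1;
	}
	printf("socket %d: receive processing last ran on CPU %d\n", fd, cpu);
	return 0;
}

A typical use would be for a server to prefer scheduling the worker thread that services this socket on (or near) the reported CPU, so that RX processing and application processing share a cache domain.
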
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index f61429d391d3..b9779d441b12 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -97,16 +97,17 @@ static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) return -1; if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", - mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); + net_dbg_ratelimited("mip6: MH message too short: %d vs >=%d\n", + mh->ip6mh_hdrlen, + mip6_mh_len(mh->ip6mh_type)); mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) + skb_network_header_len(skb)); return -1; } if (mh->ip6mh_proto != IPPROTO_NONE) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", - mh->ip6mh_proto); + net_dbg_ratelimited("mip6: MH invalid payload proto = %d\n", + mh->ip6mh_proto); mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) + skb_network_header_len(skb)); return -1; @@ -288,7 +289,7 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, * XXX: packet if HAO exists. */ if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) { - LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n"); + net_dbg_ratelimited("mip6: hao exists already, override\n"); return offset; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 4cb45c1079a2..2c9f6bf57325 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1763,7 +1763,7 @@ int __init ndisc_init(void) /* * Initialize the neighbour table */ - neigh_table_init(&nd_tbl); + neigh_table_init(NEIGH_ND_TABLE, &nd_tbl); #ifdef CONFIG_SYSCTL err = neigh_sysctl_register(NULL, &nd_tbl.parms, @@ -1796,6 +1796,6 @@ void ndisc_cleanup(void) #ifdef CONFIG_SYSCTL neigh_sysctl_unregister(&nd_tbl.parms); #endif - neigh_table_clear(&nd_tbl); + neigh_table_clear(NEIGH_ND_TABLE, &nd_tbl); unregister_pernet_subsys(&ndisc_net_ops); } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d38e6a8d8b9f..398377a9d018 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -36,7 +36,7 @@ int ip6_route_me_harder(struct sk_buff *skb) err = dst->error; if (err) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); - LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); + net_dbg_ratelimited("ip6_route_me_harder: No more route\n"); dst_release(dst); return err; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ace29b60813c..1985b4933a6b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1293,6 +1293,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst = sk->sk_rx_dst; sock_rps_save_rxhash(sk, skb); + sk_mark_napi_id(sk, skb); if (dst) { if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || dst->ops->check(dst, np->rx_dst_cookie) == NULL) { @@ -1322,6 +1323,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) */ if (nsk != sk) { sock_rps_save_rxhash(nsk, skb); + sk_mark_napi_id(sk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) @@ -1454,7 +1456,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; - sk_mark_napi_id(sk, skb); + sk_incoming_cpu_update(sk); skb->dev = NULL; bh_lock_sock_nested(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b756355e9739..0ba3de4f2368 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -577,6 +577,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (!ipv6_addr_any(&sk->sk_v6_daddr)) { sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); + sk_incoming_cpu_update(sk); } rc = 
sock_queue_rcv_skb(sk, skb); @@ -659,15 +660,13 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { if (up->pcrlen == 0) { /* full coverage was set */ - LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage" - " %d while full coverage %d requested\n", - UDP_SKB_CB(skb)->cscov, skb->len); + net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", + UDP_SKB_CB(skb)->cscov, skb->len); goto drop; } if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { - LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d " - "too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); + net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n", + UDP_SKB_CB(skb)->cscov, up->pcrlen); goto drop; } } @@ -760,9 +759,9 @@ static void udp6_csum_zero_error(struct sk_buff *skb) /* RFC 2460 section 8.1 says that we SHOULD log * this error. Well, it is reasonable. */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", - &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source), - &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest)); + net_dbg_ratelimited("IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", + &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source), + &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest)); } /* @@ -930,14 +929,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, return 0; short_packet: - LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", - proto == IPPROTO_UDPLITE ? "-Lite" : "", - saddr, - ntohs(uh->source), - ulen, - skb->len, - daddr, - ntohs(uh->dest)); + net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", + proto == IPPROTO_UDPLITE ? "-Lite" : "", + saddr, ntohs(uh->source), + ulen, skb->len, + daddr, ntohs(uh->dest)); goto discard; csum_error: UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); @@ -1289,7 +1285,7 @@ back_from_confirm: /* ... which is an evident application bug. 
--ANK */ release_sock(sk); - LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); + net_dbg_ratelimited("udp cork app bug 2\n"); err = -EINVAL; goto out; } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index f7e589159e4a..394efa67934e 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -69,7 +69,7 @@ static void action_fifo_init(struct action_fifo *fifo) fifo->tail = 0; } -static bool action_fifo_is_empty(struct action_fifo *fifo) +static bool action_fifo_is_empty(const struct action_fifo *fifo) { return (fifo->head == fifo->tail); } @@ -92,7 +92,7 @@ static struct deferred_action *action_fifo_put(struct action_fifo *fifo) /* Return true if fifo is not full */ static struct deferred_action *add_deferred_actions(struct sk_buff *skb, - struct sw_flow_key *key, + const struct sw_flow_key *key, const struct nlattr *attr) { struct action_fifo *fifo; @@ -109,6 +109,16 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb, return da; } +static void invalidate_flow_key(struct sw_flow_key *key) +{ + key->eth.type = htons(0); +} + +static bool is_flow_key_valid(const struct sw_flow_key *key) +{ + return !!key->eth.type; +} + static int make_writable(struct sk_buff *skb, int write_len) { if (!pskb_may_pull(skb, write_len)) @@ -120,7 +130,7 @@ static int make_writable(struct sk_buff *skb, int write_len) return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } -static int push_mpls(struct sk_buff *skb, +static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_mpls *mpls) { __be32 *new_mpls_lse; @@ -151,10 +161,12 @@ static int push_mpls(struct sk_buff *skb, skb_set_inner_protocol(skb, skb->protocol); skb->protocol = mpls->mpls_ethertype; + invalidate_flow_key(key); return 0; } -static int pop_mpls(struct sk_buff *skb, const __be16 ethertype) +static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, + const __be16 ethertype) { struct ethhdr *hdr; int err; @@ -181,10 +193,13 @@ static int pop_mpls(struct sk_buff *skb, const __be16 ethertype) hdr->h_proto = ethertype; if (eth_p_mpls(skb->protocol)) skb->protocol = ethertype; + + invalidate_flow_key(key); return 0; } -static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse) +static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key, + const __be32 *mpls_lse) { __be32 *stack; int err; @@ -196,13 +211,12 @@ static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse) stack = (__be32 *)skb_mpls_header(skb); if (skb->ip_summed == CHECKSUM_COMPLETE) { __be32 diff[] = { ~(*stack), *mpls_lse }; - skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); } *stack = *mpls_lse; - + key->mpls.top_lse = *mpls_lse; return 0; } @@ -237,7 +251,7 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) return 0; } -static int pop_vlan(struct sk_buff *skb) +static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) { __be16 tci; int err; @@ -255,9 +269,12 @@ static int pop_vlan(struct sk_buff *skb) } /* move next vlan tag to hw accel tag */ if (likely(skb->protocol != htons(ETH_P_8021Q) || - skb->len < VLAN_ETH_HLEN)) + skb->len < VLAN_ETH_HLEN)) { + key->eth.tci = 0; return 0; + } + invalidate_flow_key(key); err = __pop_vlan_tci(skb, &tci); if (unlikely(err)) return err; @@ -266,7 +283,8 @@ static int pop_vlan(struct sk_buff *skb) return 0; } -static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan) +static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key, + const struct 
ovs_action_push_vlan *vlan) { if (unlikely(vlan_tx_tag_present(skb))) { u16 current_tag; @@ -283,12 +301,15 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla skb->csum = csum_add(skb->csum, csum_partial(skb->data + (2 * ETH_ALEN), VLAN_HLEN, 0)); + invalidate_flow_key(key); + } else { + key->eth.tci = vlan->vlan_tci; } __vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); return 0; } -static int set_eth_addr(struct sk_buff *skb, +static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_key_ethernet *eth_key) { int err; @@ -303,11 +324,13 @@ static int set_eth_addr(struct sk_buff *skb, ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); + ether_addr_copy(key->eth.src, eth_key->eth_src); + ether_addr_copy(key->eth.dst, eth_key->eth_dst); return 0; } static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, - __be32 *addr, __be32 new_addr) + __be32 *addr, __be32 new_addr) { int transport_len = skb->len - skb_transport_offset(skb); @@ -386,7 +409,8 @@ static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl) nh->ttl = new_ttl; } -static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key) +static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key, + const struct ovs_key_ipv4 *ipv4_key) { struct iphdr *nh; int err; @@ -398,22 +422,31 @@ static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key) nh = ip_hdr(skb); - if (ipv4_key->ipv4_src != nh->saddr) + if (ipv4_key->ipv4_src != nh->saddr) { set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src); + key->ipv4.addr.src = ipv4_key->ipv4_src; + } - if (ipv4_key->ipv4_dst != nh->daddr) + if (ipv4_key->ipv4_dst != nh->daddr) { set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst); + key->ipv4.addr.dst = ipv4_key->ipv4_dst; + } - if (ipv4_key->ipv4_tos != nh->tos) + if (ipv4_key->ipv4_tos != nh->tos) { ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos); + key->ip.tos = nh->tos; + } - if (ipv4_key->ipv4_ttl != nh->ttl) + if (ipv4_key->ipv4_ttl != nh->ttl) { set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl); + key->ip.ttl = ipv4_key->ipv4_ttl; + } return 0; } -static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key) +static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key, + const struct ovs_key_ipv6 *ipv6_key) { struct ipv6hdr *nh; int err; @@ -429,9 +462,12 @@ static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key) saddr = (__be32 *)&nh->saddr; daddr = (__be32 *)&nh->daddr; - if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) + if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) { set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr, ipv6_key->ipv6_src, true); + memcpy(&key->ipv6.addr.src, ipv6_key->ipv6_src, + sizeof(ipv6_key->ipv6_src)); + } if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) { unsigned int offset = 0; @@ -445,12 +481,18 @@ static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key) set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr, ipv6_key->ipv6_dst, recalc_csum); + memcpy(&key->ipv6.addr.dst, ipv6_key->ipv6_dst, + sizeof(ipv6_key->ipv6_dst)); } set_ipv6_tc(nh, ipv6_key->ipv6_tclass); + key->ip.tos = ipv6_get_dsfield(nh); + set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label)); - nh->hop_limit = ipv6_key->ipv6_hlimit; + key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); + nh->hop_limit = ipv6_key->ipv6_hlimit; + key->ip.ttl = ipv6_key->ipv6_hlimit; return 0; } @@ -478,7 
+520,8 @@ static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port) } } -static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key) +static int set_udp(struct sk_buff *skb, struct sw_flow_key *key, + const struct ovs_key_udp *udp_port_key) { struct udphdr *uh; int err; @@ -489,16 +532,21 @@ static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key) return err; uh = udp_hdr(skb); - if (udp_port_key->udp_src != uh->source) + if (udp_port_key->udp_src != uh->source) { set_udp_port(skb, &uh->source, udp_port_key->udp_src); + key->tp.src = udp_port_key->udp_src; + } - if (udp_port_key->udp_dst != uh->dest) + if (udp_port_key->udp_dst != uh->dest) { set_udp_port(skb, &uh->dest, udp_port_key->udp_dst); + key->tp.dst = udp_port_key->udp_dst; + } return 0; } -static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key) +static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key, + const struct ovs_key_tcp *tcp_port_key) { struct tcphdr *th; int err; @@ -509,17 +557,21 @@ static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key) return err; th = tcp_hdr(skb); - if (tcp_port_key->tcp_src != th->source) + if (tcp_port_key->tcp_src != th->source) { set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check); + key->tp.src = tcp_port_key->tcp_src; + } - if (tcp_port_key->tcp_dst != th->dest) + if (tcp_port_key->tcp_dst != th->dest) { set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check); + key->tp.dst = tcp_port_key->tcp_dst; + } return 0; } -static int set_sctp(struct sk_buff *skb, - const struct ovs_key_sctp *sctp_port_key) +static int set_sctp(struct sk_buff *skb, struct sw_flow_key *key, + const struct ovs_key_sctp *sctp_port_key) { struct sctphdr *sh; int err; @@ -546,6 +598,8 @@ static int set_sctp(struct sk_buff *skb, sh->checksum = old_csum ^ old_correct_csum ^ new_csum; skb_clear_hash(skb); + key->tp.src = sctp_port_key->sctp_src; + key->tp.dst = sctp_port_key->sctp_dst; } return 0; @@ -564,14 +618,15 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port) static int output_userspace(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *attr) { + struct ovs_tunnel_info info; struct dp_upcall_info upcall; const struct nlattr *a; int rem; upcall.cmd = OVS_PACKET_CMD_ACTION; - upcall.key = key; upcall.userdata = NULL; upcall.portid = 0; + upcall.egress_tun_info = NULL; for (a = nla_data(attr), rem = nla_len(attr); rem > 0; a = nla_next(a, &rem)) { @@ -583,10 +638,27 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, case OVS_USERSPACE_ATTR_PID: upcall.portid = nla_get_u32(a); break; + + case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: { + /* Get out tunnel info. */ + struct vport *vport; + + vport = ovs_vport_rcu(dp, nla_get_u32(a)); + if (vport) { + int err; + + err = ovs_vport_get_egress_tun_info(vport, skb, + &info); + if (!err) + upcall.egress_tun_info = &info; + } + break; } + + } /* End of switch. 
*/ } - return ovs_dp_upcall(dp, skb, &upcall); + return ovs_dp_upcall(dp, skb, key, &upcall); } static int sample(struct datapath *dp, struct sk_buff *skb, @@ -656,18 +728,20 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key, key->ovs_flow_hash = hash; } -static int execute_set_action(struct sk_buff *skb, - const struct nlattr *nested_attr) +static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key, + const struct nlattr *nested_attr) { int err = 0; switch (nla_type(nested_attr)) { case OVS_KEY_ATTR_PRIORITY: skb->priority = nla_get_u32(nested_attr); + key->phy.priority = skb->priority; break; case OVS_KEY_ATTR_SKB_MARK: skb->mark = nla_get_u32(nested_attr); + key->phy.skb_mark = skb->mark; break; case OVS_KEY_ATTR_TUNNEL_INFO: @@ -675,31 +749,31 @@ static int execute_set_action(struct sk_buff *skb, break; case OVS_KEY_ATTR_ETHERNET: - err = set_eth_addr(skb, nla_data(nested_attr)); + err = set_eth_addr(skb, key, nla_data(nested_attr)); break; case OVS_KEY_ATTR_IPV4: - err = set_ipv4(skb, nla_data(nested_attr)); + err = set_ipv4(skb, key, nla_data(nested_attr)); break; case OVS_KEY_ATTR_IPV6: - err = set_ipv6(skb, nla_data(nested_attr)); + err = set_ipv6(skb, key, nla_data(nested_attr)); break; case OVS_KEY_ATTR_TCP: - err = set_tcp(skb, nla_data(nested_attr)); + err = set_tcp(skb, key, nla_data(nested_attr)); break; case OVS_KEY_ATTR_UDP: - err = set_udp(skb, nla_data(nested_attr)); + err = set_udp(skb, key, nla_data(nested_attr)); break; case OVS_KEY_ATTR_SCTP: - err = set_sctp(skb, nla_data(nested_attr)); + err = set_sctp(skb, key, nla_data(nested_attr)); break; case OVS_KEY_ATTR_MPLS: - err = set_mpls(skb, nla_data(nested_attr)); + err = set_mpls(skb, key, nla_data(nested_attr)); break; } @@ -711,11 +785,15 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb, const struct nlattr *a, int rem) { struct deferred_action *da; - int err; - err = ovs_flow_key_update(skb, key); - if (err) - return err; + if (!is_flow_key_valid(key)) { + int err; + + err = ovs_flow_key_update(skb, key); + if (err) + return err; + } + BUG_ON(!is_flow_key_valid(key)); if (!nla_is_last(a, rem)) { /* Recirc action is the not the last action @@ -752,7 +830,8 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, /* Every output action needs a separate clone of 'skb', but the common * case is just a single output action, so that doing a clone and * then freeing the original skbuff is wasteful. So the following code - * is slightly obscure just to avoid that. */ + * is slightly obscure just to avoid that. + */ int prev_port = -1; const struct nlattr *a; int rem; @@ -784,21 +863,21 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; case OVS_ACTION_ATTR_PUSH_MPLS: - err = push_mpls(skb, nla_data(a)); + err = push_mpls(skb, key, nla_data(a)); break; case OVS_ACTION_ATTR_POP_MPLS: - err = pop_mpls(skb, nla_get_be16(a)); + err = pop_mpls(skb, key, nla_get_be16(a)); break; case OVS_ACTION_ATTR_PUSH_VLAN: - err = push_vlan(skb, nla_data(a)); + err = push_vlan(skb, key, nla_data(a)); if (unlikely(err)) /* skb already freed. 
*/ return err; break; case OVS_ACTION_ATTR_POP_VLAN: - err = pop_vlan(skb); + err = pop_vlan(skb, key); break; case OVS_ACTION_ATTR_RECIRC: @@ -813,7 +892,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; case OVS_ACTION_ATTR_SET: - err = execute_set_action(skb, nla_data(a)); + err = execute_set_action(skb, key, nla_data(a)); break; case OVS_ACTION_ATTR_SAMPLE: @@ -865,7 +944,8 @@ static void process_deferred_actions(struct datapath *dp) /* Execute a list of actions against 'skb'. */ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, - struct sw_flow_actions *acts, struct sw_flow_key *key) + const struct sw_flow_actions *acts, + struct sw_flow_key *key) { int level = this_cpu_read(exec_actions_level); int err; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 014485ec4b0d..ab141d49bb9d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -59,7 +59,7 @@ #include "vport-netdev.h" int ovs_net_id __read_mostly; -EXPORT_SYMBOL(ovs_net_id); +EXPORT_SYMBOL_GPL(ovs_net_id); static struct genl_family dp_packet_genl_family; static struct genl_family dp_flow_genl_family; @@ -131,13 +131,15 @@ int lockdep_ovsl_is_held(void) else return 1; } -EXPORT_SYMBOL(lockdep_ovsl_is_held); +EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held); #endif static struct vport *new_vport(const struct vport_parms *); static int queue_gso_packets(struct datapath *dp, struct sk_buff *, + const struct sw_flow_key *, const struct dp_upcall_info *); static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, + const struct sw_flow_key *, const struct dp_upcall_info *); /* Must be called with rcu_read_lock. */ @@ -176,7 +178,7 @@ const char *ovs_dp_name(const struct datapath *dp) return vport->ops->get_name(vport); } -static int get_dpifindex(struct datapath *dp) +static int get_dpifindex(const struct datapath *dp) { struct vport *local; int ifindex; @@ -271,10 +273,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) int error; upcall.cmd = OVS_PACKET_CMD_MISS; - upcall.key = key; upcall.userdata = NULL; upcall.portid = ovs_vport_find_upcall_portid(p, skb); - error = ovs_dp_upcall(dp, skb, &upcall); + upcall.egress_tun_info = NULL; + error = ovs_dp_upcall(dp, skb, key, &upcall); if (unlikely(error)) kfree_skb(skb); else @@ -298,6 +300,7 @@ out: } int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, + const struct sw_flow_key *key, const struct dp_upcall_info *upcall_info) { struct dp_stats_percpu *stats; @@ -309,9 +312,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, } if (!skb_is_gso(skb)) - err = queue_userspace_packet(dp, skb, upcall_info); + err = queue_userspace_packet(dp, skb, key, upcall_info); else - err = queue_gso_packets(dp, skb, upcall_info); + err = queue_gso_packets(dp, skb, key, upcall_info); if (err) goto err; @@ -328,39 +331,43 @@ err: } static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, + const struct sw_flow_key *key, const struct dp_upcall_info *upcall_info) { unsigned short gso_type = skb_shinfo(skb)->gso_type; - struct dp_upcall_info later_info; struct sw_flow_key later_key; struct sk_buff *segs, *nskb; + struct ovs_skb_cb ovs_cb; int err; + ovs_cb = *OVS_CB(skb); segs = __skb_gso_segment(skb, NETIF_F_SG, false); + *OVS_CB(skb) = ovs_cb; if (IS_ERR(segs)) return PTR_ERR(segs); if (segs == NULL) return -EINVAL; + if (gso_type & SKB_GSO_UDP) { + /* The initial flow key extracted by ovs_flow_key_extract() + * in this case is for a first 
fragment, so we need to + * properly mark later fragments. + */ + later_key = *key; + later_key.ip.frag = OVS_FRAG_TYPE_LATER; + } + /* Queue all of the segments. */ skb = segs; do { - err = queue_userspace_packet(dp, skb, upcall_info); + *OVS_CB(skb) = ovs_cb; + if (gso_type & SKB_GSO_UDP && skb != segs) + key = &later_key; + + err = queue_userspace_packet(dp, skb, key, upcall_info); if (err) break; - if (skb == segs && gso_type & SKB_GSO_UDP) { - /* The initial flow key extracted by ovs_flow_extract() - * in this case is for a first fragment, so we need to - * properly mark later fragments. - */ - later_key = *upcall_info->key; - later_key.ip.frag = OVS_FRAG_TYPE_LATER; - - later_info = *upcall_info; - later_info.key = &later_key; - upcall_info = &later_info; - } } while ((skb = skb->next)); /* Free all of the segments. */ @@ -375,7 +382,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, return err; } -static size_t upcall_msg_size(const struct nlattr *userdata, +static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, unsigned int hdrlen) { size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) @@ -383,13 +390,18 @@ static size_t upcall_msg_size(const struct nlattr *userdata, + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */ /* OVS_PACKET_ATTR_USERDATA */ - if (userdata) - size += NLA_ALIGN(userdata->nla_len); + if (upcall_info->userdata) + size += NLA_ALIGN(upcall_info->userdata->nla_len); + + /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */ + if (upcall_info->egress_tun_info) + size += nla_total_size(ovs_tun_key_attr_size()); return size; } static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, + const struct sw_flow_key *key, const struct dp_upcall_info *upcall_info) { struct ovs_header *upcall; @@ -440,7 +452,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, else hlen = skb->len; - len = upcall_msg_size(upcall_info->userdata, hlen); + len = upcall_msg_size(upcall_info, hlen); user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC); if (!user_skb) { err = -ENOMEM; @@ -452,7 +464,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, upcall->dp_ifindex = dp_ifindex; nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); - err = ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb); + err = ovs_nla_put_flow(key, key, user_skb); BUG_ON(err); nla_nest_end(user_skb, nla); @@ -461,6 +473,14 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, nla_len(upcall_info->userdata), nla_data(upcall_info->userdata)); + if (upcall_info->egress_tun_info) { + nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); + err = ovs_nla_put_egress_tunnel_key(user_skb, + upcall_info->egress_tun_info); + BUG_ON(err); + nla_nest_end(user_skb, nla); + } + /* Only reserve room for attribute header, packet data is added * in skb_zerocopy() */ if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { @@ -506,6 +526,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct vport *input_vport; int len; int err; + bool log = !a[OVS_FLOW_ATTR_PROBE]; err = -EINVAL; if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || @@ -539,12 +560,12 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) goto err_kfree_skb; err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet, - &flow->key); + &flow->key, log); if (err) goto err_flow_free; err = 
ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], - &flow->key, &acts); + &flow->key, &acts, log); if (err) goto err_flow_free; @@ -613,7 +634,7 @@ static struct genl_family dp_packet_genl_family = { .n_ops = ARRAY_SIZE(dp_packet_genl_ops), }; -static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, +static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, struct ovs_dp_megaflow_stats *mega_stats) { int i; @@ -835,15 +856,16 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) struct sw_flow_actions *acts; struct sw_flow_match match; int error; + bool log = !a[OVS_FLOW_ATTR_PROBE]; /* Must have key and actions. */ error = -EINVAL; if (!a[OVS_FLOW_ATTR_KEY]) { - OVS_NLERR("Flow key attribute not present in new flow.\n"); + OVS_NLERR(log, "Flow key attr not present in new flow."); goto error; } if (!a[OVS_FLOW_ATTR_ACTIONS]) { - OVS_NLERR("Flow actions attribute not present in new flow.\n"); + OVS_NLERR(log, "Flow actions attr not present in new flow."); goto error; } @@ -858,8 +880,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) /* Extract key. */ ovs_match_init(&match, &new_flow->unmasked_key, &mask); - error = ovs_nla_get_match(&match, - a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], + a[OVS_FLOW_ATTR_MASK], log); if (error) goto err_kfree_flow; @@ -867,9 +889,9 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) /* Validate actions. */ error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, - &acts); + &acts, log); if (error) { - OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); + OVS_NLERR(log, "Flow actions may not be safe on all matching packets."); goto err_kfree_flow; } @@ -922,6 +944,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* The unmasked key has to be the same for flow updates. */ if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { + /* Look for any overlapping flow. */ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (!flow) { error = -ENOENT; @@ -964,16 +987,18 @@ error: /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, const struct sw_flow_key *key, - const struct sw_flow_mask *mask) + const struct sw_flow_mask *mask, + bool log) { struct sw_flow_actions *acts; struct sw_flow_key masked_key; int error; ovs_flow_mask_key(&masked_key, key, mask); - error = ovs_nla_copy_actions(a, &masked_key, &acts); + error = ovs_nla_copy_actions(a, &masked_key, &acts, log); if (error) { - OVS_NLERR("Actions may not be safe on all matching packets.\n"); + OVS_NLERR(log, + "Actions may not be safe on all matching packets"); return ERR_PTR(error); } @@ -992,23 +1017,25 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) struct sw_flow_actions *old_acts = NULL, *acts = NULL; struct sw_flow_match match; int error; + bool log = !a[OVS_FLOW_ATTR_PROBE]; /* Extract key. */ error = -EINVAL; if (!a[OVS_FLOW_ATTR_KEY]) { - OVS_NLERR("Flow key attribute not present in set flow.\n"); + OVS_NLERR(log, "Flow key attribute not present in set flow."); goto error; } ovs_match_init(&match, &key, &mask); - error = ovs_nla_get_match(&match, - a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], + a[OVS_FLOW_ATTR_MASK], log); if (error) goto error; /* Validate actions. 
*/ if (a[OVS_FLOW_ATTR_ACTIONS]) { - acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask); + acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask, + log); if (IS_ERR(acts)) { error = PTR_ERR(acts); goto error; @@ -1089,14 +1116,16 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_match match; int err; + bool log = !a[OVS_FLOW_ATTR_PROBE]; if (!a[OVS_FLOW_ATTR_KEY]) { - OVS_NLERR("Flow get message rejected, Key attribute missing.\n"); + OVS_NLERR(log, + "Flow get message rejected, Key attribute missing."); return -EINVAL; } ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log); if (err) return err; @@ -1137,10 +1166,12 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_match match; int err; + bool log = !a[OVS_FLOW_ATTR_PROBE]; if (likely(a[OVS_FLOW_ATTR_KEY])) { ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, + log); if (unlikely(err)) return err; } @@ -1230,8 +1261,10 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, + [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, + [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG }, }; static const struct genl_ops dp_flow_genl_ops[] = { @@ -1332,7 +1365,7 @@ static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info) /* Called with rcu_read_lock or ovs_mutex. */ static struct datapath *lookup_datapath(struct net *net, - struct ovs_header *ovs_header, + const struct ovs_header *ovs_header, struct nlattr *a[OVS_DP_ATTR_MAX + 1]) { struct datapath *dp; @@ -1360,7 +1393,7 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in dp->user_features = 0; } -static void ovs_dp_change(struct datapath *dp, struct nlattr **a) +static void ovs_dp_change(struct datapath *dp, struct nlattr *a[]) { if (a[OVS_DP_ATTR_USER_FEATURES]) dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); @@ -1724,7 +1757,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, /* Called with ovs_mutex or RCU read lock. */ static struct vport *lookup_vport(struct net *net, - struct ovs_header *ovs_header, + const struct ovs_header *ovs_header, struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) { struct datapath *dp; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 1c56a80d6677..3ece94563079 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -108,18 +108,18 @@ struct ovs_skb_cb { /** * struct dp_upcall - metadata to include with a packet to send to userspace * @cmd: One of %OVS_PACKET_CMD_*. - * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull. * @userdata: If nonnull, its variable-length value is passed to userspace as * %OVS_PACKET_ATTR_USERDATA. - * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no - * packet is sent and the packet is accounted in the datapath's @n_lost + * @portid: Netlink portid to which packet should be sent. If @portid is 0 + * then no packet is sent and the packet is accounted in the datapath's @n_lost * counter. 
+ * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY. */ struct dp_upcall_info { - u8 cmd; - const struct sw_flow_key *key; + const struct ovs_tunnel_info *egress_tun_info; const struct nlattr *userdata; u32 portid; + u8 cmd; }; /** @@ -149,7 +149,7 @@ int lockdep_ovsl_is_held(void); #define rcu_dereference_ovsl(p) \ rcu_dereference_check(p, lockdep_ovsl_is_held()) -static inline struct net *ovs_dp_get_net(struct datapath *dp) +static inline struct net *ovs_dp_get_net(const struct datapath *dp) { return read_pnet(&dp->net); } @@ -185,23 +185,23 @@ extern struct genl_family dp_vport_genl_family; void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key); void ovs_dp_detach_port(struct vport *); int ovs_dp_upcall(struct datapath *, struct sk_buff *, - const struct dp_upcall_info *); + const struct sw_flow_key *, const struct dp_upcall_info *); const char *ovs_dp_name(const struct datapath *dp); struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, u8 cmd); int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, - struct sw_flow_actions *acts, struct sw_flow_key *); + const struct sw_flow_actions *, struct sw_flow_key *); void ovs_dp_notify_wq(struct work_struct *work); int action_fifos_init(void); void action_fifos_exit(void); -#define OVS_NLERR(fmt, ...) \ +#define OVS_NLERR(logging_allowed, fmt, ...) \ do { \ - if (net_ratelimit()) \ - pr_info("netlink: " fmt, ##__VA_ARGS__); \ + if (logging_allowed && net_ratelimit()) \ + pr_info("netlink: " fmt "\n", ##__VA_ARGS__); \ } while (0) #endif /* datapath.h */ diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 90a21010fc8f..70bef2ab7f2b 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -66,7 +66,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies) #define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF)) void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, - struct sk_buff *skb) + const struct sk_buff *skb) { struct flow_stats *stats; int node = numa_node_id(); @@ -679,7 +679,7 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) return key_extract(skb, key); } -int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info, +int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key) { /* Extract metadata from packet. */ @@ -712,12 +712,12 @@ int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info, int ovs_flow_key_extract_userspace(const struct nlattr *attr, struct sk_buff *skb, - struct sw_flow_key *key) + struct sw_flow_key *key, bool log) { int err; /* Extract metadata from netlink attributes. */ - err = ovs_nla_get_flow_metadata(attr, key); + err = ovs_nla_get_flow_metadata(attr, key, log); if (err) return err; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 4962bee81a11..a8b30f334388 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -37,8 +37,8 @@ struct sk_buff; /* Used to memset ovs_key_ipv4_tunnel padding. */ #define OVS_TUNNEL_KEY_SIZE \ - (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + \ - FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl)) + (offsetof(struct ovs_key_ipv4_tunnel, tp_dst) + \ + FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, tp_dst)) struct ovs_key_ipv4_tunnel { __be64 tun_id; @@ -47,11 +47,13 @@ struct ovs_key_ipv4_tunnel { __be16 tun_flags; u8 ipv4_tos; u8 ipv4_ttl; + __be16 tp_src; + __be16 tp_dst; } __packed __aligned(4); /* Minimize padding. 
*/ struct ovs_tunnel_info { struct ovs_key_ipv4_tunnel tunnel; - struct geneve_opt *options; + const struct geneve_opt *options; u8 options_len; }; @@ -64,27 +66,59 @@ struct ovs_tunnel_info { FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \ opt_len)) -static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, - const struct iphdr *iph, - __be64 tun_id, __be16 tun_flags, - struct geneve_opt *opts, - u8 opts_len) +static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, + __be32 saddr, __be32 daddr, + u8 tos, u8 ttl, + __be16 tp_src, + __be16 tp_dst, + __be64 tun_id, + __be16 tun_flags, + const struct geneve_opt *opts, + u8 opts_len) { tun_info->tunnel.tun_id = tun_id; - tun_info->tunnel.ipv4_src = iph->saddr; - tun_info->tunnel.ipv4_dst = iph->daddr; - tun_info->tunnel.ipv4_tos = iph->tos; - tun_info->tunnel.ipv4_ttl = iph->ttl; + tun_info->tunnel.ipv4_src = saddr; + tun_info->tunnel.ipv4_dst = daddr; + tun_info->tunnel.ipv4_tos = tos; + tun_info->tunnel.ipv4_ttl = ttl; tun_info->tunnel.tun_flags = tun_flags; - /* clear struct padding. */ - memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0, - sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE); + /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of + * the upper tunnel are used. + * E.g: GRE over IPSEC, the tp_src and tp_port are zero. + */ + tun_info->tunnel.tp_src = tp_src; + tun_info->tunnel.tp_dst = tp_dst; + + /* Clear struct padding. */ + if (sizeof(tun_info->tunnel) != OVS_TUNNEL_KEY_SIZE) + memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, + 0, sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE); tun_info->options = opts; tun_info->options_len = opts_len; } +static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, + const struct iphdr *iph, + __be16 tp_src, + __be16 tp_dst, + __be64 tun_id, + __be16 tun_flags, + const struct geneve_opt *opts, + u8 opts_len) +{ + __ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr, + iph->tos, iph->ttl, + tp_src, tp_dst, + tun_id, tun_flags, + opts, opts_len); +} + +#define OVS_SW_FLOW_KEY_METADATA_SIZE \ + (offsetof(struct sw_flow_key, recirc_id) + \ + FIELD_SIZEOF(struct sw_flow_key, recirc_id)) + struct sw_flow_key { u8 tun_opts[255]; u8 tun_opts_len; @@ -210,18 +244,19 @@ struct arp_eth_header { } __packed; void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags, - struct sk_buff *); + const struct sk_buff *); void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, unsigned long *used, __be16 *tcp_flags); void ovs_flow_stats_clear(struct sw_flow *); u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key); -int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info, struct sk_buff *skb, +int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, + struct sk_buff *skb, struct sw_flow_key *key); /* Extract key from packet coming from userspace. 
*/ int ovs_flow_key_extract_userspace(const struct nlattr *attr, struct sk_buff *skb, - struct sw_flow_key *key); + struct sw_flow_key *key, bool log); #endif /* flow.h */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ed3109761827..c0d066def228 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -112,7 +112,7 @@ static void update_range(struct sw_flow_match *match, } while (0) static bool match_validate(const struct sw_flow_match *match, - u64 key_attrs, u64 mask_attrs) + u64 key_attrs, u64 mask_attrs, bool log) { u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; u64 mask_allowed = key_attrs; /* At most allow all key attributes */ @@ -230,21 +230,41 @@ static bool match_validate(const struct sw_flow_match *match, if ((key_attrs & key_expected) != key_expected) { /* Key attributes check failed. */ - OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", - (unsigned long long)key_attrs, (unsigned long long)key_expected); + OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", + (unsigned long long)key_attrs, + (unsigned long long)key_expected); return false; } if ((mask_attrs & mask_allowed) != mask_attrs) { /* Mask attributes check failed. */ - OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", - (unsigned long long)mask_attrs, (unsigned long long)mask_allowed); + OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)", + (unsigned long long)mask_attrs, + (unsigned long long)mask_allowed); return false; } return true; } +size_t ovs_tun_key_attr_size(void) +{ + /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider + * updating this function. + */ + return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ + + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ + + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ + + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ +} + size_t ovs_key_attr_size(void) { /* Whenever adding new OVS_KEY_ FIELDS, we should consider @@ -254,15 +274,7 @@ size_t ovs_key_attr_size(void) return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ - + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ - + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ - + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ - + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ - + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ - + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ - + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ - + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ - + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + + ovs_tun_key_attr_size() + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ @@ -318,7 +330,7 @@ static bool is_all_zero(const u8 *fp, size_t size) static int __parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], - u64 *attrsp, bool nz) + u64 *attrsp, bool log, bool nz) { const struct nlattr *nla; u64 attrs; @@ -330,21 +342,20 @@ static int 
__parse_flow_nlattrs(const struct nlattr *attr, int expected_len; if (type > OVS_KEY_ATTR_MAX) { - OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n", + OVS_NLERR(log, "Key type %d is out of range max %d", type, OVS_KEY_ATTR_MAX); return -EINVAL; } if (attrs & (1 << type)) { - OVS_NLERR("Duplicate key attribute (type %d).\n", type); + OVS_NLERR(log, "Duplicate key (type %d).", type); return -EINVAL; } expected_len = ovs_key_lens[type]; if (nla_len(nla) != expected_len && expected_len != -1) { - OVS_NLERR("Key attribute has unexpected length (type=%d" - ", length=%d, expected=%d).\n", type, - nla_len(nla), expected_len); + OVS_NLERR(log, "Key %d has unexpected len %d expected %d", + type, nla_len(nla), expected_len); return -EINVAL; } @@ -354,7 +365,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, } } if (rem) { - OVS_NLERR("Message has %d unknown bytes.\n", rem); + OVS_NLERR(log, "Message has %d unknown bytes.", rem); return -EINVAL; } @@ -363,28 +374,84 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, } static int parse_flow_mask_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], u64 *attrsp) + const struct nlattr *a[], u64 *attrsp, + bool log) { - return __parse_flow_nlattrs(attr, a, attrsp, true); + return __parse_flow_nlattrs(attr, a, attrsp, log, true); } static int parse_flow_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], u64 *attrsp) + const struct nlattr *a[], u64 *attrsp, + bool log) { - return __parse_flow_nlattrs(attr, a, attrsp, false); + return __parse_flow_nlattrs(attr, a, attrsp, log, false); +} + +static int genev_tun_opt_from_nlattr(const struct nlattr *a, + struct sw_flow_match *match, bool is_mask, + bool log) +{ + unsigned long opt_key_offset; + + if (nla_len(a) > sizeof(match->key->tun_opts)) { + OVS_NLERR(log, "Geneve option length err (len %d, max %zu).", + nla_len(a), sizeof(match->key->tun_opts)); + return -EINVAL; + } + + if (nla_len(a) % 4 != 0) { + OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.", + nla_len(a)); + return -EINVAL; + } + + /* We need to record the length of the options passed + * down, otherwise packets with the same format but + * additional options will be silently matched. + */ + if (!is_mask) { + SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a), + false); + } else { + /* This is somewhat unusual because it looks at + * both the key and mask while parsing the + * attributes (and by extension assumes the key + * is parsed first). Normally, we would verify + * that each is the correct length and that the + * attributes line up in the validate function. + * However, that is difficult because this is + * variable length and we won't have the + * information later. 
+ */ + if (match->key->tun_opts_len != nla_len(a)) { + OVS_NLERR(log, "Geneve option len %d != mask len %d", + match->key->tun_opts_len, nla_len(a)); + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); + } + + opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0, + nla_len(a)); + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), + nla_len(a), is_mask); + return 0; } static int ipv4_tun_from_nlattr(const struct nlattr *attr, - struct sw_flow_match *match, bool is_mask) + struct sw_flow_match *match, bool is_mask, + bool log) { struct nlattr *a; int rem; bool ttl = false; __be16 tun_flags = 0; - unsigned long opt_key_offset; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); + int err; + static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), @@ -393,20 +460,21 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, [OVS_TUNNEL_KEY_ATTR_TTL] = 1, [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, + [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16), + [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16), [OVS_TUNNEL_KEY_ATTR_OAM] = 0, [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1, }; if (type > OVS_TUNNEL_KEY_ATTR_MAX) { - OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n", - type, OVS_TUNNEL_KEY_ATTR_MAX); + OVS_NLERR(log, "Tunnel attr %d out of range max %d", + type, OVS_TUNNEL_KEY_ATTR_MAX); return -EINVAL; } if (ovs_tunnel_key_lens[type] != nla_len(a) && ovs_tunnel_key_lens[type] != -1) { - OVS_NLERR("IPv4 tunnel attribute type has unexpected " - " length (type=%d, length=%d, expected=%d).\n", + OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", type, nla_len(a), ovs_tunnel_key_lens[type]); return -EINVAL; } @@ -440,62 +508,26 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, case OVS_TUNNEL_KEY_ATTR_CSUM: tun_flags |= TUNNEL_CSUM; break; + case OVS_TUNNEL_KEY_ATTR_TP_SRC: + SW_FLOW_KEY_PUT(match, tun_key.tp_src, + nla_get_be16(a), is_mask); + break; + case OVS_TUNNEL_KEY_ATTR_TP_DST: + SW_FLOW_KEY_PUT(match, tun_key.tp_dst, + nla_get_be16(a), is_mask); + break; case OVS_TUNNEL_KEY_ATTR_OAM: tun_flags |= TUNNEL_OAM; break; case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: - tun_flags |= TUNNEL_OPTIONS_PRESENT; - if (nla_len(a) > sizeof(match->key->tun_opts)) { - OVS_NLERR("Geneve option length exceeds maximum size (len %d, max %zu).\n", - nla_len(a), - sizeof(match->key->tun_opts)); - return -EINVAL; - } - - if (nla_len(a) % 4 != 0) { - OVS_NLERR("Geneve option length is not a multiple of 4 (len %d).\n", - nla_len(a)); - return -EINVAL; - } - - /* We need to record the length of the options passed - * down, otherwise packets with the same format but - * additional options will be silently matched. - */ - if (!is_mask) { - SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a), - false); - } else { - /* This is somewhat unusual because it looks at - * both the key and mask while parsing the - * attributes (and by extension assumes the key - * is parsed first). Normally, we would verify - * that each is the correct length and that the - * attributes line up in the validate function. - * However, that is difficult because this is - * variable length and we won't have the - * information later. 
- */ - if (match->key->tun_opts_len != nla_len(a)) { - OVS_NLERR("Geneve option key length (%d) is different from mask length (%d).", - match->key->tun_opts_len, - nla_len(a)); - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, - true); - } + err = genev_tun_opt_from_nlattr(a, match, is_mask, log); + if (err) + return err; - opt_key_offset = (unsigned long)GENEVE_OPTS( - (struct sw_flow_key *)0, - nla_len(a)); - SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, - nla_data(a), nla_len(a), - is_mask); + tun_flags |= TUNNEL_OPTIONS_PRESENT; break; default: - OVS_NLERR("Unknown IPv4 tunnel attribute (%d).\n", + OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", type); return -EINVAL; } @@ -504,18 +536,19 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); if (rem > 0) { - OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem); + OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.", + rem); return -EINVAL; } if (!is_mask) { if (!match->key->tun_key.ipv4_dst) { - OVS_NLERR("IPv4 tunnel destination address is zero.\n"); + OVS_NLERR(log, "IPv4 tunnel dst address is zero"); return -EINVAL; } if (!ttl) { - OVS_NLERR("IPv4 tunnel TTL not specified.\n"); + OVS_NLERR(log, "IPv4 tunnel TTL not specified."); return -EINVAL; } } @@ -548,6 +581,12 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_CSUM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) return -EMSGSIZE; + if (output->tp_src && + nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src)) + return -EMSGSIZE; + if (output->tp_dst && + nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst)) + return -EMSGSIZE; if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; @@ -559,7 +598,6 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, return 0; } - static int ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output, const struct geneve_opt *tun_opts, @@ -580,8 +618,17 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, return 0; } +int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, + const struct ovs_tunnel_info *egress_tun_info) +{ + return __ipv4_tun_to_nlattr(skb, &egress_tun_info->tunnel, + egress_tun_info->options, + egress_tun_info->options_len); +} + static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, - const struct nlattr **a, bool is_mask) + const struct nlattr **a, bool is_mask, + bool log) { if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); @@ -609,7 +656,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, if (is_mask) { in_port = 0xffffffff; /* Always exact match in_port. 
*/ } else if (in_port >= DP_MAX_PORTS) { - OVS_NLERR("Port (%d) exceeds maximum allowable (%d).\n", + OVS_NLERR(log, "Port %d exceeds max allowable %d", in_port, DP_MAX_PORTS); return -EINVAL; } @@ -628,7 +675,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, } if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, - is_mask)) + is_mask, log)) return -EINVAL; *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); } @@ -636,11 +683,12 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, } static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, - const struct nlattr **a, bool is_mask) + const struct nlattr **a, bool is_mask, + bool log) { int err; - err = metadata_from_nlattrs(match, &attrs, a, is_mask); + err = metadata_from_nlattrs(match, &attrs, a, is_mask, log); if (err) return err; @@ -661,9 +709,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); if (!(tci & htons(VLAN_TAG_PRESENT))) { if (is_mask) - OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n"); + OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit."); else - OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n"); + OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set."); return -EINVAL; } @@ -680,8 +728,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, /* Always exact match EtherType. */ eth_type = htons(0xffff); } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { - OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n", - ntohs(eth_type), ETH_P_802_3_MIN); + OVS_NLERR(log, "EtherType %x is less than min %x", + ntohs(eth_type), ETH_P_802_3_MIN); return -EINVAL; } @@ -696,8 +744,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { - OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n", - ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); + OVS_NLERR(log, "IPv4 frag type %d is out of range max %d", + ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); return -EINVAL; } SW_FLOW_KEY_PUT(match, ip.proto, @@ -720,8 +768,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { - OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n", - ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); + OVS_NLERR(log, "IPv6 frag type %d is out of range max %d", + ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); return -EINVAL; } SW_FLOW_KEY_PUT(match, ipv6.label, @@ -751,7 +799,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); if (!is_mask && (arp_key->arp_op & htons(0xff00))) { - OVS_NLERR("Unknown ARP opcode (opcode=%d).\n", + OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).", arp_key->arp_op); return -EINVAL; } @@ -852,7 +900,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, } if (attrs != 0) { - OVS_NLERR("Unknown key attributes (%llx).\n", + OVS_NLERR(log, "Unknown key attributes %llx", (unsigned long long)attrs); return -EINVAL; } @@ -893,10 +941,14 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val) * of this flow. * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink * attribute specifies the mask field of the wildcarded flow. 
+ * @log: Boolean to allow kernel error logging. Normally true, but when + * probing for feature compatibility this should be passed in as false to + * suppress unnecessary error logging. */ int ovs_nla_get_match(struct sw_flow_match *match, const struct nlattr *nla_key, - const struct nlattr *nla_mask) + const struct nlattr *nla_mask, + bool log) { const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; const struct nlattr *encap; @@ -906,7 +958,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, bool encap_valid = false; int err; - err = parse_flow_nlattrs(nla_key, a, &key_attrs); + err = parse_flow_nlattrs(nla_key, a, &key_attrs, log); if (err) return err; @@ -917,7 +969,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { - OVS_NLERR("Invalid Vlan frame.\n"); + OVS_NLERR(log, "Invalid Vlan frame."); return -EINVAL; } @@ -928,22 +980,22 @@ int ovs_nla_get_match(struct sw_flow_match *match, encap_valid = true; if (tci & htons(VLAN_TAG_PRESENT)) { - err = parse_flow_nlattrs(encap, a, &key_attrs); + err = parse_flow_nlattrs(encap, a, &key_attrs, log); if (err) return err; } else if (!tci) { /* Corner case for truncated 802.1Q header. */ if (nla_len(encap)) { - OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n"); + OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute."); return -EINVAL; } } else { - OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n"); + OVS_NLERR(log, "Encap attr is set for non-VLAN frame"); return -EINVAL; } } - err = ovs_key_from_nlattrs(match, key_attrs, a, false); + err = ovs_key_from_nlattrs(match, key_attrs, a, false, log); if (err) return err; @@ -977,7 +1029,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, nla_mask = newmask; } - err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs); + err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log); if (err) goto free_newmask; @@ -989,7 +1041,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, __be16 tci = 0; if (!encap_valid) { - OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n"); + OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame."); err = -EINVAL; goto free_newmask; } @@ -1001,12 +1053,13 @@ int ovs_nla_get_match(struct sw_flow_match *match, if (eth_type == htons(0xffff)) { mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); encap = a[OVS_KEY_ATTR_ENCAP]; - err = parse_flow_mask_nlattrs(encap, a, &mask_attrs); + err = parse_flow_mask_nlattrs(encap, a, + &mask_attrs, log); if (err) goto free_newmask; } else { - OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n", - ntohs(eth_type)); + OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).", + ntohs(eth_type)); err = -EINVAL; goto free_newmask; } @@ -1015,18 +1068,19 @@ int ovs_nla_get_match(struct sw_flow_match *match, tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); if (!(tci & htons(VLAN_TAG_PRESENT))) { - OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci)); + OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).", + ntohs(tci)); err = -EINVAL; goto free_newmask; } } - err = ovs_key_from_nlattrs(match, mask_attrs, a, true); + err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log); if (err) goto free_newmask; } - if (!match_validate(match, key_attrs, mask_attrs)) + if (!match_validate(match, key_attrs, mask_attrs, log)) err = -EINVAL; free_newmask: @@ -1039,6 +1093,9 @@ free_newmask: * @key: Receives 
extracted in_port, priority, tun_key and skb_mark. * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * sequence. + * @log: Boolean to allow kernel error logging. Normally true, but when + * probing for feature compatibility this should be passed in as false to + * suppress unnecessary error logging. * * This parses a series of Netlink attributes that form a flow key, which must * take the same form accepted by flow_from_nlattrs(), but only enough of it to @@ -1047,14 +1104,15 @@ free_newmask: */ int ovs_nla_get_flow_metadata(const struct nlattr *attr, - struct sw_flow_key *key) + struct sw_flow_key *key, + bool log) { const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; struct sw_flow_match match; u64 attrs = 0; int err; - err = parse_flow_nlattrs(attr, a, &attrs); + err = parse_flow_nlattrs(attr, a, &attrs, log); if (err) return -EINVAL; @@ -1063,7 +1121,7 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr, key->phy.in_port = DP_MAX_PORTS; - return metadata_from_nlattrs(&match, &attrs, a, false); + return metadata_from_nlattrs(&match, &attrs, a, false, log); } int ovs_nla_put_flow(const struct sw_flow_key *swkey, @@ -1283,12 +1341,12 @@ nla_put_failure: #define MAX_ACTIONS_BUFSIZE (32 * 1024) -static struct sw_flow_actions *nla_alloc_flow_actions(int size) +static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) { struct sw_flow_actions *sfa; if (size > MAX_ACTIONS_BUFSIZE) { - OVS_NLERR("Flow action size (%u bytes) exceeds maximum", size); + OVS_NLERR(log, "Flow action size %u bytes exceeds max", size); return ERR_PTR(-EINVAL); } @@ -1308,7 +1366,7 @@ void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) } static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, - int attr_len) + int attr_len, bool log) { struct sw_flow_actions *acts; @@ -1328,7 +1386,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = MAX_ACTIONS_BUFSIZE; } - acts = nla_alloc_flow_actions(new_acts_size); + acts = nla_alloc_flow_actions(new_acts_size, log); if (IS_ERR(acts)) return (void *)acts; @@ -1343,11 +1401,11 @@ out: } static struct nlattr *__add_action(struct sw_flow_actions **sfa, - int attrtype, void *data, int len) + int attrtype, void *data, int len, bool log) { struct nlattr *a; - a = reserve_sfa_size(sfa, nla_attr_size(len)); + a = reserve_sfa_size(sfa, nla_attr_size(len), log); if (IS_ERR(a)) return a; @@ -1362,11 +1420,11 @@ static struct nlattr *__add_action(struct sw_flow_actions **sfa, } static int add_action(struct sw_flow_actions **sfa, int attrtype, - void *data, int len) + void *data, int len, bool log) { struct nlattr *a; - a = __add_action(sfa, attrtype, data, len); + a = __add_action(sfa, attrtype, data, len, log); if (IS_ERR(a)) return PTR_ERR(a); @@ -1374,12 +1432,12 @@ static int add_action(struct sw_flow_actions **sfa, int attrtype, } static inline int add_nested_action_start(struct sw_flow_actions **sfa, - int attrtype) + int attrtype, bool log) { int used = (*sfa)->actions_len; int err; - err = add_action(sfa, attrtype, NULL, 0); + err = add_action(sfa, attrtype, NULL, 0, log); if (err) return err; @@ -1398,12 +1456,12 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, static int __ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, - __be16 eth_type, __be16 vlan_tci); + __be16 eth_type, __be16 vlan_tci, bool log); static int validate_and_copy_sample(const struct nlattr *attr, const struct 
sw_flow_key *key, int depth, struct sw_flow_actions **sfa, - __be16 eth_type, __be16 vlan_tci) + __be16 eth_type, __be16 vlan_tci, bool log) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; @@ -1429,19 +1487,19 @@ static int validate_and_copy_sample(const struct nlattr *attr, return -EINVAL; /* validation done, copy sample action. */ - start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log); if (start < 0) return start; err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, - nla_data(probability), sizeof(u32)); + nla_data(probability), sizeof(u32), log); if (err) return err; - st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); + st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log); if (st_acts < 0) return st_acts; err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa, - eth_type, vlan_tci); + eth_type, vlan_tci, log); if (err) return err; @@ -1478,7 +1536,7 @@ void ovs_match_init(struct sw_flow_match *match, } static int validate_and_copy_set_tun(const struct nlattr *attr, - struct sw_flow_actions **sfa) + struct sw_flow_actions **sfa, bool log) { struct sw_flow_match match; struct sw_flow_key key; @@ -1487,7 +1545,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, int err, start; ovs_match_init(&match, &key, NULL); - err = ipv4_tun_from_nlattr(nla_data(attr), &match, false); + err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); if (err) return err; @@ -1516,12 +1574,12 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; }; - start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); if (start < 0) return start; a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, - sizeof(*tun_info) + key.tun_opts_len); + sizeof(*tun_info) + key.tun_opts_len, log); if (IS_ERR(a)) return PTR_ERR(a); @@ -1549,7 +1607,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, struct sw_flow_actions **sfa, - bool *set_tun, __be16 eth_type) + bool *set_tun, __be16 eth_type, bool log) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); @@ -1578,7 +1636,7 @@ static int validate_set(const struct nlattr *a, return -EINVAL; *set_tun = true; - err = validate_and_copy_set_tun(a, sfa); + err = validate_and_copy_set_tun(a, sfa, log); if (err) return err; break; @@ -1653,6 +1711,7 @@ static int validate_userspace(const struct nlattr *attr) static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, + [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 }, }; struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; int error; @@ -1670,12 +1729,12 @@ static int validate_userspace(const struct nlattr *attr) } static int copy_action(const struct nlattr *from, - struct sw_flow_actions **sfa) + struct sw_flow_actions **sfa, bool log) { int totlen = NLA_ALIGN(from->nla_len); struct nlattr *to; - to = reserve_sfa_size(sfa, from->nla_len); + to = reserve_sfa_size(sfa, from->nla_len, log); if (IS_ERR(to)) return PTR_ERR(to); @@ -1686,7 +1745,7 @@ static int copy_action(const struct nlattr *from, static int __ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key 
*key, int depth, struct sw_flow_actions **sfa, - __be16 eth_type, __be16 vlan_tci) + __be16 eth_type, __be16 vlan_tci, bool log) { const struct nlattr *a; bool out_tnl_port = false; @@ -1809,7 +1868,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_SET: err = validate_set(a, key, sfa, - &out_tnl_port, eth_type); + &out_tnl_port, eth_type, log); if (err) return err; @@ -1818,18 +1877,18 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_SAMPLE: err = validate_and_copy_sample(a, key, depth, sfa, - eth_type, vlan_tci); + eth_type, vlan_tci, log); if (err) return err; skip_copy = true; break; default: - OVS_NLERR("Unknown tunnel attribute (%d).\n", type); + OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; } if (!skip_copy) { - err = copy_action(a, sfa); + err = copy_action(a, sfa, log); if (err) return err; } @@ -1843,16 +1902,16 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, - struct sw_flow_actions **sfa) + struct sw_flow_actions **sfa, bool log) { int err; - *sfa = nla_alloc_flow_actions(nla_len(attr)); + *sfa = nla_alloc_flow_actions(nla_len(attr), log); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type, - key->eth.tci); + key->eth.tci, log); if (err) kfree(*sfa); diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index eb0b177300ad..577f12be3459 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -37,6 +37,7 @@ #include "flow.h" +size_t ovs_tun_key_attr_size(void); size_t ovs_key_attr_size(void); void ovs_match_init(struct sw_flow_match *match, @@ -44,15 +45,17 @@ void ovs_match_init(struct sw_flow_match *match, int ovs_nla_put_flow(const struct sw_flow_key *, const struct sw_flow_key *, struct sk_buff *); -int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *); +int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *, + bool log); -int ovs_nla_get_match(struct sw_flow_match *match, - const struct nlattr *, - const struct nlattr *); +int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key, + const struct nlattr *mask, bool log); +int ovs_nla_put_egress_tunnel_key(struct sk_buff *, + const struct ovs_tunnel_info *); int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, - struct sw_flow_actions **sfa); + struct sw_flow_actions **sfa, bool log); int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 90f8b40a350b..e0a7fefc1edf 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -107,7 +107,7 @@ err: return ERR_PTR(-ENOMEM); } -int ovs_flow_tbl_count(struct flow_table *table) +int ovs_flow_tbl_count(const struct flow_table *table) { return table->count; } @@ -401,7 +401,7 @@ static bool flow_cmp_masked_key(const struct sw_flow *flow, } bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - struct sw_flow_match *match) + const struct sw_flow_match *match) { struct sw_flow_key *key = match->key; int key_start = flow_key_start(key); @@ -412,7 +412,7 @@ bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, static struct sw_flow *masked_flow_lookup(struct table_instance *ti, const struct sw_flow_key *unmasked, - struct sw_flow_mask *mask) + const struct sw_flow_mask *mask) { struct 
sw_flow *flow; struct hlist_head *head; @@ -460,7 +460,7 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, } struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, - struct sw_flow_match *match) + const struct sw_flow_match *match) { struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); struct sw_flow_mask *mask; @@ -563,7 +563,7 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, /* Add 'mask' into the mask list, if it is not already there. */ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, - struct sw_flow_mask *new) + const struct sw_flow_mask *new) { struct sw_flow_mask *mask; mask = flow_mask_find(tbl, new); @@ -586,7 +586,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, /* Must be called with OVS mutex held. */ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, - struct sw_flow_mask *mask) + const struct sw_flow_mask *mask) { struct table_instance *new_ti = NULL; struct table_instance *ti; diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index f682c8c07f44..309fa6415689 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -61,12 +61,12 @@ struct sw_flow *ovs_flow_alloc(void); void ovs_flow_free(struct sw_flow *, bool deferred); int ovs_flow_tbl_init(struct flow_table *); -int ovs_flow_tbl_count(struct flow_table *table); +int ovs_flow_tbl_count(const struct flow_table *table); void ovs_flow_tbl_destroy(struct flow_table *table); int ovs_flow_tbl_flush(struct flow_table *flow_table); int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, - struct sw_flow_mask *mask); + const struct sw_flow_mask *mask); void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); int ovs_flow_tbl_num_masks(const struct flow_table *table); struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, @@ -77,9 +77,9 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *, struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, const struct sw_flow_key *); struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, - struct sw_flow_match *match); + const struct sw_flow_match *match); bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - struct sw_flow_match *match); + const struct sw_flow_match *match); void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, const struct sw_flow_mask *mask); diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 70c9765011f4..347fa2325b22 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -68,7 +68,7 @@ static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) } /* Convert 24 bit VNI to 64 bit tunnel ID. 
*/ -static __be64 vni_to_tunnel_id(__u8 *vni) +static __be64 vni_to_tunnel_id(const __u8 *vni) { #ifdef __BIG_ENDIAN return (vni[0] << 16) | (vni[1] << 8) | vni[2]; @@ -97,7 +97,9 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) key = vni_to_tunnel_id(geneveh->vni); - ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, flags, + ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), + udp_hdr(skb)->source, udp_hdr(skb)->dest, + key, flags, geneveh->options, opts_len); ovs_vport_receive(vport, skb, &tun_info); @@ -228,6 +230,22 @@ static const char *geneve_get_name(const struct vport *vport) return geneve_port->name; } +static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ovs_tunnel_info *egress_tun_info) +{ + struct geneve_port *geneve_port = geneve_vport(vport); + struct net *net = ovs_dp_get_net(vport->dp); + __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport; + __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); + + /* Get tp_src and tp_dst, refer to geneve_build_header(). + */ + return ovs_tunnel_get_egress_info(egress_tun_info, + ovs_dp_get_net(vport->dp), + OVS_CB(skb)->egress_tun_info, + IPPROTO_UDP, skb->mark, sport, dport); +} + static struct vport_ops ovs_geneve_vport_ops = { .type = OVS_VPORT_TYPE_GENEVE, .create = geneve_tnl_create, @@ -236,6 +254,7 @@ static struct vport_ops ovs_geneve_vport_ops = { .get_options = geneve_get_options, .send = geneve_tnl_send, .owner = THIS_MODULE, + .get_egress_tun_info = geneve_get_egress_tun_info, }; static int __init ovs_geneve_tnl_init(void) diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 00270b608844..8e61a5c6ae7c 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -108,7 +108,7 @@ static int gre_rcv(struct sk_buff *skb, return PACKET_REJECT; key = key_to_tunnel_id(tpi->key, tpi->seq); - ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, + ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), 0, 0, key, filter_tnl_flags(tpi->flags), NULL, 0); ovs_vport_receive(vport, skb, &tun_info); @@ -284,12 +284,22 @@ static void gre_tnl_destroy(struct vport *vport) gre_exit(); } +static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ovs_tunnel_info *egress_tun_info) +{ + return ovs_tunnel_get_egress_info(egress_tun_info, + ovs_dp_get_net(vport->dp), + OVS_CB(skb)->egress_tun_info, + IPPROTO_GRE, skb->mark, 0, 0); +} + static struct vport_ops ovs_gre_vport_ops = { .type = OVS_VPORT_TYPE_GRE, .create = gre_create, .destroy = gre_tnl_destroy, .get_name = gre_get_name, .send = gre_tnl_send, + .get_egress_tun_info = gre_get_egress_tun_info, .owner = THIS_MODULE, }; diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 877ee74b4f08..4776282c6417 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -77,7 +77,7 @@ static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb) return RX_HANDLER_CONSUMED; } -static struct net_device *get_dpdev(struct datapath *dp) +static struct net_device *get_dpdev(const struct datapath *dp) { struct vport *local; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 965e7500c5a6..38f95a52241b 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -69,7 +69,9 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) /* Save outer tunnel values */ iph = ip_hdr(skb); key = cpu_to_be64(ntohl(vx_vni) >> 8); -
ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0); + ovs_flow_tun_info_init(&tun_info, iph, + udp_hdr(skb)->source, udp_hdr(skb)->dest, + key, TUNNEL_KEY, NULL, 0); ovs_vport_receive(vport, skb, &tun_info); } @@ -189,6 +191,25 @@ error: return err; } +static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ovs_tunnel_info *egress_tun_info) +{ + struct net *net = ovs_dp_get_net(vport->dp); + struct vxlan_port *vxlan_port = vxlan_vport(vport); + __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; + __be16 src_port; + int port_min; + int port_max; + + inet_get_local_port_range(net, &port_min, &port_max); + src_port = udp_flow_src_port(net, skb, 0, 0, true); + + return ovs_tunnel_get_egress_info(egress_tun_info, net, + OVS_CB(skb)->egress_tun_info, + IPPROTO_UDP, skb->mark, + src_port, dst_port); +} + static const char *vxlan_get_name(const struct vport *vport) { struct vxlan_port *vxlan_port = vxlan_vport(vport); @@ -202,6 +223,7 @@ static struct vport_ops ovs_vxlan_vport_ops = { .get_name = vxlan_get_name, .get_options = vxlan_get_options, .send = vxlan_tnl_send, + .get_egress_tun_info = vxlan_get_egress_tun_info, .owner = THIS_MODULE, }; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 8168ef021337..e771a46933e5 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -68,7 +68,7 @@ void ovs_vport_exit(void) kfree(dev_table); } -static struct hlist_head *hash_bucket(struct net *net, const char *name) +static struct hlist_head *hash_bucket(const struct net *net, const char *name) { unsigned int hash = jhash(name, strlen(name), (unsigned long) net); return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)]; @@ -90,7 +90,7 @@ errout: ovs_unlock(); return err; } -EXPORT_SYMBOL(ovs_vport_ops_register); +EXPORT_SYMBOL_GPL(ovs_vport_ops_register); void ovs_vport_ops_unregister(struct vport_ops *ops) { @@ -98,7 +98,7 @@ void ovs_vport_ops_unregister(struct vport_ops *ops) list_del(&ops->list); ovs_unlock(); } -EXPORT_SYMBOL(ovs_vport_ops_unregister); +EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister); /** * ovs_vport_locate - find a port that has already been created @@ -107,7 +107,7 @@ EXPORT_SYMBOL(ovs_vport_ops_unregister); * * Must be called with ovs or RCU read lock. */ -struct vport *ovs_vport_locate(struct net *net, const char *name) +struct vport *ovs_vport_locate(const struct net *net, const char *name) { struct hlist_head *bucket = hash_bucket(net, name); struct vport *vport; @@ -165,7 +165,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, return vport; } -EXPORT_SYMBOL(ovs_vport_alloc); +EXPORT_SYMBOL_GPL(ovs_vport_alloc); /** * ovs_vport_free - uninitialize and free vport @@ -186,7 +186,7 @@ void ovs_vport_free(struct vport *vport) free_percpu(vport->percpu_stats); kfree(vport); } -EXPORT_SYMBOL(ovs_vport_free); +EXPORT_SYMBOL_GPL(ovs_vport_free); static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms) { @@ -380,7 +380,7 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) * * Must be called with ovs_mutex. */ -int ovs_vport_set_upcall_portids(struct vport *vport, struct nlattr *ids) +int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids) { struct vport_portids *old, *vport_portids; @@ -471,7 +471,7 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb) * skb->data should point to the Ethernet header. 
*/ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, - struct ovs_tunnel_info *tun_info) + const struct ovs_tunnel_info *tun_info) { struct pcpu_sw_netstats *stats; struct sw_flow_key key; @@ -493,7 +493,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, } ovs_dp_process_packet(skb, &key); } -EXPORT_SYMBOL(ovs_vport_receive); +EXPORT_SYMBOL_GPL(ovs_vport_receive); /** * ovs_vport_send - send a packet on a device @@ -572,4 +572,65 @@ void ovs_vport_deferred_free(struct vport *vport) call_rcu(&vport->rcu, free_vport_rcu); } -EXPORT_SYMBOL(ovs_vport_deferred_free); +EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); + +int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info, + struct net *net, + const struct ovs_tunnel_info *tun_info, + u8 ipproto, + u32 skb_mark, + __be16 tp_src, + __be16 tp_dst) +{ + const struct ovs_key_ipv4_tunnel *tun_key; + struct rtable *rt; + struct flowi4 fl; + + if (unlikely(!tun_info)) + return -EINVAL; + + tun_key = &tun_info->tunnel; + + /* Route lookup to get source IP address. + * The process may need to be changed if the corresponding process + * in vports ops changed. + */ + memset(&fl, 0, sizeof(fl)); + fl.daddr = tun_key->ipv4_dst; + fl.saddr = tun_key->ipv4_src; + fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); + fl.flowi4_mark = skb_mark; + fl.flowi4_proto = IPPROTO_GRE; + + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + ip_rt_put(rt); + + /* Generate egress_tun_info based on tun_info, + * saddr, tp_src and tp_dst + */ + __ovs_flow_tun_info_init(egress_tun_info, + fl.saddr, tun_key->ipv4_dst, + tun_key->ipv4_tos, + tun_key->ipv4_ttl, + tp_src, tp_dst, + tun_key->tun_id, + tun_key->tun_flags, + tun_info->options, + tun_info->options_len); + + return 0; +} +EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info); + +int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ovs_tunnel_info *info) +{ + /* get_egress_tun_info() is only implemented on tunnel ports.
*/ + if (unlikely(!vport->ops->get_egress_tun_info)) + return -EINVAL; + + return vport->ops->get_egress_tun_info(vport, skb, info); +} diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index e41c3facf799..99c8e71d9e6c 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -45,19 +45,29 @@ void ovs_vport_exit(void); struct vport *ovs_vport_add(const struct vport_parms *); void ovs_vport_del(struct vport *); -struct vport *ovs_vport_locate(struct net *net, const char *name); +struct vport *ovs_vport_locate(const struct net *net, const char *name); void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *); int ovs_vport_set_options(struct vport *, struct nlattr *options); int ovs_vport_get_options(const struct vport *, struct sk_buff *); -int ovs_vport_set_upcall_portids(struct vport *, struct nlattr *pids); +int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids); int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *); u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *); int ovs_vport_send(struct vport *, struct sk_buff *); +int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info, + struct net *net, + const struct ovs_tunnel_info *tun_info, + u8 ipproto, + u32 skb_mark, + __be16 tp_src, + __be16 tp_dst); +int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ovs_tunnel_info *info); + /* The following definitions are for implementers of vport devices: */ struct vport_err_stats { @@ -146,6 +156,8 @@ struct vport_parms { * @get_name: Get the device's name. * @send: Send a packet on the device. Returns the length of the packet sent, * zero for dropped packets or negative for error. + * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for + * a packet. */ struct vport_ops { enum ovs_vport_type type; @@ -161,6 +173,8 @@ struct vport_ops { const char *(*get_name)(const struct vport *); int (*send)(struct vport *, struct sk_buff *); + int (*get_egress_tun_info)(struct vport *, struct sk_buff *, + struct ovs_tunnel_info *); struct module *owner; struct list_head list; @@ -210,7 +224,7 @@ static inline struct vport *vport_from_priv(void *priv) } void ovs_vport_receive(struct vport *, struct sk_buff *, - struct ovs_tunnel_info *); + const struct ovs_tunnel_info *); static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len) diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 5a940dbd74a3..32ab87d34828 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -426,16 +426,17 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, out_dev = phonet_route_output(net, pn_sockaddr_get_addr(&sa)); if (!out_dev) { - LIMIT_NETDEBUG(KERN_WARNING"No Phonet route to %02X\n", - pn_sockaddr_get_addr(&sa)); + net_dbg_ratelimited("No Phonet route to %02X\n", + pn_sockaddr_get_addr(&sa)); goto out; } __skb_push(skb, sizeof(struct phonethdr)); skb->dev = out_dev; if (out_dev == dev) { - LIMIT_NETDEBUG(KERN_ERR"Phonet loop to %02X on %s\n", - pn_sockaddr_get_addr(&sa), dev->name); + net_dbg_ratelimited("Phonet loop to %02X on %s\n", + pn_sockaddr_get_addr(&sa), + dev->name); goto out_dev; } /* Some drivers (e.g. 
TUN) do not allocate HW header space */ diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index e9a83a637185..fa8237fdc57b 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -203,8 +203,7 @@ static netdev_tx_t gprs_xmit(struct sk_buff *skb, struct net_device *dev) len = skb->len; err = pep_write(sk, skb); if (err) { - LIMIT_NETDEBUG(KERN_WARNING"%s: TX error (%d)\n", - dev->name, err); + net_dbg_ratelimited("%s: TX error (%d)\n", dev->name, err); dev->stats.tx_aborted_errors++; dev->stats.tx_errors++; } else { diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 44b2123e22b8..9cd069dfaf65 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -272,8 +272,8 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) hdr = pnp_hdr(skb); if (hdr->data[0] != PN_PEP_TYPE_COMMON) { - LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP type: %u\n", - (unsigned int)hdr->data[0]); + net_dbg_ratelimited("Phonet unknown PEP type: %u\n", + (unsigned int)hdr->data[0]); return -EOPNOTSUPP; } @@ -304,8 +304,8 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) break; default: - LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP indication: %u\n", - (unsigned int)hdr->data[1]); + net_dbg_ratelimited("Phonet unknown PEP indication: %u\n", + (unsigned int)hdr->data[1]); return -EOPNOTSUPP; } if (wake) @@ -451,8 +451,8 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) break; default: - LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP message: %u\n", - hdr->message_id); + net_dbg_ratelimited("Phonet unknown PEP message: %u\n", + hdr->message_id); err = -EINVAL; } out: diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index d49dc2ed30ad..ce469d648ffb 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -205,9 +205,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) if (sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN)) goto out_free; - if (!sctp_ulpevent_is_notification(event)) + if (!sctp_ulpevent_is_notification(event)) { sk_mark_napi_id(sk, skb); - + sk_incoming_cpu_update(sk); + } /* Check if the user wishes to receive this event. */ if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe)) goto out_free; |
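The flow_netlink.c hunks earlier in this diff factor the Geneve option parsing into genev_tun_opt_from_nlattr() and keep three length checks: the options must fit in the flow key's option buffer, their length must be a multiple of 4, and a mask's option length must equal the length already recorded for the key (recording tun_opts_len for the key first is what lets the mask check reject packets that carry the same options plus extras). The short standalone C sketch below restates only those checks for illustration; it is not kernel code, and MAX_TUN_OPTS_LEN and geneve_opts_len_ok() are assumed placeholder names, not kernel symbols.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Assumed stand-in for sizeof(((struct sw_flow_key *)0)->tun_opts). */
#define MAX_TUN_OPTS_LEN 256

/* Same three length rules the parser applies to OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
 * fit in the key's buffer, be a multiple of 4, and (for a mask) match the
 * option length already recorded for the key. */
static bool geneve_opts_len_ok(size_t opt_len, bool is_mask, size_t key_opt_len)
{
	if (opt_len > MAX_TUN_OPTS_LEN)
		return false;	/* would overflow the tun_opts buffer */
	if (opt_len % 4 != 0)
		return false;	/* Geneve options come in 4-byte units */
	if (is_mask && opt_len != key_opt_len)
		return false;	/* mask must cover exactly the key's options */
	return true;
}

int main(void)
{
	printf("%d\n", geneve_opts_len_ok(8, false, 0));	/* 1: valid key options */
	printf("%d\n", geneve_opts_len_ok(6, false, 0));	/* 0: not a multiple of 4 */
	printf("%d\n", geneve_opts_len_ok(8, true, 12));	/* 0: mask length differs from key */
	return 0;
}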