summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c2
-rw-r--r--drivers/net/ethernet/via/via-velocity.c21
-rw-r--r--drivers/net/ethernet/xilinx/Kconfig4
-rw-r--r--drivers/net/macsec.c1
-rw-r--r--drivers/net/phy/Kconfig6
-rw-r--r--drivers/net/phy/mscc.c178
-rw-r--r--include/linux/if_vlan.h33
-rw-r--r--include/linux/inet_diag.h4
-rw-r--r--include/linux/skbuff.h2
-rw-r--r--include/linux/tcp.h7
-rw-r--r--include/net/netns/xfrm.h12
-rw-r--r--include/net/tcp.h2
-rw-r--r--include/net/xfrm.h2
-rw-r--r--include/uapi/linux/inet_diag.h1
-rw-r--r--include/uapi/linux/openvswitch.h17
-rw-r--r--kernel/bpf/verifier.c55
-rw-r--r--net/core/rtnetlink.c4
-rw-r--r--net/core/skbuff.c19
-rw-r--r--net/ipv4/fib_trie.c10
-rw-r--r--net/ipv4/inet_diag.c49
-rw-r--r--net/ipv4/ip_sockglue.c7
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_input.c330
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_minisocks.c1
-rw-r--r--net/ipv4/udp_diag.c10
-rw-r--r--net/ipv6/ip6_fib.c6
-rw-r--r--net/openvswitch/actions.c16
-rw-r--r--net/openvswitch/flow.c65
-rw-r--r--net/openvswitch/flow.h8
-rw-r--r--net/openvswitch/flow_netlink.c310
-rw-r--r--net/openvswitch/vport.c7
-rw-r--r--net/sctp/sctp_diag.c20
-rw-r--r--net/x25/af_x25.c4
-rw-r--r--net/xfrm/xfrm_policy.c145
-rw-r--r--net/xfrm/xfrm_replay.c6
-rw-r--r--net/xfrm/xfrm_state.c121
-rw-r--r--samples/bpf/test_verifier.c102
38 files changed, 1025 insertions, 568 deletions
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index b4a56e61631a..578bbec1ce18 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -2116,7 +2116,7 @@ static void qede_vlan_mark_nonconfigured(struct qede_dev *edev)
edev->accept_any_vlan = false;
}
-int qede_set_features(struct net_device *dev, netdev_features_t features)
+static int qede_set_features(struct net_device *dev, netdev_features_t features)
{
struct qede_dev *edev = netdev_priv(dev);
netdev_features_t changes = features ^ dev->features;
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index f38696ceee74..908e72e18ef7 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -1724,24 +1724,21 @@ static void velocity_free_tx_buf(struct velocity_info *vptr,
struct velocity_td_info *tdinfo, struct tx_desc *td)
{
struct sk_buff *skb = tdinfo->skb;
+ int i;
/*
* Don't unmap the pre-allocated tx_bufs
*/
- if (tdinfo->skb_dma) {
- int i;
+ for (i = 0; i < tdinfo->nskb_dma; i++) {
+ size_t pktlen = max_t(size_t, skb->len, ETH_ZLEN);
- for (i = 0; i < tdinfo->nskb_dma; i++) {
- size_t pktlen = max_t(size_t, skb->len, ETH_ZLEN);
+ /* For scatter-gather */
+ if (skb_shinfo(skb)->nr_frags > 0)
+ pktlen = max_t(size_t, pktlen,
+ td->td_buf[i].size & ~TD_QUEUE);
- /* For scatter-gather */
- if (skb_shinfo(skb)->nr_frags > 0)
- pktlen = max_t(size_t, pktlen,
- td->td_buf[i].size & ~TD_QUEUE);
-
- dma_unmap_single(vptr->dev, tdinfo->skb_dma[i],
- le16_to_cpu(pktlen), DMA_TO_DEVICE);
- }
+ dma_unmap_single(vptr->dev, tdinfo->skb_dma[i],
+ le16_to_cpu(pktlen), DMA_TO_DEVICE);
}
dev_kfree_skb_irq(skb);
tdinfo->skb = NULL;
diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig
index 4f5c024c6192..6d68c8a8f4f2 100644
--- a/drivers/net/ethernet/xilinx/Kconfig
+++ b/drivers/net/ethernet/xilinx/Kconfig
@@ -5,7 +5,7 @@
config NET_VENDOR_XILINX
bool "Xilinx devices"
default y
- depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ
+ depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS
---help---
If you have a network (Ethernet) card belonging to this class, say Y.
@@ -18,7 +18,7 @@ if NET_VENDOR_XILINX
config XILINX_EMACLITE
tristate "Xilinx 10/100 Ethernet Lite support"
- depends on (PPC32 || MICROBLAZE || ARCH_ZYNQ)
+ depends on PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS
select PHYLIB
---help---
This driver supports the 10/100 Ethernet Lite from Xilinx.
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 351e701eb043..3ea47f28e143 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -2973,6 +2973,7 @@ static void macsec_setup(struct net_device *dev)
dev->priv_flags |= IFF_NO_QUEUE;
dev->netdev_ops = &macsec_netdev_ops;
dev->destructor = macsec_free_netdev;
+ SET_NETDEV_DEVTYPE(dev, &macsec_type);
eth_zero_addr(dev->broadcast);
}
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 1c3e07c3d0b8..87b566f54cc1 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -274,9 +274,9 @@ config MICROCHIP_PHY
Supports the LAN88XX PHYs.
config MICROSEMI_PHY
- tristate "Microsemi PHYs"
- ---help---
- Currently supports the VSC8531 and VSC8541 PHYs
+ tristate "Microsemi PHYs"
+ ---help---
+ Currently supports the VSC8531 and VSC8541 PHYs
config NATIONAL_PHY
tristate "National Semiconductor PHYs"
diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c
index ad33390b382a..c09cc4a3d166 100644
--- a/drivers/net/phy/mscc.c
+++ b/drivers/net/phy/mscc.c
@@ -13,135 +13,135 @@
#include <linux/phy.h>
enum rgmii_rx_clock_delay {
- RGMII_RX_CLK_DELAY_0_2_NS = 0,
- RGMII_RX_CLK_DELAY_0_8_NS = 1,
- RGMII_RX_CLK_DELAY_1_1_NS = 2,
- RGMII_RX_CLK_DELAY_1_7_NS = 3,
- RGMII_RX_CLK_DELAY_2_0_NS = 4,
- RGMII_RX_CLK_DELAY_2_3_NS = 5,
- RGMII_RX_CLK_DELAY_2_6_NS = 6,
- RGMII_RX_CLK_DELAY_3_4_NS = 7
+ RGMII_RX_CLK_DELAY_0_2_NS = 0,
+ RGMII_RX_CLK_DELAY_0_8_NS = 1,
+ RGMII_RX_CLK_DELAY_1_1_NS = 2,
+ RGMII_RX_CLK_DELAY_1_7_NS = 3,
+ RGMII_RX_CLK_DELAY_2_0_NS = 4,
+ RGMII_RX_CLK_DELAY_2_3_NS = 5,
+ RGMII_RX_CLK_DELAY_2_6_NS = 6,
+ RGMII_RX_CLK_DELAY_3_4_NS = 7
};
-#define MII_VSC85XX_INT_MASK 25
-#define MII_VSC85XX_INT_MASK_MASK 0xa000
-#define MII_VSC85XX_INT_STATUS 26
+#define MII_VSC85XX_INT_MASK 25
+#define MII_VSC85XX_INT_MASK_MASK 0xa000
+#define MII_VSC85XX_INT_STATUS 26
-#define MSCC_EXT_PAGE_ACCESS 31
-#define MSCC_PHY_PAGE_STANDARD 0x0000 /* Standard registers */
-#define MSCC_PHY_PAGE_EXTENDED_2 0x0002 /* Extended reg - page 2 */
+#define MSCC_EXT_PAGE_ACCESS 31
+#define MSCC_PHY_PAGE_STANDARD 0x0000 /* Standard registers */
+#define MSCC_PHY_PAGE_EXTENDED_2 0x0002 /* Extended reg - page 2 */
/* Extended Page 2 Registers */
-#define MSCC_PHY_RGMII_CNTL 20
-#define RGMII_RX_CLK_DELAY_MASK 0x0070
-#define RGMII_RX_CLK_DELAY_POS 4
+#define MSCC_PHY_RGMII_CNTL 20
+#define RGMII_RX_CLK_DELAY_MASK 0x0070
+#define RGMII_RX_CLK_DELAY_POS 4
/* Microsemi PHY ID's */
-#define PHY_ID_VSC8531 0x00070570
-#define PHY_ID_VSC8541 0x00070770
+#define PHY_ID_VSC8531 0x00070570
+#define PHY_ID_VSC8541 0x00070770
static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page)
{
- int rc;
+ int rc;
- rc = phy_write(phydev, MSCC_EXT_PAGE_ACCESS, page);
- return rc;
+ rc = phy_write(phydev, MSCC_EXT_PAGE_ACCESS, page);
+ return rc;
}
static int vsc85xx_default_config(struct phy_device *phydev)
{
- int rc;
- u16 reg_val;
+ int rc;
+ u16 reg_val;
- mutex_lock(&phydev->lock);
- rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2);
- if (rc != 0)
- goto out_unlock;
+ mutex_lock(&phydev->lock);
+ rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2);
+ if (rc != 0)
+ goto out_unlock;
- reg_val = phy_read(phydev, MSCC_PHY_RGMII_CNTL);
- reg_val &= ~(RGMII_RX_CLK_DELAY_MASK);
- reg_val |= (RGMII_RX_CLK_DELAY_1_1_NS << RGMII_RX_CLK_DELAY_POS);
- phy_write(phydev, MSCC_PHY_RGMII_CNTL, reg_val);
- rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD);
+ reg_val = phy_read(phydev, MSCC_PHY_RGMII_CNTL);
+ reg_val &= ~(RGMII_RX_CLK_DELAY_MASK);
+ reg_val |= (RGMII_RX_CLK_DELAY_1_1_NS << RGMII_RX_CLK_DELAY_POS);
+ phy_write(phydev, MSCC_PHY_RGMII_CNTL, reg_val);
+ rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD);
out_unlock:
- mutex_unlock(&phydev->lock);
+ mutex_unlock(&phydev->lock);
- return rc;
+ return rc;
}
static int vsc85xx_config_init(struct phy_device *phydev)
{
- int rc;
+ int rc;
- rc = vsc85xx_default_config(phydev);
- if (rc)
- return rc;
- rc = genphy_config_init(phydev);
+ rc = vsc85xx_default_config(phydev);
+ if (rc)
+ return rc;
+ rc = genphy_config_init(phydev);
- return rc;
+ return rc;
}
static int vsc85xx_ack_interrupt(struct phy_device *phydev)
{
- int rc = 0;
+ int rc = 0;
- if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
- rc = phy_read(phydev, MII_VSC85XX_INT_STATUS);
+ if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+ rc = phy_read(phydev, MII_VSC85XX_INT_STATUS);
- return (rc < 0) ? rc : 0;
+ return (rc < 0) ? rc : 0;
}
static int vsc85xx_config_intr(struct phy_device *phydev)
{
- int rc;
-
- if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
- rc = phy_write(phydev, MII_VSC85XX_INT_MASK,
- MII_VSC85XX_INT_MASK_MASK);
- } else {
- rc = phy_write(phydev, MII_VSC85XX_INT_MASK, 0);
- if (rc < 0)
- return rc;
- rc = phy_read(phydev, MII_VSC85XX_INT_STATUS);
- }
-
- return rc;
+ int rc;
+
+ if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+ rc = phy_write(phydev, MII_VSC85XX_INT_MASK,
+ MII_VSC85XX_INT_MASK_MASK);
+ } else {
+ rc = phy_write(phydev, MII_VSC85XX_INT_MASK, 0);
+ if (rc < 0)
+ return rc;
+ rc = phy_read(phydev, MII_VSC85XX_INT_STATUS);
+ }
+
+ return rc;
}
/* Microsemi VSC85xx PHYs */
static struct phy_driver vsc85xx_driver[] = {
{
- .phy_id = PHY_ID_VSC8531,
- .name = "Microsemi VSC8531",
- .phy_id_mask = 0xfffffff0,
- .features = PHY_GBIT_FEATURES,
- .flags = PHY_HAS_INTERRUPT,
- .soft_reset = &genphy_soft_reset,
- .config_init = &vsc85xx_config_init,
- .config_aneg = &genphy_config_aneg,
- .aneg_done = &genphy_aneg_done,
- .read_status = &genphy_read_status,
- .ack_interrupt = &vsc85xx_ack_interrupt,
- .config_intr = &vsc85xx_config_intr,
- .suspend = &genphy_suspend,
- .resume = &genphy_resume,
+ .phy_id = PHY_ID_VSC8531,
+ .name = "Microsemi VSC8531",
+ .phy_id_mask = 0xfffffff0,
+ .features = PHY_GBIT_FEATURES,
+ .flags = PHY_HAS_INTERRUPT,
+ .soft_reset = &genphy_soft_reset,
+ .config_init = &vsc85xx_config_init,
+ .config_aneg = &genphy_config_aneg,
+ .aneg_done = &genphy_aneg_done,
+ .read_status = &genphy_read_status,
+ .ack_interrupt = &vsc85xx_ack_interrupt,
+ .config_intr = &vsc85xx_config_intr,
+ .suspend = &genphy_suspend,
+ .resume = &genphy_resume,
},
{
- .phy_id = PHY_ID_VSC8541,
- .name = "Microsemi VSC8541 SyncE",
- .phy_id_mask = 0xfffffff0,
- .features = PHY_GBIT_FEATURES,
- .flags = PHY_HAS_INTERRUPT,
- .soft_reset = &genphy_soft_reset,
- .config_init = &vsc85xx_config_init,
- .config_aneg = &genphy_config_aneg,
- .aneg_done = &genphy_aneg_done,
- .read_status = &genphy_read_status,
- .ack_interrupt = &vsc85xx_ack_interrupt,
- .config_intr = &vsc85xx_config_intr,
- .suspend = &genphy_suspend,
- .resume = &genphy_resume,
+ .phy_id = PHY_ID_VSC8541,
+ .name = "Microsemi VSC8541 SyncE",
+ .phy_id_mask = 0xfffffff0,
+ .features = PHY_GBIT_FEATURES,
+ .flags = PHY_HAS_INTERRUPT,
+ .soft_reset = &genphy_soft_reset,
+ .config_init = &vsc85xx_config_init,
+ .config_aneg = &genphy_config_aneg,
+ .aneg_done = &genphy_aneg_done,
+ .read_status = &genphy_read_status,
+ .ack_interrupt = &vsc85xx_ack_interrupt,
+ .config_intr = &vsc85xx_config_intr,
+ .suspend = &genphy_suspend,
+ .resume = &genphy_resume,
}
};
@@ -149,9 +149,9 @@ static struct phy_driver vsc85xx_driver[] = {
module_phy_driver(vsc85xx_driver);
static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = {
- { PHY_ID_VSC8531, 0xfffffff0, },
- { PHY_ID_VSC8541, 0xfffffff0, },
- { }
+ { PHY_ID_VSC8531, 0xfffffff0, },
+ { PHY_ID_VSC8541, 0xfffffff0, },
+ { }
};
MODULE_DEVICE_TABLE(mdio, vsc85xx_tbl);
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 49d4aef1f789..3319d97d789d 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -272,6 +272,23 @@ static inline int vlan_get_encap_level(struct net_device *dev)
}
#endif
+/**
+ * eth_type_vlan - check for valid vlan ether type.
+ * @ethertype: ether type to check
+ *
+ * Returns true if the ether type is a vlan ether type.
+ */
+static inline bool eth_type_vlan(__be16 ethertype)
+{
+ switch (ethertype) {
+ case htons(ETH_P_8021Q):
+ case htons(ETH_P_8021AD):
+ return true;
+ default:
+ return false;
+ }
+}
+
static inline bool vlan_hw_offload_capable(netdev_features_t features,
__be16 proto)
{
@@ -425,8 +442,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci)
{
struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb->data;
- if (veth->h_vlan_proto != htons(ETH_P_8021Q) &&
- veth->h_vlan_proto != htons(ETH_P_8021AD))
+ if (!eth_type_vlan(veth->h_vlan_proto))
return -EINVAL;
*vlan_tci = ntohs(veth->h_vlan_TCI);
@@ -488,7 +504,7 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type,
* present at mac_len - VLAN_HLEN (if mac_len > 0), or at
* ETH_HLEN otherwise
*/
- if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
+ if (eth_type_vlan(type)) {
if (vlan_depth) {
if (WARN_ON(vlan_depth < VLAN_HLEN))
return 0;
@@ -506,8 +522,7 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type,
vh = (struct vlan_hdr *)(skb->data + vlan_depth);
type = vh->h_vlan_encapsulated_proto;
vlan_depth += VLAN_HLEN;
- } while (type == htons(ETH_P_8021Q) ||
- type == htons(ETH_P_8021AD));
+ } while (eth_type_vlan(type));
}
if (depth)
@@ -572,8 +587,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb,
static inline bool skb_vlan_tagged(const struct sk_buff *skb)
{
if (!skb_vlan_tag_present(skb) &&
- likely(skb->protocol != htons(ETH_P_8021Q) &&
- skb->protocol != htons(ETH_P_8021AD)))
+ likely(!eth_type_vlan(skb->protocol)))
return false;
return true;
@@ -593,15 +607,14 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb)
if (!skb_vlan_tag_present(skb)) {
struct vlan_ethhdr *veh;
- if (likely(protocol != htons(ETH_P_8021Q) &&
- protocol != htons(ETH_P_8021AD)))
+ if (likely(!eth_type_vlan(protocol)))
return false;
veh = (struct vlan_ethhdr *)skb->data;
protocol = veh->h_vlan_encapsulated_proto;
}
- if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD))
+ if (!eth_type_vlan(protocol))
return false;
return true;
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index feb04ea20f11..65da430e260f 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -37,7 +37,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
struct sk_buff *skb, const struct inet_diag_req_v2 *req,
struct user_namespace *user_ns,
u32 pid, u32 seq, u16 nlmsg_flags,
- const struct nlmsghdr *unlh);
+ const struct nlmsghdr *unlh, bool net_admin);
void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *r,
@@ -56,7 +56,7 @@ void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk);
int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
struct inet_diag_msg *r, int ext,
- struct user_namespace *user_ns);
+ struct user_namespace *user_ns, bool net_admin);
extern int inet_diag_register(const struct inet_diag_handler *handler);
extern void inet_diag_unregister(const struct inet_diag_handler *handler);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cfb7219be665..4c5662f05bda 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2402,6 +2402,8 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
kfree_skb(skb);
}
+void skb_rbtree_purge(struct rb_root *root);
+
void *netdev_alloc_frag(unsigned int fragsz);
struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 7be9b1242354..c723a465125d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -281,10 +281,9 @@ struct tcp_sock {
struct sk_buff* lost_skb_hint;
struct sk_buff *retransmit_skb_hint;
- /* OOO segments go in this list. Note that socket lock must be held,
- * as we do not use sk_buff_head lock.
- */
- struct sk_buff_head out_of_order_queue;
+ /* OOO segments go in this rbtree. Socket lock must be held. */
+ struct rb_root out_of_order_queue;
+ struct sk_buff *ooo_last_skb; /* cache rb_last(out_of_order_queue) */
/* SACKs data, these 2 need to be together (see tcp_options_write) */
struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 24cd3949a9a4..27bb9633c69d 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -11,7 +11,7 @@
struct ctl_table_header;
struct xfrm_policy_hash {
- struct hlist_head *table;
+ struct hlist_head __rcu *table;
unsigned int hmask;
u8 dbits4;
u8 sbits4;
@@ -38,14 +38,12 @@ struct netns_xfrm {
* mode. Also, it can be used by ah/esp icmp error handler to find
* offending SA.
*/
- struct hlist_head *state_bydst;
- struct hlist_head *state_bysrc;
- struct hlist_head *state_byspi;
+ struct hlist_head __rcu *state_bydst;
+ struct hlist_head __rcu *state_bysrc;
+ struct hlist_head __rcu *state_byspi;
unsigned int state_hmask;
unsigned int state_num;
struct work_struct state_hash_work;
- struct hlist_head state_gc_list;
- struct work_struct state_gc_work;
struct list_head policy_all;
struct hlist_head *policy_byidx;
@@ -73,7 +71,7 @@ struct netns_xfrm {
struct dst_ops xfrm6_dst_ops;
#endif
spinlock_t xfrm_state_lock;
- rwlock_t xfrm_policy_lock;
+ spinlock_t xfrm_policy_lock;
struct mutex xfrm_cfg_mutex;
/* flow cache part */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d6ae36512429..fdfbedd61c67 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -640,7 +640,7 @@ static inline void tcp_fast_path_check(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (skb_queue_empty(&tp->out_of_order_queue) &&
+ if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
tp->rcv_wnd &&
atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
!tp->urg_data)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index adfebd6f243c..d2fdd6d70959 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -187,7 +187,7 @@ struct xfrm_state {
struct xfrm_replay_state_esn *preplay_esn;
/* The functions for replay detection. */
- struct xfrm_replay *repl;
+ const struct xfrm_replay *repl;
/* internal flag that only holds state for delayed aevent at the
* moment
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index 5581206a08ae..b5c366f87b3e 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -123,6 +123,7 @@ enum {
INET_DIAG_LOCALS,
INET_DIAG_PEERS,
INET_DIAG_PAD,
+ INET_DIAG_MARK,
__INET_DIAG_MAX,
};
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 54c3b4f4aceb..59ed3992c760 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -605,13 +605,13 @@ struct ovs_action_push_mpls {
* @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set
* (but it will not be set in the 802.1Q header that is pushed).
*
- * The @vlan_tpid value is typically %ETH_P_8021Q. The only acceptable TPID
- * values are those that the kernel module also parses as 802.1Q headers, to
- * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN
- * from having surprising results.
+ * The @vlan_tpid value is typically %ETH_P_8021Q or %ETH_P_8021AD.
+ * The only acceptable TPID values are those that the kernel module also parses
+ * as 802.1Q or 802.1AD headers, to prevent %OVS_ACTION_ATTR_PUSH_VLAN followed
+ * by %OVS_ACTION_ATTR_POP_VLAN from having surprising results.
*/
struct ovs_action_push_vlan {
- __be16 vlan_tpid; /* 802.1Q TPID. */
+ __be16 vlan_tpid; /* 802.1Q or 802.1ad TPID. */
__be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */
};
@@ -721,9 +721,10 @@ enum ovs_nat_attr {
* is copied from the value to the packet header field, rest of the bits are
* left unchanged. The non-masked value bits must be passed in as zeroes.
* Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute.
- * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
- * packet.
- * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
+ * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q or 802.1ad header
+ * onto the packet.
+ * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q or 802.1ad header
+ * from the packet.
* @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in
* the nested %OVS_SAMPLE_ATTR_* attributes.
* @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 48c2705db22c..90493a66dddd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1637,21 +1637,42 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
return 0;
}
-static void find_good_pkt_pointers(struct verifier_env *env,
- struct reg_state *dst_reg)
+static void find_good_pkt_pointers(struct verifier_state *state,
+ const struct reg_state *dst_reg)
{
- struct verifier_state *state = &env->cur_state;
struct reg_state *regs = state->regs, *reg;
int i;
- /* r2 = r3;
- * r2 += 8
- * if (r2 > pkt_end) goto somewhere
- * r2 == dst_reg, pkt_end == src_reg,
- * r2=pkt(id=n,off=8,r=0)
- * r3=pkt(id=n,off=0,r=0)
- * find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
- * so that range of bytes [r3, r3 + 8) is safe to access
+
+ /* LLVM can generate two kind of checks:
+ *
+ * Type 1:
+ *
+ * r2 = r3;
+ * r2 += 8;
+ * if (r2 > pkt_end) goto <handle exception>
+ * <access okay>
+ *
+ * Where:
+ * r2 == dst_reg, pkt_end == src_reg
+ * r2=pkt(id=n,off=8,r=0)
+ * r3=pkt(id=n,off=0,r=0)
+ *
+ * Type 2:
+ *
+ * r2 = r3;
+ * r2 += 8;
+ * if (pkt_end >= r2) goto <access okay>
+ * <handle exception>
+ *
+ * Where:
+ * pkt_end == dst_reg, r2 == src_reg
+ * r2=pkt(id=n,off=8,r=0)
+ * r3=pkt(id=n,off=0,r=0)
+ *
+ * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
+ * so that range of bytes [r3, r3 + 8) is safe to access.
*/
+
for (i = 0; i < MAX_BPF_REG; i++)
if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
regs[i].range = dst_reg->off;
@@ -1668,8 +1689,8 @@ static void find_good_pkt_pointers(struct verifier_env *env,
static int check_cond_jmp_op(struct verifier_env *env,
struct bpf_insn *insn, int *insn_idx)
{
- struct reg_state *regs = env->cur_state.regs, *dst_reg;
- struct verifier_state *other_branch;
+ struct verifier_state *other_branch, *this_branch = &env->cur_state;
+ struct reg_state *regs = this_branch->regs, *dst_reg;
u8 opcode = BPF_OP(insn->code);
int err;
@@ -1750,13 +1771,17 @@ static int check_cond_jmp_op(struct verifier_env *env,
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
dst_reg->type == PTR_TO_PACKET &&
regs[insn->src_reg].type == PTR_TO_PACKET_END) {
- find_good_pkt_pointers(env, dst_reg);
+ find_good_pkt_pointers(this_branch, dst_reg);
+ } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
+ dst_reg->type == PTR_TO_PACKET_END &&
+ regs[insn->src_reg].type == PTR_TO_PACKET) {
+ find_good_pkt_pointers(other_branch, &regs[insn->src_reg]);
} else if (is_pointer_value(env, insn->dst_reg)) {
verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
return -EACCES;
}
if (log_level)
- print_verifier_state(&env->cur_state);
+ print_verifier_state(this_branch);
return 0;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1dfca1c3f8f5..937e459bdaa9 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3669,10 +3669,6 @@ nla_put_failure:
return -EMSGSIZE;
}
-static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
- [IFLA_STATS_LINK_64] = { .len = sizeof(struct rtnl_link_stats64) },
-};
-
static size_t if_nlmsg_stats_size(const struct net_device *dev,
u32 filter_mask)
{
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3864b4b68fa1..1e329d411242 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2445,6 +2445,25 @@ void skb_queue_purge(struct sk_buff_head *list)
EXPORT_SYMBOL(skb_queue_purge);
/**
+ * skb_rbtree_purge - empty a skb rbtree
+ * @root: root of the rbtree to empty
+ *
+ * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
+ * the list and one reference dropped. This function does not take
+ * any lock. Synchronization should be handled by the caller (e.g., TCP
+ * out-of-order queue is protected by the socket lock).
+ */
+void skb_rbtree_purge(struct rb_root *root)
+{
+ struct sk_buff *skb, *next;
+
+ rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode)
+ kfree_skb(skb);
+
+ *root = RB_ROOT;
+}
+
+/**
* skb_queue_head - queue a buffer at the list head
* @list: list to use
* @newsk: buffer to queue
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e2ffc2a5c7db..241f27bbd7ad 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1081,7 +1081,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
struct trie *t = (struct trie *)tb->tb_data;
struct fib_alias *fa, *new_fa;
struct key_vector *l, *tp;
- unsigned int nlflags = 0;
+ u16 nlflags = NLM_F_EXCL;
struct fib_info *fi;
u8 plen = cfg->fc_dst_len;
u8 slen = KEYLENGTH - plen;
@@ -1126,6 +1126,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
if (cfg->fc_nlflags & NLM_F_EXCL)
goto out;
+ nlflags &= ~NLM_F_EXCL;
+
/* We have 2 goals:
* 1. Find exact match for type, scope, fib_info to avoid
* duplicate routes
@@ -1151,6 +1153,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
struct fib_info *fi_drop;
u8 state;
+ nlflags |= NLM_F_REPLACE;
fa = fa_first;
if (fa_match) {
if (fa == fa_match)
@@ -1191,7 +1194,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
if (state & FA_S_ACCESSED)
rt_cache_flush(cfg->fc_nlinfo.nl_net);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
- tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
+ tb->tb_id, &cfg->fc_nlinfo, nlflags);
goto succeeded;
}
@@ -1203,7 +1206,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
goto out;
if (cfg->fc_nlflags & NLM_F_APPEND)
- nlflags = NLM_F_APPEND;
+ nlflags |= NLM_F_APPEND;
else
fa = fa_first;
}
@@ -1211,6 +1214,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
if (!(cfg->fc_nlflags & NLM_F_CREATE))
goto out;
+ nlflags |= NLM_F_CREATE;
err = -ENOBUFS;
new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
if (!new_fa)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index abfbe492ebfe..e4d16fc5bbb3 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -99,6 +99,7 @@ static size_t inet_sk_attr_size(void)
+ nla_total_size(1) /* INET_DIAG_SHUTDOWN */
+ nla_total_size(1) /* INET_DIAG_TOS */
+ nla_total_size(1) /* INET_DIAG_TCLASS */
+ + nla_total_size(4) /* INET_DIAG_MARK */
+ nla_total_size(sizeof(struct inet_diag_meminfo))
+ nla_total_size(sizeof(struct inet_diag_msg))
+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
@@ -109,7 +110,8 @@ static size_t inet_sk_attr_size(void)
int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
struct inet_diag_msg *r, int ext,
- struct user_namespace *user_ns)
+ struct user_namespace *user_ns,
+ bool net_admin)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -136,6 +138,9 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
}
#endif
+ if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
+ goto errout;
+
r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
r->idiag_inode = sock_i_ino(sk);
@@ -149,7 +154,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
struct sk_buff *skb, const struct inet_diag_req_v2 *req,
struct user_namespace *user_ns,
u32 portid, u32 seq, u16 nlmsg_flags,
- const struct nlmsghdr *unlh)
+ const struct nlmsghdr *unlh,
+ bool net_admin)
{
const struct tcp_congestion_ops *ca_ops;
const struct inet_diag_handler *handler;
@@ -175,7 +181,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
r->idiag_timer = 0;
r->idiag_retrans = 0;
- if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns))
+ if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
goto errout;
if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
@@ -274,10 +280,11 @@ static int inet_csk_diag_fill(struct sock *sk,
const struct inet_diag_req_v2 *req,
struct user_namespace *user_ns,
u32 portid, u32 seq, u16 nlmsg_flags,
- const struct nlmsghdr *unlh)
+ const struct nlmsghdr *unlh,
+ bool net_admin)
{
- return inet_sk_diag_fill(sk, inet_csk(sk), skb, req,
- user_ns, portid, seq, nlmsg_flags, unlh);
+ return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, user_ns,
+ portid, seq, nlmsg_flags, unlh, net_admin);
}
static int inet_twsk_diag_fill(struct sock *sk,
@@ -319,8 +326,9 @@ static int inet_twsk_diag_fill(struct sock *sk,
static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
u32 portid, u32 seq, u16 nlmsg_flags,
- const struct nlmsghdr *unlh)
+ const struct nlmsghdr *unlh, bool net_admin)
{
+ struct request_sock *reqsk = inet_reqsk(sk);
struct inet_diag_msg *r;
struct nlmsghdr *nlh;
long tmo;
@@ -334,7 +342,7 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
inet_diag_msg_common_fill(r, sk);
r->idiag_state = TCP_SYN_RECV;
r->idiag_timer = 1;
- r->idiag_retrans = inet_reqsk(sk)->num_retrans;
+ r->idiag_retrans = reqsk->num_retrans;
BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
offsetof(struct sock, sk_cookie));
@@ -346,6 +354,10 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
r->idiag_uid = 0;
r->idiag_inode = 0;
+ if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
+ inet_rsk(reqsk)->ir_mark))
+ return -EMSGSIZE;
+
nlmsg_end(skb, nlh);
return 0;
}
@@ -354,7 +366,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
const struct inet_diag_req_v2 *r,
struct user_namespace *user_ns,
u32 portid, u32 seq, u16 nlmsg_flags,
- const struct nlmsghdr *unlh)
+ const struct nlmsghdr *unlh, bool net_admin)
{
if (sk->sk_state == TCP_TIME_WAIT)
return inet_twsk_diag_fill(sk, skb, portid, seq,
@@ -362,10 +374,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
if (sk->sk_state == TCP_NEW_SYN_RECV)
return inet_req_diag_fill(sk, skb, portid, seq,
- nlmsg_flags, unlh);
+ nlmsg_flags, unlh, net_admin);
return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
- nlmsg_flags, unlh);
+ nlmsg_flags, unlh, net_admin);
}
struct sock *inet_diag_find_one_icsk(struct net *net,
@@ -435,7 +447,8 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
err = sk_diag_fill(sk, rep, req,
sk_user_ns(NETLINK_CB(in_skb).sk),
NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0, nlh);
+ nlh->nlmsg_seq, 0, nlh,
+ netlink_net_capable(in_skb, CAP_NET_ADMIN));
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
nlmsg_free(rep);
@@ -796,7 +809,8 @@ static int inet_csk_diag_dump(struct sock *sk,
struct sk_buff *skb,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *r,
- const struct nlattr *bc)
+ const struct nlattr *bc,
+ bool net_admin)
{
if (!inet_diag_bc_sk(bc, sk))
return 0;
@@ -804,7 +818,8 @@ static int inet_csk_diag_dump(struct sock *sk,
return inet_csk_diag_fill(sk, skb, r,
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh,
+ net_admin);
}
static void twsk_build_assert(void)
@@ -840,6 +855,7 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
int i, num, s_i, s_num;
u32 idiag_states = r->idiag_states;
+ bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
if (idiag_states & TCPF_SYN_RECV)
idiag_states |= TCPF_NEW_SYN_RECV;
@@ -880,7 +896,8 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
cb->args[3] > 0)
goto next_listen;
- if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
+ if (inet_csk_diag_dump(sk, skb, cb, r,
+ bc, net_admin) < 0) {
spin_unlock_bh(&ilb->lock);
goto done;
}
@@ -948,7 +965,7 @@ skip_listen_ht:
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- cb->nlh);
+ cb->nlh, net_admin);
if (res < 0) {
spin_unlock_bh(lock);
goto done;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 71a52f4d4cff..af4919792b6a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -284,9 +284,12 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
ipc->ttl = val;
break;
case IP_TOS:
- if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+ if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
+ val = *(int *)CMSG_DATA(cmsg);
+ else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
+ val = *(u8 *)CMSG_DATA(cmsg);
+ else
return -EINVAL;
- val = *(int *)CMSG_DATA(cmsg);
if (val < 0 || val > 255)
return -EINVAL;
ipc->tos = val;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 77311a92275c..a13fcb369f52 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -380,7 +380,7 @@ void tcp_init_sock(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- __skb_queue_head_init(&tp->out_of_order_queue);
+ tp->out_of_order_queue = RB_ROOT;
tcp_init_xmit_timers(sk);
tcp_prequeue_init(tp);
INIT_LIST_HEAD(&tp->tsq_node);
@@ -2243,7 +2243,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_clear_xmit_timers(sk);
__skb_queue_purge(&sk->sk_receive_queue);
tcp_write_queue_purge(sk);
- __skb_queue_purge(&tp->out_of_order_queue);
+ skb_rbtree_purge(&tp->out_of_order_queue);
inet->inet_dport = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8cd02c0b056c..a5934c4c8cd4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4108,7 +4108,7 @@ void tcp_fin(struct sock *sk)
/* It _is_ possible, that we have something out-of-order _after_ FIN.
* Probably, we should reset in this case. For now drop them.
*/
- __skb_queue_purge(&tp->out_of_order_queue);
+ skb_rbtree_purge(&tp->out_of_order_queue);
if (tcp_is_sack(tp))
tcp_sack_reset(&tp->rx_opt);
sk_mem_reclaim(sk);
@@ -4268,7 +4268,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
int this_sack;
/* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
- if (skb_queue_empty(&tp->out_of_order_queue)) {
+ if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
tp->rx_opt.num_sacks = 0;
return;
}
@@ -4344,10 +4344,13 @@ static void tcp_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
__u32 dsack_high = tp->rcv_nxt;
+ bool fin, fragstolen, eaten;
struct sk_buff *skb, *tail;
- bool fragstolen, eaten;
+ struct rb_node *p;
- while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
+ p = rb_first(&tp->out_of_order_queue);
+ while (p) {
+ skb = rb_entry(p, struct sk_buff, rbnode);
if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;
@@ -4357,9 +4360,10 @@ static void tcp_ofo_queue(struct sock *sk)
dsack_high = TCP_SKB_CB(skb)->end_seq;
tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
}
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, &tp->out_of_order_queue);
- __skb_unlink(skb, &tp->out_of_order_queue);
- if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
+ if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
SOCK_DEBUG(sk, "ofo packet was already received\n");
tcp_drop(sk, skb);
continue;
@@ -4371,12 +4375,19 @@ static void tcp_ofo_queue(struct sock *sk)
tail = skb_peek_tail(&sk->sk_receive_queue);
eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
+ fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
if (!eaten)
__skb_queue_tail(&sk->sk_receive_queue, skb);
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
- tcp_fin(sk);
- if (eaten)
+ else
kfree_skb_partial(skb, fragstolen);
+
+ if (unlikely(fin)) {
+ tcp_fin(sk);
+ /* tcp_fin() purges tp->out_of_order_queue,
+ * so we must end this loop right now.
+ */
+ break;
+ }
}
}
@@ -4403,8 +4414,10 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ struct rb_node **p, *q, *parent;
struct sk_buff *skb1;
u32 seq, end_seq;
+ bool fragstolen;
tcp_ecn_check_ce(tp, skb);
@@ -4419,88 +4432,85 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
inet_csk_schedule_ack(sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
+ seq = TCP_SKB_CB(skb)->seq;
+ end_seq = TCP_SKB_CB(skb)->end_seq;
SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
- tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+ tp->rcv_nxt, seq, end_seq);
- skb1 = skb_peek_tail(&tp->out_of_order_queue);
- if (!skb1) {
+ p = &tp->out_of_order_queue.rb_node;
+ if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
/* Initial out of order segment, build 1 SACK. */
if (tcp_is_sack(tp)) {
tp->rx_opt.num_sacks = 1;
- tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
- tp->selective_acks[0].end_seq =
- TCP_SKB_CB(skb)->end_seq;
+ tp->selective_acks[0].start_seq = seq;
+ tp->selective_acks[0].end_seq = end_seq;
}
- __skb_queue_head(&tp->out_of_order_queue, skb);
+ rb_link_node(&skb->rbnode, NULL, p);
+ rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
+ tp->ooo_last_skb = skb;
goto end;
}
- seq = TCP_SKB_CB(skb)->seq;
- end_seq = TCP_SKB_CB(skb)->end_seq;
-
- if (seq == TCP_SKB_CB(skb1)->end_seq) {
- bool fragstolen;
-
- if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
- __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
- } else {
- tcp_grow_window(sk, skb);
- kfree_skb_partial(skb, fragstolen);
- skb = NULL;
+ /* In the typical case, we are adding an skb to the end of the list.
+ * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
+ */
+ if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
+coalesce_done:
+ tcp_grow_window(sk, skb);
+ kfree_skb_partial(skb, fragstolen);
+ skb = NULL;
+ goto add_sack;
+ }
+
+ /* Find place to insert this segment. Handle overlaps on the way. */
+ parent = NULL;
+ while (*p) {
+ parent = *p;
+ skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ if (before(seq, TCP_SKB_CB(skb1)->seq)) {
+ p = &parent->rb_left;
+ continue;
}
-
- if (!tp->rx_opt.num_sacks ||
- tp->selective_acks[0].end_seq != seq)
- goto add_sack;
-
- /* Common case: data arrive in order after hole. */
- tp->selective_acks[0].end_seq = end_seq;
- goto end;
- }
-
- /* Find place to insert this segment. */
- while (1) {
- if (!after(TCP_SKB_CB(skb1)->seq, seq))
- break;
- if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
- skb1 = NULL;
- break;
+ if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+ if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+ /* All the bits are present. Drop. */
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPOFOMERGE);
+ __kfree_skb(skb);
+ skb = NULL;
+ tcp_dsack_set(sk, seq, end_seq);
+ goto add_sack;
+ }
+ if (after(seq, TCP_SKB_CB(skb1)->seq)) {
+ /* Partial overlap. */
+ tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
+ } else {
+ /* skb's seq == skb1's seq and skb covers skb1.
+ * Replace skb1 with skb.
+ */
+ rb_replace_node(&skb1->rbnode, &skb->rbnode,
+ &tp->out_of_order_queue);
+ tcp_dsack_extend(sk,
+ TCP_SKB_CB(skb1)->seq,
+ TCP_SKB_CB(skb1)->end_seq);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPOFOMERGE);
+ __kfree_skb(skb1);
+ goto add_sack;
+ }
+ } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
+ goto coalesce_done;
}
- skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
+ p = &parent->rb_right;
}
- /* Do skb overlap to previous one? */
- if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
- if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
- /* All the bits are present. Drop. */
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
- tcp_drop(sk, skb);
- skb = NULL;
- tcp_dsack_set(sk, seq, end_seq);
- goto add_sack;
- }
- if (after(seq, TCP_SKB_CB(skb1)->seq)) {
- /* Partial overlap. */
- tcp_dsack_set(sk, seq,
- TCP_SKB_CB(skb1)->end_seq);
- } else {
- if (skb_queue_is_first(&tp->out_of_order_queue,
- skb1))
- skb1 = NULL;
- else
- skb1 = skb_queue_prev(
- &tp->out_of_order_queue,
- skb1);
- }
- }
- if (!skb1)
- __skb_queue_head(&tp->out_of_order_queue, skb);
- else
- __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+ /* Insert segment into RB tree. */
+ rb_link_node(&skb->rbnode, parent, p);
+ rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
- /* And clean segments covered by new one as whole. */
- while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
- skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
+ /* Remove other segments covered by skb. */
+ while ((q = rb_next(&skb->rbnode)) != NULL) {
+ skb1 = rb_entry(q, struct sk_buff, rbnode);
if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
break;
@@ -4509,12 +4519,15 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
end_seq);
break;
}
- __skb_unlink(skb1, &tp->out_of_order_queue);
+ rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
TCP_SKB_CB(skb1)->end_seq);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
tcp_drop(sk, skb1);
}
+ /* If there is no skb after us, we are the last_skb ! */
+ if (!q)
+ tp->ooo_last_skb = skb;
add_sack:
if (tcp_is_sack(tp))
@@ -4651,13 +4664,13 @@ queue_and_out:
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
tcp_fin(sk);
- if (!skb_queue_empty(&tp->out_of_order_queue)) {
+ if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
tcp_ofo_queue(sk);
/* RFC2581. 4.2. SHOULD send immediate ACK, when
* gap in queue is filled.
*/
- if (skb_queue_empty(&tp->out_of_order_queue))
+ if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
inet_csk(sk)->icsk_ack.pingpong = 0;
}
@@ -4711,48 +4724,76 @@ drop:
tcp_data_queue_ofo(sk, skb);
}
+static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
+{
+ if (list)
+ return !skb_queue_is_last(list, skb) ? skb->next : NULL;
+
+ return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+}
+
static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
- struct sk_buff_head *list)
+ struct sk_buff_head *list,
+ struct rb_root *root)
{
- struct sk_buff *next = NULL;
+ struct sk_buff *next = tcp_skb_next(skb, list);
- if (!skb_queue_is_last(list, skb))
- next = skb_queue_next(list, skb);
+ if (list)
+ __skb_unlink(skb, list);
+ else
+ rb_erase(&skb->rbnode, root);
- __skb_unlink(skb, list);
__kfree_skb(skb);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
return next;
}
+/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
+static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct sk_buff *skb1;
+
+ while (*p) {
+ parent = *p;
+ skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+ rb_link_node(&skb->rbnode, parent, p);
+ rb_insert_color(&skb->rbnode, root);
+}
+
/* Collapse contiguous sequence of skbs head..tail with
* sequence numbers start..end.
*
- * If tail is NULL, this means until the end of the list.
+ * If tail is NULL, this means until the end of the queue.
*
* Segments with FIN/SYN are not collapsed (only because this
* simplifies code)
*/
static void
-tcp_collapse(struct sock *sk, struct sk_buff_head *list,
- struct sk_buff *head, struct sk_buff *tail,
- u32 start, u32 end)
+tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
+ struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
{
- struct sk_buff *skb, *n;
+ struct sk_buff *skb = head, *n;
+ struct sk_buff_head tmp;
bool end_of_skbs;
/* First, check that queue is collapsible and find
- * the point where collapsing can be useful. */
- skb = head;
+ * the point where collapsing can be useful.
+ */
restart:
- end_of_skbs = true;
- skb_queue_walk_from_safe(list, skb, n) {
- if (skb == tail)
- break;
+ for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
+ n = tcp_skb_next(skb, list);
+
/* No new bits? It is possible on ofo queue. */
if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
- skb = tcp_collapse_one(sk, skb, list);
+ skb = tcp_collapse_one(sk, skb, list, root);
if (!skb)
break;
goto restart;
@@ -4770,13 +4811,10 @@ restart:
break;
}
- if (!skb_queue_is_last(list, skb)) {
- struct sk_buff *next = skb_queue_next(list, skb);
- if (next != tail &&
- TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
- end_of_skbs = false;
- break;
- }
+ if (n && n != tail &&
+ TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
+ end_of_skbs = false;
+ break;
}
/* Decided to skip this, advance start seq. */
@@ -4786,17 +4824,22 @@ restart:
(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
return;
+ __skb_queue_head_init(&tmp);
+
while (before(start, end)) {
int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
struct sk_buff *nskb;
nskb = alloc_skb(copy, GFP_ATOMIC);
if (!nskb)
- return;
+ break;
memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
- __skb_queue_before(list, skb, nskb);
+ if (list)
+ __skb_queue_before(list, skb, nskb);
+ else
+ __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
skb_set_owner_r(nskb, sk);
/* Copy data, releasing collapsed skbs. */
@@ -4814,14 +4857,17 @@ restart:
start += size;
}
if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
- skb = tcp_collapse_one(sk, skb, list);
+ skb = tcp_collapse_one(sk, skb, list, root);
if (!skb ||
skb == tail ||
(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
- return;
+ goto end;
}
}
}
+end:
+ skb_queue_walk_safe(&tmp, skb, n)
+ tcp_rbtree_insert(root, skb);
}
/* Collapse ofo queue. Algorithm: select contiguous sequence of skbs
@@ -4830,43 +4876,43 @@ restart:
static void tcp_collapse_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
- struct sk_buff *head;
+ struct sk_buff *skb, *head;
+ struct rb_node *p;
u32 start, end;
- if (!skb)
+ p = rb_first(&tp->out_of_order_queue);
+ skb = rb_entry_safe(p, struct sk_buff, rbnode);
+new_range:
+ if (!skb) {
+ p = rb_last(&tp->out_of_order_queue);
+ /* Note: This is possible p is NULL here. We do not
+ * use rb_entry_safe(), as ooo_last_skb is valid only
+ * if rbtree is not empty.
+ */
+ tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
return;
-
+ }
start = TCP_SKB_CB(skb)->seq;
end = TCP_SKB_CB(skb)->end_seq;
- head = skb;
-
- for (;;) {
- struct sk_buff *next = NULL;
- if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
- next = skb_queue_next(&tp->out_of_order_queue, skb);
- skb = next;
+ for (head = skb;;) {
+ skb = tcp_skb_next(skb, NULL);
- /* Segment is terminated when we see gap or when
- * we are at the end of all the queue. */
+ /* Range is terminated when we see a gap or when
+ * we are at the queue end.
+ */
if (!skb ||
after(TCP_SKB_CB(skb)->seq, end) ||
before(TCP_SKB_CB(skb)->end_seq, start)) {
- tcp_collapse(sk, &tp->out_of_order_queue,
+ tcp_collapse(sk, NULL, &tp->out_of_order_queue,
head, skb, start, end);
- head = skb;
- if (!skb)
- break;
- /* Start new segment */
+ goto new_range;
+ }
+
+ if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
start = TCP_SKB_CB(skb)->seq;
+ if (after(TCP_SKB_CB(skb)->end_seq, end))
end = TCP_SKB_CB(skb)->end_seq;
- } else {
- if (before(TCP_SKB_CB(skb)->seq, start))
- start = TCP_SKB_CB(skb)->seq;
- if (after(TCP_SKB_CB(skb)->end_seq, end))
- end = TCP_SKB_CB(skb)->end_seq;
- }
}
}
@@ -4883,20 +4929,24 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
static bool tcp_prune_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb;
+ struct rb_node *node, *prev;
- if (skb_queue_empty(&tp->out_of_order_queue))
+ if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
return false;
NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
-
- while ((skb = __skb_dequeue_tail(&tp->out_of_order_queue)) != NULL) {
- tcp_drop(sk, skb);
+ node = &tp->ooo_last_skb->rbnode;
+ do {
+ prev = rb_prev(node);
+ rb_erase(node, &tp->out_of_order_queue);
+ tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
!tcp_under_memory_pressure(sk))
break;
- }
+ node = prev;
+ } while (node);
+ tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
/* Reset SACK state. A conforming SACK implementation will
* do the same at a timeout based retransmit. When a connection
@@ -4930,7 +4980,7 @@ static int tcp_prune_queue(struct sock *sk)
tcp_collapse_ofo_queue(sk);
if (!skb_queue_empty(&sk->sk_receive_queue))
- tcp_collapse(sk, &sk->sk_receive_queue,
+ tcp_collapse(sk, &sk->sk_receive_queue, NULL,
skb_peek(&sk->sk_receive_queue),
NULL,
tp->copied_seq, tp->rcv_nxt);
@@ -5035,7 +5085,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
/* We ACK each frame or... */
tcp_in_quickack_mode(sk) ||
/* We have out of order data. */
- (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
+ (ofo_possible && !RB_EMPTY_ROOT(&tp->out_of_order_queue))) {
/* Then ack it now */
tcp_send_ack(sk);
} else {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a75bf48d7950..04b989328558 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1845,7 +1845,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
tcp_write_queue_purge(sk);
/* Cleans up our, hopefully empty, out_of_order_queue. */
- __skb_queue_purge(&tp->out_of_order_queue);
+ skb_rbtree_purge(&tp->out_of_order_queue);
#ifdef CONFIG_TCP_MD5SIG
/* Clean up the MD5 key list, if any */
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4b95ec4ed2c8..f63c73dc0acb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -488,7 +488,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->snd_cwnd_cnt = 0;
tcp_init_xmit_timers(newsk);
- __skb_queue_head_init(&newtp->out_of_order_queue);
newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
newtp->rx_opt.saw_tstamp = 0;
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 58b79c0c0d69..9a89c10a55f0 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -20,7 +20,7 @@
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *req,
- struct nlattr *bc)
+ struct nlattr *bc, bool net_admin)
{
if (!inet_diag_bc_sk(bc, sk))
return 0;
@@ -28,7 +28,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
return inet_sk_diag_fill(sk, NULL, skb, req,
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, net_admin);
}
static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
@@ -76,7 +76,8 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
err = inet_sk_diag_fill(sk, NULL, rep, req,
sk_user_ns(NETLINK_CB(in_skb).sk),
NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0, nlh);
+ nlh->nlmsg_seq, 0, nlh,
+ netlink_net_capable(in_skb, CAP_NET_ADMIN));
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
kfree_skb(rep);
@@ -97,6 +98,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *r, struct nlattr *bc)
{
+ bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
struct net *net = sock_net(skb->sk);
int num, s_num, slot, s_slot;
@@ -132,7 +134,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb,
r->id.idiag_dport)
goto next;
- if (sk_diag_dump(sk, skb, cb, r, bc) < 0) {
+ if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) {
spin_unlock_bh(&hslot->lock);
goto done;
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 771be1fa4176..ef5485204522 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -743,6 +743,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+ u16 nlflags = NLM_F_EXCL;
int err;
ins = &fn->leaf;
@@ -759,6 +760,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
if (info->nlh &&
(info->nlh->nlmsg_flags & NLM_F_EXCL))
return -EEXIST;
+
+ nlflags &= ~NLM_F_EXCL;
if (replace) {
if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
found++;
@@ -856,6 +859,7 @@ next_iter:
pr_warn("NLM_F_CREATE should be set when creating new route\n");
add:
+ nlflags |= NLM_F_CREATE;
err = fib6_commit_metrics(&rt->dst, mxc);
if (err)
return err;
@@ -864,7 +868,7 @@ add:
*ins = rt;
rt->rt6i_node = fn;
atomic_inc(&rt->rt6i_ref);
- inet6_rt_notify(RTM_NEWROUTE, rt, info, 0);
+ inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
if (!(fn->fn_flags & RTN_RTINFO)) {
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index ca91fc33f8a9..4fe9032b1160 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -246,20 +246,24 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
int err;
err = skb_vlan_pop(skb);
- if (skb_vlan_tag_present(skb))
+ if (skb_vlan_tag_present(skb)) {
invalidate_flow_key(key);
- else
- key->eth.tci = 0;
+ } else {
+ key->eth.vlan.tci = 0;
+ key->eth.vlan.tpid = 0;
+ }
return err;
}
static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
const struct ovs_action_push_vlan *vlan)
{
- if (skb_vlan_tag_present(skb))
+ if (skb_vlan_tag_present(skb)) {
invalidate_flow_key(key);
- else
- key->eth.tci = vlan->vlan_tci;
+ } else {
+ key->eth.vlan.tci = vlan->vlan_tci;
+ key->eth.vlan.tpid = vlan->vlan_tpid;
+ }
return skb_vlan_push(skb, vlan->vlan_tpid,
ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
}
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 0ea128eeeab2..1240ae3b88d2 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -302,24 +302,57 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
sizeof(struct icmp6hdr));
}
-static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+/**
+ * Parse vlan tag from vlan header.
+ * Returns ERROR on memory error.
+ * Returns 0 if it encounters a non-vlan or incomplete packet.
+ * Returns 1 after successfully parsing vlan tag.
+ */
+static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
{
- struct qtag_prefix {
- __be16 eth_type; /* ETH_P_8021Q */
- __be16 tci;
- };
- struct qtag_prefix *qp;
+ struct vlan_head *vh = (struct vlan_head *)skb->data;
- if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)))
+ if (likely(!eth_type_vlan(vh->tpid)))
return 0;
- if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) +
- sizeof(__be16))))
+ if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16)))
+ return 0;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) +
+ sizeof(__be16))))
return -ENOMEM;
- qp = (struct qtag_prefix *) skb->data;
- key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT);
- __skb_pull(skb, sizeof(struct qtag_prefix));
+ vh = (struct vlan_head *)skb->data;
+ key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT);
+ key_vh->tpid = vh->tpid;
+
+ __skb_pull(skb, sizeof(struct vlan_head));
+ return 1;
+}
+
+static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ int res;
+
+ key->eth.vlan.tci = 0;
+ key->eth.vlan.tpid = 0;
+ key->eth.cvlan.tci = 0;
+ key->eth.cvlan.tpid = 0;
+
+ if (likely(skb_vlan_tag_present(skb))) {
+ key->eth.vlan.tci = htons(skb->vlan_tci);
+ key->eth.vlan.tpid = skb->vlan_proto;
+ } else {
+ /* Parse outer vlan tag in the non-accelerated case. */
+ res = parse_vlan_tag(skb, &key->eth.vlan);
+ if (res <= 0)
+ return res;
+ }
+
+ /* Parse inner vlan tag. */
+ res = parse_vlan_tag(skb, &key->eth.cvlan);
+ if (res <= 0)
+ return res;
return 0;
}
@@ -480,12 +513,8 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
* update skb->csum here.
*/
- key->eth.tci = 0;
- if (skb_vlan_tag_present(skb))
- key->eth.tci = htons(skb->vlan_tci);
- else if (eth->h_proto == htons(ETH_P_8021Q))
- if (unlikely(parse_vlan(skb, key)))
- return -ENOMEM;
+ if (unlikely(parse_vlan(skb, key)))
+ return -ENOMEM;
key->eth.type = parse_ethertype(skb);
if (unlikely(key->eth.type == htons(0)))
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 03378e75a67c..156a3029c17b 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -50,6 +50,11 @@ struct ovs_tunnel_info {
struct metadata_dst *tun_dst;
};
+struct vlan_head {
+ __be16 tpid; /* Vlan type. Generally 802.1q or 802.1ad.*/
+ __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+};
+
#define OVS_SW_FLOW_KEY_METADATA_SIZE \
(offsetof(struct sw_flow_key, recirc_id) + \
FIELD_SIZEOF(struct sw_flow_key, recirc_id))
@@ -69,7 +74,8 @@ struct sw_flow_key {
struct {
u8 src[ETH_ALEN]; /* Ethernet source address. */
u8 dst[ETH_ALEN]; /* Ethernet destination address. */
- __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+ struct vlan_head vlan;
+ struct vlan_head cvlan;
__be16 type; /* Ethernet frame type. */
} eth;
union {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index c78a6a1476fb..8efa718ddb5e 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -808,6 +808,167 @@ int ovs_nla_put_tunnel_info(struct sk_buff *skb,
ip_tunnel_info_af(tun_info));
}
+static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
+ const struct nlattr *a[],
+ bool is_mask, bool inner)
+{
+ __be16 tci = 0;
+ __be16 tpid = 0;
+
+ if (a[OVS_KEY_ATTR_VLAN])
+ tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+ if (a[OVS_KEY_ATTR_ETHERTYPE])
+ tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+
+ if (likely(!inner)) {
+ SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
+ SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
+ } else {
+ SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
+ SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
+ }
+ return 0;
+}
+
+static int validate_vlan_from_nlattrs(const struct sw_flow_match *match,
+ u64 key_attrs, bool inner,
+ const struct nlattr **a, bool log)
+{
+ __be16 tci = 0;
+
+ if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
+ (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
+ eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) {
+ /* Not a VLAN. */
+ return 0;
+ }
+
+ if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
+ (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
+ OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN");
+ return -EINVAL;
+ }
+
+ if (a[OVS_KEY_ATTR_VLAN])
+ tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+ if (!(tci & htons(VLAN_TAG_PRESENT))) {
+ if (tci) {
+ OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.",
+ (inner) ? "C-VLAN" : "VLAN");
+ return -EINVAL;
+ } else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
+ /* Corner case for truncated VLAN header. */
+ OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
+ (inner) ? "C-VLAN" : "VLAN");
+ return -EINVAL;
+ }
+ }
+
+ return 1;
+}
+
+static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match,
+ u64 key_attrs, bool inner,
+ const struct nlattr **a, bool log)
+{
+ __be16 tci = 0;
+ __be16 tpid = 0;
+ bool encap_valid = !!(match->key->eth.vlan.tci &
+ htons(VLAN_TAG_PRESENT));
+ bool i_encap_valid = !!(match->key->eth.cvlan.tci &
+ htons(VLAN_TAG_PRESENT));
+
+ if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
+ /* Not a VLAN. */
+ return 0;
+ }
+
+ if ((!inner && !encap_valid) || (inner && !i_encap_valid)) {
+ OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.",
+ (inner) ? "C-VLAN" : "VLAN");
+ return -EINVAL;
+ }
+
+ if (a[OVS_KEY_ATTR_VLAN])
+ tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+ if (a[OVS_KEY_ATTR_ETHERTYPE])
+ tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+
+ if (tpid != htons(0xffff)) {
+ OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).",
+ (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
+ return -EINVAL;
+ }
+ if (!(tci & htons(VLAN_TAG_PRESENT))) {
+ OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.",
+ (inner) ? "C-VLAN" : "VLAN");
+ return -EINVAL;
+ }
+
+ return 1;
+}
+
+static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
+ u64 *key_attrs, bool inner,
+ const struct nlattr **a, bool is_mask,
+ bool log)
+{
+ int err;
+ const struct nlattr *encap;
+
+ if (!is_mask)
+ err = validate_vlan_from_nlattrs(match, *key_attrs, inner,
+ a, log);
+ else
+ err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner,
+ a, log);
+ if (err <= 0)
+ return err;
+
+ err = encode_vlan_from_nlattrs(match, a, is_mask, inner);
+ if (err)
+ return err;
+
+ *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
+ *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
+ *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+
+ encap = a[OVS_KEY_ATTR_ENCAP];
+
+ if (!is_mask)
+ err = parse_flow_nlattrs(encap, a, key_attrs, log);
+ else
+ err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);
+
+ return err;
+}
+
+static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
+ u64 *key_attrs, const struct nlattr **a,
+ bool is_mask, bool log)
+{
+ int err;
+ bool encap_valid = false;
+
+ err = __parse_vlan_from_nlattrs(match, key_attrs, false, a,
+ is_mask, log);
+ if (err)
+ return err;
+
+ encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT));
+ if (encap_valid) {
+ err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
+ is_mask, log);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
u64 *attrs, const struct nlattr **a,
bool is_mask, bool log)
@@ -923,20 +1084,11 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
}
if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
- __be16 tci;
-
- tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
- if (!(tci & htons(VLAN_TAG_PRESENT))) {
- if (is_mask)
- OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.");
- else
- OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set.");
-
- return -EINVAL;
- }
-
- SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
- attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
+ /* VLAN attribute is always parsed before getting here since it
+ * may occur multiple times.
+ */
+ OVS_NLERR(log, "VLAN attribute unexpected.");
+ return -EINVAL;
}
if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
@@ -1182,49 +1334,18 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
bool log)
{
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
- const struct nlattr *encap;
struct nlattr *newmask = NULL;
u64 key_attrs = 0;
u64 mask_attrs = 0;
- bool encap_valid = false;
int err;
err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
if (err)
return err;
- if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
- (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
- (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
- __be16 tci;
-
- if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
- (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
- OVS_NLERR(log, "Invalid Vlan frame.");
- return -EINVAL;
- }
-
- key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
- tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
- encap = a[OVS_KEY_ATTR_ENCAP];
- key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
- encap_valid = true;
-
- if (tci & htons(VLAN_TAG_PRESENT)) {
- err = parse_flow_nlattrs(encap, a, &key_attrs, log);
- if (err)
- return err;
- } else if (!tci) {
- /* Corner case for truncated 802.1Q header. */
- if (nla_len(encap)) {
- OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute.");
- return -EINVAL;
- }
- } else {
- OVS_NLERR(log, "Encap attr is set for non-VLAN frame");
- return -EINVAL;
- }
- }
+ err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log);
+ if (err)
+ return err;
err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
if (err)
@@ -1265,46 +1386,12 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
goto free_newmask;
/* Always match on tci. */
- SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
-
- if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) {
- __be16 eth_type = 0;
- __be16 tci = 0;
-
- if (!encap_valid) {
- OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame.");
- err = -EINVAL;
- goto free_newmask;
- }
-
- mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
- if (a[OVS_KEY_ATTR_ETHERTYPE])
- eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
-
- if (eth_type == htons(0xffff)) {
- mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
- encap = a[OVS_KEY_ATTR_ENCAP];
- err = parse_flow_mask_nlattrs(encap, a,
- &mask_attrs, log);
- if (err)
- goto free_newmask;
- } else {
- OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).",
- ntohs(eth_type));
- err = -EINVAL;
- goto free_newmask;
- }
-
- if (a[OVS_KEY_ATTR_VLAN])
- tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+ SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
+ SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);
- if (!(tci & htons(VLAN_TAG_PRESENT))) {
- OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).",
- ntohs(tci));
- err = -EINVAL;
- goto free_newmask;
- }
- }
+ err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log);
+ if (err)
+ goto free_newmask;
err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
log);
@@ -1410,12 +1497,25 @@ int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr,
return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
}
+static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
+ bool is_mask)
+{
+ __be16 eth_type = !is_mask ? vh->tpid : htons(0xffff);
+
+ if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
+ nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci))
+ return -EMSGSIZE;
+ return 0;
+}
+
static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
const struct sw_flow_key *output, bool is_mask,
struct sk_buff *skb)
{
struct ovs_key_ethernet *eth_key;
- struct nlattr *nla, *encap;
+ struct nlattr *nla;
+ struct nlattr *encap = NULL;
+ struct nlattr *in_encap = NULL;
if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
goto nla_put_failure;
@@ -1464,17 +1564,21 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
ether_addr_copy(eth_key->eth_src, output->eth.src);
ether_addr_copy(eth_key->eth_dst, output->eth.dst);
- if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
- __be16 eth_type;
- eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
- if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
- nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
+ if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
+ if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
goto nla_put_failure;
encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
- if (!swkey->eth.tci)
+ if (!swkey->eth.vlan.tci)
goto unencap;
- } else
- encap = NULL;
+
+ if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
+ if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
+ goto nla_put_failure;
+ in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+ if (!swkey->eth.cvlan.tci)
+ goto unencap;
+ }
+ }
if (swkey->eth.type == htons(ETH_P_802_2)) {
/*
@@ -1493,6 +1597,14 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
goto nla_put_failure;
+ if (eth_type_vlan(swkey->eth.type)) {
+ /* There are 3 VLAN tags, we don't know anything about the rest
+ * of the packet, so truncate here.
+ */
+ WARN_ON_ONCE(!(encap && in_encap));
+ goto unencap;
+ }
+
if (swkey->eth.type == htons(ETH_P_IP)) {
struct ovs_key_ipv4 *ipv4_key;
@@ -1619,6 +1731,8 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
}
unencap:
+ if (in_encap)
+ nla_nest_end(skb, in_encap);
if (encap)
nla_nest_end(skb, encap);
@@ -2283,7 +2397,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
case OVS_ACTION_ATTR_PUSH_VLAN:
vlan = nla_data(a);
- if (vlan->vlan_tpid != htons(ETH_P_8021Q))
+ if (!eth_type_vlan(vlan->vlan_tpid))
return -EINVAL;
if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
return -EINVAL;
@@ -2388,7 +2502,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
(*sfa)->orig_len = nla_len(attr);
err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type,
- key->eth.tci, log);
+ key->eth.vlan.tci, log);
if (err)
ovs_nla_free_flow_actions(*sfa);
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 6b21fd068d87..8f198437c724 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -485,9 +485,14 @@ static unsigned int packet_length(const struct sk_buff *skb)
{
unsigned int length = skb->len - ETH_HLEN;
- if (skb->protocol == htons(ETH_P_8021Q))
+ if (skb_vlan_tagged(skb))
length -= VLAN_HLEN;
+ /* Don't subtract for multiple VLAN tags. Most (all?) drivers allow
+ * (ETH_LEN + VLAN_HLEN) in addition to the mtu value, but almost none
+ * account for 802.1ad. e.g. is_skb_forwardable().
+ */
+
return length;
}
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index f3508aa75815..807158e32f5f 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -106,7 +106,8 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
const struct inet_diag_req_v2 *req,
struct user_namespace *user_ns,
int portid, u32 seq, u16 nlmsg_flags,
- const struct nlmsghdr *unlh)
+ const struct nlmsghdr *unlh,
+ bool net_admin)
{
struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct list_head *addr_list;
@@ -133,7 +134,7 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
r->idiag_retrans = 0;
}
- if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns))
+ if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
goto errout;
if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) {
@@ -203,6 +204,7 @@ struct sctp_comm_param {
struct netlink_callback *cb;
const struct inet_diag_req_v2 *r;
const struct nlmsghdr *nlh;
+ bool net_admin;
};
static size_t inet_assoc_attr_size(struct sctp_association *asoc)
@@ -219,6 +221,7 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc)
+ nla_total_size(1) /* INET_DIAG_SHUTDOWN */
+ nla_total_size(1) /* INET_DIAG_TOS */
+ nla_total_size(1) /* INET_DIAG_TCLASS */
+ + nla_total_size(4) /* INET_DIAG_MARK */
+ nla_total_size(addrlen * asoc->peer.transport_count)
+ nla_total_size(addrlen * addrcnt)
+ nla_total_size(sizeof(struct inet_diag_meminfo))
@@ -256,7 +259,8 @@ static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p)
err = inet_sctp_diag_fill(sk, assoc, rep, req,
sk_user_ns(NETLINK_CB(in_skb).sk),
NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0, nlh);
+ nlh->nlmsg_seq, 0, nlh,
+ commp->net_admin);
release_sock(sk);
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
@@ -310,7 +314,8 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p)
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- NLM_F_MULTI, cb->nlh) < 0) {
+ NLM_F_MULTI, cb->nlh,
+ commp->net_admin) < 0) {
cb->args[3] = 1;
err = 2;
goto release;
@@ -320,7 +325,8 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p)
if (inet_sctp_diag_fill(sk, assoc, skb, r,
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, 0, cb->nlh) < 0) {
+ cb->nlh->nlmsg_seq, 0, cb->nlh,
+ commp->net_admin) < 0) {
err = 2;
goto release;
}
@@ -375,7 +381,7 @@ static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- cb->nlh) < 0) {
+ cb->nlh, commp->net_admin) < 0) {
err = 2;
goto out;
}
@@ -412,6 +418,7 @@ static int sctp_diag_dump_one(struct sk_buff *in_skb,
.skb = in_skb,
.r = req,
.nlh = nlh,
+ .net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN),
};
if (req->sdiag_family == AF_INET) {
@@ -447,6 +454,7 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
.skb = skb,
.cb = cb,
.r = r,
+ .net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN),
};
/* eps hashtable dumps
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index a750f330b8dd..f83b74d3e2ac 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1500,12 +1500,8 @@ out_fac_release:
goto out_dtefac_release;
if (dtefacs.calling_len > X25_MAX_AE_LEN)
goto out_dtefac_release;
- if (dtefacs.calling_ae == NULL)
- goto out_dtefac_release;
if (dtefacs.called_len > X25_MAX_AE_LEN)
goto out_dtefac_release;
- if (dtefacs.called_ae == NULL)
- goto out_dtefac_release;
x25->dte_facilities = dtefacs;
rc = 0;
out_dtefac_release:
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b5e665b3cfb0..f7ce6265961a 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -49,6 +49,7 @@ static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
__read_mostly;
static struct kmem_cache *xfrm_dst_cache __read_mostly;
+static __read_mostly seqcount_t xfrm_policy_hash_generation;
static void xfrm_init_pmtu(struct dst_entry *dst);
static int stale_bundle(struct dst_entry *dst);
@@ -59,6 +60,11 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
int dir);
+static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy)
+{
+ return atomic_inc_not_zero(&policy->refcnt);
+}
+
static inline bool
__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{
@@ -385,9 +391,11 @@ static struct hlist_head *policy_hash_bysel(struct net *net,
__get_hash_thresh(net, family, dir, &dbits, &sbits);
hash = __sel_hash(sel, family, hmask, dbits, sbits);
- return (hash == hmask + 1 ?
- &net->xfrm.policy_inexact[dir] :
- net->xfrm.policy_bydst[dir].table + hash);
+ if (hash == hmask + 1)
+ return &net->xfrm.policy_inexact[dir];
+
+ return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
+ lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
}
static struct hlist_head *policy_hash_direct(struct net *net,
@@ -403,7 +411,8 @@ static struct hlist_head *policy_hash_direct(struct net *net,
__get_hash_thresh(net, family, dir, &dbits, &sbits);
hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);
- return net->xfrm.policy_bydst[dir].table + hash;
+ return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
+ lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
}
static void xfrm_dst_hash_transfer(struct net *net,
@@ -426,14 +435,14 @@ redo:
h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
pol->family, nhashmask, dbits, sbits);
if (!entry0) {
- hlist_del(&pol->bydst);
- hlist_add_head(&pol->bydst, ndsttable+h);
+ hlist_del_rcu(&pol->bydst);
+ hlist_add_head_rcu(&pol->bydst, ndsttable + h);
h0 = h;
} else {
if (h != h0)
continue;
- hlist_del(&pol->bydst);
- hlist_add_behind(&pol->bydst, entry0);
+ hlist_del_rcu(&pol->bydst);
+ hlist_add_behind_rcu(&pol->bydst, entry0);
}
entry0 = &pol->bydst;
}
@@ -468,22 +477,32 @@ static void xfrm_bydst_resize(struct net *net, int dir)
unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
unsigned int nhashmask = xfrm_new_hash_mask(hmask);
unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
- struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
struct hlist_head *ndst = xfrm_hash_alloc(nsize);
+ struct hlist_head *odst;
int i;
if (!ndst)
return;
- write_lock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+ write_seqcount_begin(&xfrm_policy_hash_generation);
+
+ odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
+ lockdep_is_held(&net->xfrm.xfrm_policy_lock));
+
+ odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
+ lockdep_is_held(&net->xfrm.xfrm_policy_lock));
for (i = hmask; i >= 0; i--)
xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);
- net->xfrm.policy_bydst[dir].table = ndst;
+ rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst);
net->xfrm.policy_bydst[dir].hmask = nhashmask;
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ write_seqcount_end(&xfrm_policy_hash_generation);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+
+ synchronize_rcu();
xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}
@@ -500,7 +519,7 @@ static void xfrm_byidx_resize(struct net *net, int total)
if (!nidx)
return;
- write_lock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
for (i = hmask; i >= 0; i--)
xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
@@ -508,7 +527,7 @@ static void xfrm_byidx_resize(struct net *net, int total)
net->xfrm.policy_byidx = nidx;
net->xfrm.policy_idx_hmask = nhashmask;
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}
@@ -541,7 +560,6 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total)
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
{
- read_lock_bh(&net->xfrm.xfrm_policy_lock);
si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
@@ -550,7 +568,6 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
si->spdhcnt = net->xfrm.policy_idx_hmask;
si->spdhmcnt = xfrm_policy_hashmax;
- read_unlock_bh(&net->xfrm.xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_spd_getinfo);
@@ -600,7 +617,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
rbits6 = net->xfrm.policy_hthresh.rbits6;
} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
- write_lock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
/* reset the bydst and inexact table in all directions */
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
@@ -642,7 +659,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
hlist_add_head(&policy->bydst, chain);
}
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
mutex_unlock(&hash_resize_mutex);
}
@@ -753,7 +770,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
struct hlist_head *chain;
struct hlist_node *newpos;
- write_lock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
delpol = NULL;
newpos = NULL;
@@ -764,7 +781,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
xfrm_sec_ctx_match(pol->security, policy->security) &&
!WARN_ON(delpol)) {
if (excl) {
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return -EEXIST;
}
delpol = pol;
@@ -800,7 +817,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
policy->curlft.use_time = 0;
if (!mod_timer(&policy->timer, jiffies + HZ))
xfrm_pol_hold(policy);
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (delpol)
xfrm_policy_kill(delpol);
@@ -820,7 +837,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
struct hlist_head *chain;
*err = 0;
- write_lock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_bysel(net, sel, sel->family, dir);
ret = NULL;
hlist_for_each_entry(pol, chain, bydst) {
@@ -833,7 +850,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
*err = security_xfrm_policy_delete(
pol->security);
if (*err) {
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return pol;
}
__xfrm_policy_unlink(pol, dir);
@@ -842,7 +859,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
break;
}
}
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (ret && delete)
xfrm_policy_kill(ret);
@@ -861,7 +878,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
return NULL;
*err = 0;
- write_lock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = net->xfrm.policy_byidx + idx_hash(net, id);
ret = NULL;
hlist_for_each_entry(pol, chain, byidx) {
@@ -872,7 +889,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
*err = security_xfrm_policy_delete(
pol->security);
if (*err) {
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return pol;
}
__xfrm_policy_unlink(pol, dir);
@@ -881,7 +898,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
break;
}
}
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (ret && delete)
xfrm_policy_kill(ret);
@@ -939,7 +956,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{
int dir, err = 0, cnt = 0;
- write_lock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
err = xfrm_policy_flush_secctx_check(net, type, task_valid);
if (err)
@@ -955,14 +972,14 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
if (pol->type != type)
continue;
__xfrm_policy_unlink(pol, dir);
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
cnt++;
xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
- write_lock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
goto again1;
}
@@ -974,13 +991,13 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
if (pol->type != type)
continue;
__xfrm_policy_unlink(pol, dir);
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
cnt++;
xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
- write_lock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
goto again2;
}
}
@@ -989,7 +1006,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
if (!cnt)
err = -ESRCH;
out:
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);
@@ -1009,7 +1026,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
if (list_empty(&walk->walk.all) && walk->seq != 0)
return 0;
- write_lock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
if (list_empty(&walk->walk.all))
x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
else
@@ -1037,7 +1054,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
}
list_del_init(&walk->walk.all);
out:
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);
@@ -1056,9 +1073,9 @@ void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
if (list_empty(&walk->walk.all))
return;
- write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
list_del(&walk->walk.all);
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_walk_done);
@@ -1096,17 +1113,24 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
struct xfrm_policy *pol, *ret;
const xfrm_address_t *daddr, *saddr;
struct hlist_head *chain;
- u32 priority = ~0U;
+ unsigned int sequence;
+ u32 priority;
daddr = xfrm_flowi_daddr(fl, family);
saddr = xfrm_flowi_saddr(fl, family);
if (unlikely(!daddr || !saddr))
return NULL;
- read_lock_bh(&net->xfrm.xfrm_policy_lock);
- chain = policy_hash_direct(net, daddr, saddr, family, dir);
+ rcu_read_lock();
+ retry:
+ do {
+ sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
+ chain = policy_hash_direct(net, daddr, saddr, family, dir);
+ } while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence));
+
+ priority = ~0U;
ret = NULL;
- hlist_for_each_entry(pol, chain, bydst) {
+ hlist_for_each_entry_rcu(pol, chain, bydst) {
err = xfrm_policy_match(pol, fl, type, family, dir);
if (err) {
if (err == -ESRCH)
@@ -1122,7 +1146,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
}
}
chain = &net->xfrm.policy_inexact[dir];
- hlist_for_each_entry(pol, chain, bydst) {
+ hlist_for_each_entry_rcu(pol, chain, bydst) {
if ((pol->priority >= priority) && ret)
break;
@@ -1140,9 +1164,13 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
}
}
- xfrm_pol_hold(ret);
+ if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
+ goto retry;
+
+ if (ret && !xfrm_pol_hold_rcu(ret))
+ goto retry;
fail:
- read_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ rcu_read_unlock();
return ret;
}
@@ -1219,10 +1247,9 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
const struct flowi *fl)
{
struct xfrm_policy *pol;
- struct net *net = sock_net(sk);
rcu_read_lock();
- read_lock_bh(&net->xfrm.xfrm_policy_lock);
+ again:
pol = rcu_dereference(sk->sk_policy[dir]);
if (pol != NULL) {
bool match = xfrm_selector_match(&pol->selector, fl,
@@ -1237,8 +1264,8 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
err = security_xfrm_policy_lookup(pol->security,
fl->flowi_secid,
policy_to_flow_dir(dir));
- if (!err)
- xfrm_pol_hold(pol);
+ if (!err && !xfrm_pol_hold_rcu(pol))
+ goto again;
else if (err == -ESRCH)
pol = NULL;
else
@@ -1247,7 +1274,6 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
pol = NULL;
}
out:
- read_unlock_bh(&net->xfrm.xfrm_policy_lock);
rcu_read_unlock();
return pol;
}
@@ -1271,7 +1297,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
/* Socket policies are not hashed. */
if (!hlist_unhashed(&pol->bydst)) {
- hlist_del(&pol->bydst);
+ hlist_del_rcu(&pol->bydst);
hlist_del(&pol->byidx);
}
@@ -1295,9 +1321,9 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
struct net *net = xp_net(pol);
- write_lock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
pol = __xfrm_policy_unlink(pol, dir);
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (pol) {
xfrm_policy_kill(pol);
return 0;
@@ -1316,7 +1342,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
return -EINVAL;
#endif
- write_lock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
old_pol = rcu_dereference_protected(sk->sk_policy[dir],
lockdep_is_held(&net->xfrm.xfrm_policy_lock));
if (pol) {
@@ -1334,7 +1360,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
*/
xfrm_sk_policy_unlink(old_pol, dir);
}
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (old_pol) {
xfrm_policy_kill(old_pol);
@@ -1364,9 +1390,9 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
newp->type = old->type;
memcpy(newp->xfrm_vec, old->xfrm_vec,
newp->xfrm_nr*sizeof(struct xfrm_tmpl));
- write_lock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
xfrm_sk_policy_link(newp, dir);
- write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
xfrm_pol_put(newp);
}
return newp;
@@ -3048,7 +3074,7 @@ static int __net_init xfrm_net_init(struct net *net)
/* Initialize the per-net locks here */
spin_lock_init(&net->xfrm.xfrm_state_lock);
- rwlock_init(&net->xfrm.xfrm_policy_lock);
+ spin_lock_init(&net->xfrm.xfrm_policy_lock);
mutex_init(&net->xfrm.xfrm_cfg_mutex);
return 0;
@@ -3082,6 +3108,7 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
void __init xfrm_init(void)
{
register_pernet_subsys(&xfrm_net_ops);
+ seqcount_init(&xfrm_policy_hash_generation);
xfrm_input_init();
}
@@ -3179,7 +3206,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
struct hlist_head *chain;
u32 priority = ~0U;
- read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
hlist_for_each_entry(pol, chain, bydst) {
if (xfrm_migrate_selector_match(sel, &pol->selector) &&
@@ -3203,7 +3230,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
xfrm_pol_hold(ret);
- read_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return ret;
}
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 4fd725a0c500..cdc2e2e71bff 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -558,7 +558,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
x->repl->notify(x, XFRM_REPLAY_UPDATE);
}
-static struct xfrm_replay xfrm_replay_legacy = {
+static const struct xfrm_replay xfrm_replay_legacy = {
.advance = xfrm_replay_advance,
.check = xfrm_replay_check,
.recheck = xfrm_replay_check,
@@ -566,7 +566,7 @@ static struct xfrm_replay xfrm_replay_legacy = {
.overflow = xfrm_replay_overflow,
};
-static struct xfrm_replay xfrm_replay_bmp = {
+static const struct xfrm_replay xfrm_replay_bmp = {
.advance = xfrm_replay_advance_bmp,
.check = xfrm_replay_check_bmp,
.recheck = xfrm_replay_check_bmp,
@@ -574,7 +574,7 @@ static struct xfrm_replay xfrm_replay_bmp = {
.overflow = xfrm_replay_overflow_bmp,
};
-static struct xfrm_replay xfrm_replay_esn = {
+static const struct xfrm_replay xfrm_replay_esn = {
.advance = xfrm_replay_advance_esn,
.check = xfrm_replay_check_esn,
.recheck = xfrm_replay_recheck_esn,
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 9895a8c56d8c..ba8bf518ba14 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -28,6 +28,11 @@
#include "xfrm_hash.h"
+#define xfrm_state_deref_prot(table, net) \
+ rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
+
+static void xfrm_state_gc_task(struct work_struct *work);
+
/* Each xfrm_state may be linked to two tables:
1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
@@ -36,6 +41,15 @@
*/
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
+static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
+
+static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
+static HLIST_HEAD(xfrm_state_gc_list);
+
+static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
+{
+ return atomic_inc_not_zero(&x->refcnt);
+}
static inline unsigned int xfrm_dst_hash(struct net *net,
const xfrm_address_t *daddr,
@@ -76,18 +90,18 @@ static void xfrm_hash_transfer(struct hlist_head *list,
h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
x->props.reqid, x->props.family,
nhashmask);
- hlist_add_head(&x->bydst, ndsttable+h);
+ hlist_add_head_rcu(&x->bydst, ndsttable + h);
h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
x->props.family,
nhashmask);
- hlist_add_head(&x->bysrc, nsrctable+h);
+ hlist_add_head_rcu(&x->bysrc, nsrctable + h);
if (x->id.spi) {
h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
x->id.proto, x->props.family,
nhashmask);
- hlist_add_head(&x->byspi, nspitable+h);
+ hlist_add_head_rcu(&x->byspi, nspitable + h);
}
}
}
@@ -122,25 +136,29 @@ static void xfrm_hash_resize(struct work_struct *work)
}
spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ write_seqcount_begin(&xfrm_state_hash_generation);
nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
+ odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
for (i = net->xfrm.state_hmask; i >= 0; i--)
- xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi,
- nhashmask);
+ xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
- odst = net->xfrm.state_bydst;
- osrc = net->xfrm.state_bysrc;
- ospi = net->xfrm.state_byspi;
+ osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
+ ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
ohashmask = net->xfrm.state_hmask;
- net->xfrm.state_bydst = ndst;
- net->xfrm.state_bysrc = nsrc;
- net->xfrm.state_byspi = nspi;
+ rcu_assign_pointer(net->xfrm.state_bydst, ndst);
+ rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
+ rcu_assign_pointer(net->xfrm.state_byspi, nspi);
net->xfrm.state_hmask = nhashmask;
+ write_seqcount_end(&xfrm_state_hash_generation);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
osize = (ohashmask + 1) * sizeof(struct hlist_head);
+
+ synchronize_rcu();
+
xfrm_hash_free(odst, osize);
xfrm_hash_free(osrc, osize);
xfrm_hash_free(ospi, osize);
@@ -355,15 +373,16 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
static void xfrm_state_gc_task(struct work_struct *work)
{
- struct net *net = container_of(work, struct net, xfrm.state_gc_work);
struct xfrm_state *x;
struct hlist_node *tmp;
struct hlist_head gc_list;
spin_lock_bh(&xfrm_state_gc_lock);
- hlist_move_list(&net->xfrm.state_gc_list, &gc_list);
+ hlist_move_list(&xfrm_state_gc_list, &gc_list);
spin_unlock_bh(&xfrm_state_gc_lock);
+ synchronize_rcu();
+
hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
xfrm_state_gc_destroy(x);
}
@@ -500,14 +519,12 @@ EXPORT_SYMBOL(xfrm_state_alloc);
void __xfrm_state_destroy(struct xfrm_state *x)
{
- struct net *net = xs_net(x);
-
WARN_ON(x->km.state != XFRM_STATE_DEAD);
spin_lock_bh(&xfrm_state_gc_lock);
- hlist_add_head(&x->gclist, &net->xfrm.state_gc_list);
+ hlist_add_head(&x->gclist, &xfrm_state_gc_list);
spin_unlock_bh(&xfrm_state_gc_lock);
- schedule_work(&net->xfrm.state_gc_work);
+ schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
@@ -520,10 +537,10 @@ int __xfrm_state_delete(struct xfrm_state *x)
x->km.state = XFRM_STATE_DEAD;
spin_lock(&net->xfrm.xfrm_state_lock);
list_del(&x->km.all);
- hlist_del(&x->bydst);
- hlist_del(&x->bysrc);
+ hlist_del_rcu(&x->bydst);
+ hlist_del_rcu(&x->bysrc);
if (x->id.spi)
- hlist_del(&x->byspi);
+ hlist_del_rcu(&x->byspi);
net->xfrm.state_num--;
spin_unlock(&net->xfrm.xfrm_state_lock);
@@ -659,7 +676,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
struct xfrm_state *x;
- hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) {
+ hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
if (x->props.family != family ||
x->id.spi != spi ||
x->id.proto != proto ||
@@ -668,7 +685,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
if ((mark & x->mark.m) != x->mark.v)
continue;
- xfrm_state_hold(x);
+ if (!xfrm_state_hold_rcu(x))
+ continue;
return x;
}
@@ -683,7 +701,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
struct xfrm_state *x;
- hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
+ hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
if (x->props.family != family ||
x->id.proto != proto ||
!xfrm_addr_equal(&x->id.daddr, daddr, family) ||
@@ -692,7 +710,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
if ((mark & x->mark.m) != x->mark.v)
continue;
- xfrm_state_hold(x);
+ if (!xfrm_state_hold_rcu(x))
+ continue;
return x;
}
@@ -775,13 +794,16 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
struct xfrm_state *best = NULL;
u32 mark = pol->mark.v & pol->mark.m;
unsigned short encap_family = tmpl->encap_family;
+ unsigned int sequence;
struct km_event c;
to_put = NULL;
- spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ sequence = read_seqcount_begin(&xfrm_state_hash_generation);
+
+ rcu_read_lock();
h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
- hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
+ hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid &&
(mark & x->mark.m) == x->mark.v &&
@@ -797,7 +819,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
goto found;
h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
- hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) {
+ hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid &&
(mark & x->mark.m) == x->mark.v &&
@@ -850,19 +872,21 @@ found:
}
if (km_query(x, tmpl, pol) == 0) {
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
x->km.state = XFRM_STATE_ACQ;
list_add(&x->km.all, &net->xfrm.state_all);
- hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+ hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
h = xfrm_src_hash(net, daddr, saddr, encap_family);
- hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+ hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
- hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+ hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
}
x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
net->xfrm.state_num++;
xfrm_hash_grow_check(net, x->bydst.next != NULL);
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
} else {
x->km.state = XFRM_STATE_DEAD;
to_put = x;
@@ -871,13 +895,26 @@ found:
}
}
out:
- if (x)
- xfrm_state_hold(x);
- else
+ if (x) {
+ if (!xfrm_state_hold_rcu(x)) {
+ *err = -EAGAIN;
+ x = NULL;
+ }
+ } else {
*err = acquire_in_progress ? -EAGAIN : error;
- spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ }
+ rcu_read_unlock();
if (to_put)
xfrm_state_put(to_put);
+
+ if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
+ *err = -EAGAIN;
+ if (x) {
+ xfrm_state_put(x);
+ x = NULL;
+ }
+ }
+
return x;
}
@@ -945,16 +982,16 @@ static void __xfrm_state_insert(struct xfrm_state *x)
h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
x->props.reqid, x->props.family);
- hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+ hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
- hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+ hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
x->props.family);
- hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+ hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
}
tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
@@ -1063,9 +1100,9 @@ static struct xfrm_state *__find_acq_core(struct net *net,
xfrm_state_hold(x);
tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
list_add(&x->km.all, &net->xfrm.state_all);
- hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+ hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
h = xfrm_src_hash(net, daddr, saddr, family);
- hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+ hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
net->xfrm.state_num++;
@@ -1581,7 +1618,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
if (x->id.spi) {
spin_lock_bh(&net->xfrm.xfrm_state_lock);
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
- hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+ hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
err = 0;
@@ -2099,8 +2136,6 @@ int __net_init xfrm_state_init(struct net *net)
net->xfrm.state_num = 0;
INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
- INIT_HLIST_HEAD(&net->xfrm.state_gc_list);
- INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task);
spin_lock_init(&net->xfrm.xfrm_state_lock);
return 0;
@@ -2118,7 +2153,7 @@ void xfrm_state_fini(struct net *net)
flush_work(&net->xfrm.state_hash_work);
xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
- flush_work(&net->xfrm.state_gc_work);
+ flush_work(&xfrm_state_gc_work);
WARN_ON(!list_empty(&net->xfrm.state_all));
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
index 78c6f131d94f..1f6cc9b6a23b 100644
--- a/samples/bpf/test_verifier.c
+++ b/samples/bpf/test_verifier.c
@@ -1529,6 +1529,108 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
+ "direct packet access: test5 (pkt_end >= reg, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test6 (pkt_end >= reg, bad access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid access to packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test7 (pkt_end >= reg, both accesses)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid access to packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test8 (double test, variant 1)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 4),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test9 (double test, variant 2)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
"helper access to packet: test1, valid packet_ptr range",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,