diff options
-rw-r--r-- | drivers/dma/ioat/dca.c | 23 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 123 | ||||
-rw-r--r-- | drivers/net/phy/smsc.c | 69 | ||||
-rw-r--r-- | include/linux/netdevice.h | 20 | ||||
-rw-r--r-- | include/net/ip6_checksum.h | 35 | ||||
-rw-r--r-- | include/net/protocol.h | 31 | ||||
-rw-r--r-- | net/core/dev.c | 107 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 84 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 4 | ||||
-rw-r--r-- | net/ipv4/protocol.c | 21 | ||||
-rw-r--r-- | net/ipv6/Makefile | 7 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 240 | ||||
-rw-r--r-- | net/ipv6/exthdrs.c | 52 | ||||
-rw-r--r-- | net/ipv6/exthdrs_core.c | 44 | ||||
-rw-r--r-- | net/ipv6/exthdrs_offload.c | 41 | ||||
-rw-r--r-- | net/ipv6/ip6_offload.c | 282 | ||||
-rw-r--r-- | net/ipv6/ip6_offload.h | 18 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 65 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 2 | ||||
-rw-r--r-- | net/ipv6/output_core.c | 76 | ||||
-rw-r--r-- | net/ipv6/protocol.c | 25 | ||||
-rw-r--r-- | net/ipv6/sit.c | 10 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 107 | ||||
-rw-r--r-- | net/ipv6/tcpv6_offload.c | 95 | ||||
-rw-r--r-- | net/ipv6/udp.c | 94 | ||||
-rw-r--r-- | net/ipv6/udp_offload.c | 119 |
27 files changed, 1072 insertions, 725 deletions
diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c index abd9038e06b1..d6668071bd0d 100644 --- a/drivers/dma/ioat/dca.c +++ b/drivers/dma/ioat/dca.c @@ -604,6 +604,23 @@ static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset) return slots; } +static inline int dca3_tag_map_invalid(u8 *tag_map) +{ + /* + * If the tag map is not programmed by the BIOS the default is: + * 0x80 0x80 0x80 0x80 0x80 0x00 0x00 0x00 + * + * This an invalid map and will result in only 2 possible tags + * 0x1F and 0x00. 0x00 is an invalid DCA tag so we know that + * this entire definition is invalid. + */ + return ((tag_map[0] == DCA_TAG_MAP_VALID) && + (tag_map[1] == DCA_TAG_MAP_VALID) && + (tag_map[2] == DCA_TAG_MAP_VALID) && + (tag_map[3] == DCA_TAG_MAP_VALID) && + (tag_map[4] == DCA_TAG_MAP_VALID)); +} + struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) { @@ -674,6 +691,12 @@ ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK; } + if (dca3_tag_map_invalid(ioatdca->tag_map)) { + dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, disabling DCA\n"); + free_dca_provider(dca); + return NULL; + } + err = register_dca_provider(dca, &pdev->dev); if (err) { free_dca_provider(dca); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 1211fa093437..fc0af9a3bb35 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -58,7 +58,6 @@ struct ixgbevf_ring { struct ixgbevf_ring *next; struct net_device *netdev; struct device *dev; - struct ixgbevf_adapter *adapter; /* backlink */ void *desc; /* descriptor ring memory */ dma_addr_t dma; /* phys. 
address of descriptor ring */ unsigned int size; /* length in bytes */ @@ -75,6 +74,8 @@ struct ixgbevf_ring { u64 total_bytes; u64 total_packets; struct u64_stats_sync syncp; + u64 hw_csum_rx_error; + u64 hw_csum_rx_good; u16 head; u16 tail; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 9d8815302363..f267c003a1bc 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -121,7 +121,6 @@ static inline void ixgbevf_release_rx_desc(struct ixgbe_hw *hw, * @direction: 0 for Rx, 1 for Tx, -1 for other causes * @queue: queue to map the corresponding interrupt to * @msix_vector: the vector to map to the corresponding queue - * */ static void ixgbevf_set_ivar(struct ixgbevf_adapter *adapter, s8 direction, u8 queue, u8 msix_vector) @@ -296,12 +295,11 @@ static void ixgbevf_receive_skb(struct ixgbevf_q_vector *q_vector, /** * ixgbevf_rx_checksum - indicate in skb if hw indicated a good cksum - * @adapter: address of board private structure + * @ring: pointer to Rx descriptor ring structure * @status_err: hardware indication of status of receive * @skb: skb currently being received and modified **/ -static inline void ixgbevf_rx_checksum(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring, +static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring, u32 status_err, struct sk_buff *skb) { skb_checksum_none_assert(skb); @@ -313,7 +311,7 @@ static inline void ixgbevf_rx_checksum(struct ixgbevf_adapter *adapter, /* if IP and error */ if ((status_err & IXGBE_RXD_STAT_IPCS) && (status_err & IXGBE_RXDADV_ERR_IPE)) { - adapter->hw_csum_rx_error++; + ring->hw_csum_rx_error++; return; } @@ -321,13 +319,13 @@ static inline void ixgbevf_rx_checksum(struct ixgbevf_adapter *adapter, return; if (status_err & IXGBE_RXDADV_ERR_TCPE) { - adapter->hw_csum_rx_error++; + ring->hw_csum_rx_error++; return; } /* It must be a TCP or UDP packet with 
a valid checksum */ skb->ip_summed = CHECKSUM_UNNECESSARY; - adapter->hw_csum_rx_good++; + ring->hw_csum_rx_good++; } /** @@ -341,15 +339,16 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_adapter *adapter, struct pci_dev *pdev = adapter->pdev; union ixgbe_adv_rx_desc *rx_desc; struct ixgbevf_rx_buffer *bi; - struct sk_buff *skb; unsigned int i = rx_ring->next_to_use; bi = &rx_ring->rx_buffer_info[i]; while (cleaned_count--) { rx_desc = IXGBEVF_RX_DESC(rx_ring, i); - skb = bi->skb; - if (!skb) { + + if (!bi->skb) { + struct sk_buff *skb; + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, rx_ring->rx_buf_len); if (!skb) { @@ -357,8 +356,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_adapter *adapter, goto no_buffers; } bi->skb = skb; - } - if (!bi->dma) { + bi->dma = dma_map_single(&pdev->dev, skb->data, rx_ring->rx_buf_len, DMA_FROM_DEVICE); @@ -380,7 +378,6 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_adapter *adapter, no_buffers: if (rx_ring->next_to_use != i) { rx_ring->next_to_use = i; - ixgbevf_release_rx_desc(&adapter->hw, rx_ring, i); } } @@ -464,7 +461,7 @@ static bool ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, goto next_desc; } - ixgbevf_rx_checksum(adapter, rx_ring, staterr, skb); + ixgbevf_rx_checksum(rx_ring, staterr, skb); /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; @@ -765,7 +762,6 @@ static irqreturn_t ixgbevf_msix_other(int irq, void *data) return IRQ_HANDLED; } - /** * ixgbevf_msix_clean_rings - single unshared vector rx clean (all queues) * @irq: unused @@ -1150,9 +1146,6 @@ static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid) struct ixgbe_hw *hw = &adapter->hw; int err; - if (!hw->mac.ops.set_vfta) - return -EOPNOTSUPP; - spin_lock_bh(&adapter->mbx_lock); /* add VID to filter table */ @@ -1181,8 +1174,7 @@ static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) spin_lock_bh(&adapter->mbx_lock); /* remove VID from filter table */ - if 
(hw->mac.ops.set_vfta) - err = hw->mac.ops.set_vfta(hw, vid, 0, false); + err = hw->mac.ops.set_vfta(hw, vid, 0, false); spin_unlock_bh(&adapter->mbx_lock); @@ -1228,12 +1220,13 @@ static int ixgbevf_write_uc_addr_list(struct net_device *netdev) } /** - * ixgbevf_set_rx_mode - Multicast set + * ixgbevf_set_rx_mode - Multicast and unicast set * @netdev: network interface device structure * * The set_rx_method entry point is called whenever the multicast address - * list or the network interface flags are updated. This routine is - * responsible for configuring the hardware for proper multicast mode. + * list, unicast address list or the network interface flags are updated. + * This routine is responsible for configuring the hardware for proper + * multicast mode and configuring requested unicast filters. **/ static void ixgbevf_set_rx_mode(struct net_device *netdev) { @@ -1243,8 +1236,7 @@ static void ixgbevf_set_rx_mode(struct net_device *netdev) spin_lock_bh(&adapter->mbx_lock); /* reprogram multicast list */ - if (hw->mac.ops.update_mc_addr_list) - hw->mac.ops.update_mc_addr_list(hw, netdev); + hw->mac.ops.update_mc_addr_list(hw, netdev); ixgbevf_write_uc_addr_list(netdev); @@ -1312,8 +1304,8 @@ static inline void ixgbevf_rx_desc_queue_enable(struct ixgbevf_adapter *adapter, "not set within the polling period\n", rxr); } - ixgbevf_release_rx_desc(&adapter->hw, &adapter->rx_ring[rxr], - (adapter->rx_ring[rxr].count - 1)); + ixgbevf_release_rx_desc(hw, &adapter->rx_ring[rxr], + adapter->rx_ring[rxr].count - 1); } static void ixgbevf_save_reset_stats(struct ixgbevf_adapter *adapter) @@ -1414,12 +1406,10 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) spin_lock_bh(&adapter->mbx_lock); - if (hw->mac.ops.set_rar) { - if (is_valid_ether_addr(hw->mac.addr)) - hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0); - else - hw->mac.ops.set_rar(hw, 0, hw->mac.perm_addr, 0); - } + if (is_valid_ether_addr(hw->mac.addr)) + hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0); 
+ else + hw->mac.ops.set_rar(hw, 0, hw->mac.perm_addr, 0); spin_unlock_bh(&adapter->mbx_lock); @@ -1595,7 +1585,6 @@ static void ixgbevf_clean_tx_ring(struct ixgbevf_adapter *adapter, return; /* Free all the Tx ring sk_buffs */ - for (i = 0; i < tx_ring->count; i++) { tx_buffer_info = &tx_ring->tx_buffer_info[i]; ixgbevf_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); @@ -1691,13 +1680,6 @@ void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter) while (test_and_set_bit(__IXGBEVF_RESETTING, &adapter->state)) msleep(1); - /* - * Check if PF is up before re-init. If not then skip until - * later when the PF is up and ready to service requests from - * the VF via mailbox. If the VF is up and running then the - * watchdog task will continue to schedule reset tasks until - * the PF is up and running. - */ ixgbevf_down(adapter); ixgbevf_up(adapter); @@ -1709,15 +1691,11 @@ void ixgbevf_reset(struct ixgbevf_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; - spin_lock_bh(&adapter->mbx_lock); - if (hw->mac.ops.reset_hw(hw)) hw_dbg(hw, "PF still resetting\n"); else hw->mac.ops.init_hw(hw); - spin_unlock_bh(&adapter->mbx_lock); - if (is_valid_ether_addr(adapter->hw.mac.addr)) { memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len); @@ -1768,6 +1746,7 @@ static int ixgbevf_acquire_msix_vectors(struct ixgbevf_adapter *adapter, */ adapter->num_msix_vectors = vectors; } + return err; } @@ -2064,7 +2043,7 @@ static int __devinit ixgbevf_sw_init(struct ixgbevf_adapter *adapter) goto out; } memcpy(adapter->netdev->dev_addr, adapter->hw.mac.addr, - adapter->netdev->addr_len); + adapter->netdev->addr_len); } /* lock to protect mailbox accesses */ @@ -2114,6 +2093,7 @@ out: void ixgbevf_update_stats(struct ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; + int i; UPDATE_VF_COUNTER_32bit(IXGBE_VFGPRC, adapter->stats.last_vfgprc, adapter->stats.vfgprc); @@ -2127,6 +2107,15 @@ void 
ixgbevf_update_stats(struct ixgbevf_adapter *adapter) adapter->stats.vfgotc); UPDATE_VF_COUNTER_32bit(IXGBE_VFMPRC, adapter->stats.last_vfmprc, adapter->stats.vfmprc); + + for (i = 0; i < adapter->num_rx_queues; i++) { + adapter->hw_csum_rx_error += + adapter->rx_ring[i].hw_csum_rx_error; + adapter->hw_csum_rx_good += + adapter->rx_ring[i].hw_csum_rx_good; + adapter->rx_ring[i].hw_csum_rx_error = 0; + adapter->rx_ring[i].hw_csum_rx_good = 0; + } } /** @@ -2201,6 +2190,7 @@ static void ixgbevf_watchdog_task(struct work_struct *work) struct ixgbe_hw *hw = &adapter->hw; u32 link_speed = adapter->link_speed; bool link_up = adapter->link_up; + s32 need_reset; adapter->flags |= IXGBE_FLAG_IN_WATCHDOG_TASK; @@ -2208,29 +2198,19 @@ static void ixgbevf_watchdog_task(struct work_struct *work) * Always check the link on the watchdog because we have * no LSC interrupt */ - if (hw->mac.ops.check_link) { - s32 need_reset; - - spin_lock_bh(&adapter->mbx_lock); + spin_lock_bh(&adapter->mbx_lock); - need_reset = hw->mac.ops.check_link(hw, &link_speed, - &link_up, false); + need_reset = hw->mac.ops.check_link(hw, &link_speed, &link_up, false); - spin_unlock_bh(&adapter->mbx_lock); + spin_unlock_bh(&adapter->mbx_lock); - if (need_reset) { - adapter->link_up = link_up; - adapter->link_speed = link_speed; - netif_carrier_off(netdev); - netif_tx_stop_all_queues(netdev); - schedule_work(&adapter->reset_task); - goto pf_has_reset; - } - } else { - /* always assume link is up, if no check link - * function */ - link_speed = IXGBE_LINK_SPEED_10GB_FULL; - link_up = true; + if (need_reset) { + adapter->link_up = link_up; + adapter->link_speed = link_speed; + netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); + schedule_work(&adapter->reset_task); + goto pf_has_reset; } adapter->link_up = link_up; adapter->link_speed = link_speed; @@ -2723,9 +2703,6 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, static bool ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, struct sk_buff *skb, 
u32 tx_flags) { - - - u32 vlan_macip_lens = 0; u32 mss_l4len_idx = 0; u32 type_tucmd = 0; @@ -2915,7 +2892,6 @@ static void ixgbevf_tx_queue(struct ixgbevf_ring *tx_ring, int tx_flags, olinfo_status |= (1 << IXGBE_ADVTXD_IDX_SHIFT); if (tx_flags & IXGBE_TX_FLAGS_IPV4) olinfo_status |= IXGBE_ADVTXD_POPTS_IXSM; - } /* @@ -3070,8 +3046,7 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p) spin_lock_bh(&adapter->mbx_lock); - if (hw->mac.ops.set_rar) - hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0); + hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0); spin_unlock_bh(&adapter->mbx_lock); @@ -3396,10 +3371,6 @@ static int __devinit ixgbevf_probe(struct pci_dev *pdev, if (err) goto err_sw_init; - /* pick up the PCI bus settings for reporting later */ - if (hw->mac.ops.get_bus_info) - hw->mac.ops.get_bus_info(hw); - strcpy(netdev->name, "eth%d"); err = register_netdev(netdev); diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 88e3991464e7..16dceed29d8c 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -56,35 +56,52 @@ static int smsc_phy_config_init(struct phy_device *phydev) return smsc_phy_ack_interrupt (phydev); } -static int lan87xx_config_init(struct phy_device *phydev) +static int lan911x_config_init(struct phy_device *phydev) { - /* - * Make sure the EDPWRDOWN bit is NOT set. Setting this bit on - * LAN8710/LAN8720 PHY causes the PHY to misbehave, likely due - * to a bug on the chip. - * - * When the system is powered on with the network cable being - * disconnected all the way until after ifconfig ethX up is - * issued for the LAN port with this PHY, connecting the cable - * afterwards does not cause LINK change detection, while the - * expected behavior is the Link UP being detected. 
- */ - int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); - if (rc < 0) - return rc; - - rc &= ~MII_LAN83C185_EDPWRDOWN; - - rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS, rc); - if (rc < 0) - return rc; - return smsc_phy_ack_interrupt(phydev); } -static int lan911x_config_init(struct phy_device *phydev) +/* + * The LAN8710/LAN8720 requires a minimum of 2 link pulses within 64ms of each + * other in order to set the ENERGYON bit and exit EDPD mode. If a link partner + * does not send the pulses within this interval, the PHY will remain powered + * down. + * + * This workaround will manually toggle the PHY on/off upon calls to read_status + * in order to generate link test pulses if the link is down. If a link partner + * is present, it will respond to the pulses, which will cause the ENERGYON bit + * to be set and will cause the EDPD mode to be exited. + */ +static int lan87xx_read_status(struct phy_device *phydev) { - return smsc_phy_ack_interrupt(phydev); + int err = genphy_read_status(phydev); + + if (!phydev->link) { + /* Disable EDPD to wake up PHY */ + int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); + if (rc < 0) + return rc; + + rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS, + rc & ~MII_LAN83C185_EDPWRDOWN); + if (rc < 0) + return rc; + + /* Sleep 64 ms to allow ~5 link test pulses to be sent */ + msleep(64); + + /* Re-enable EDPD */ + rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); + if (rc < 0) + return rc; + + rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS, + rc | MII_LAN83C185_EDPWRDOWN); + if (rc < 0) + return rc; + } + + return err; } static struct phy_driver smsc_phy_driver[] = { @@ -187,8 +204,8 @@ static struct phy_driver smsc_phy_driver[] = { /* basic functions */ .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, - .config_init = lan87xx_config_init, + .read_status = lan87xx_read_status, + .config_init = smsc_phy_config_init, /* IRQ related */ .ack_interrupt = smsc_phy_ack_interrupt, diff --git
a/include/linux/netdevice.h b/include/linux/netdevice.h index 7bf867c97043..e46c830c88d8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1509,16 +1509,25 @@ struct packet_type { struct net_device *, struct packet_type *, struct net_device *); + bool (*id_match)(struct packet_type *ptype, + struct sock *sk); + void *af_packet_priv; + struct list_head list; +}; + +struct offload_callbacks { struct sk_buff *(*gso_segment)(struct sk_buff *skb, netdev_features_t features); int (*gso_send_check)(struct sk_buff *skb); struct sk_buff **(*gro_receive)(struct sk_buff **head, struct sk_buff *skb); int (*gro_complete)(struct sk_buff *skb); - bool (*id_match)(struct packet_type *ptype, - struct sock *sk); - void *af_packet_priv; - struct list_head list; +}; + +struct packet_offload { + __be16 type; /* This is really htons(ether_type). */ + struct offload_callbacks callbacks; + struct list_head list; }; #include <linux/notifier.h> @@ -1615,6 +1624,9 @@ extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short extern void dev_add_pack(struct packet_type *pt); extern void dev_remove_pack(struct packet_type *pt); extern void __dev_remove_pack(struct packet_type *pt); +extern void dev_add_offload(struct packet_offload *po); +extern void dev_remove_offload(struct packet_offload *po); +extern void __dev_remove_offload(struct packet_offload *po); extern struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, unsigned short mask); diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h index bc1b0fda2b04..652d3d309357 100644 --- a/include/net/ip6_checksum.h +++ b/include/net/ip6_checksum.h @@ -31,6 +31,8 @@ #include <net/ip.h> #include <asm/checksum.h> #include <linux/in6.h> +#include <linux/tcp.h> +#include <linux/ipv6.h> #ifndef _HAVE_ARCH_IPV6_CSUM @@ -91,4 +93,37 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, } #endif + +static __inline__ __sum16 tcp_v6_check(int len, + 
const struct in6_addr *saddr, + const struct in6_addr *daddr, + __wsum base) +{ + return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base); +} + +static inline void __tcp_v6_send_check(struct sk_buff *skb, + const struct in6_addr *saddr, + const struct in6_addr *daddr) +{ + struct tcphdr *th = tcp_hdr(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + } else { + th->check = tcp_v6_check(skb->len, saddr, daddr, + csum_partial(th, th->doff << 2, + skb->csum)); + } +} + +static inline void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + + __tcp_v6_send_check(skb, &np->saddr, &np->daddr); +} + #endif diff --git a/include/net/protocol.h b/include/net/protocol.h index 929528c73fe8..047c0476c0a0 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -25,9 +25,11 @@ #define _PROTOCOL_H #include <linux/in6.h> +#include <linux/skbuff.h> #if IS_ENABLED(CONFIG_IPV6) #include <linux/ipv6.h> #endif +#include <linux/netdevice.h> /* This is one larger than the largest protocol value that can be * found in an ipv4 or ipv6 header. 
Since in both cases the protocol @@ -40,12 +42,6 @@ struct net_protocol { void (*early_demux)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); - int (*gso_send_check)(struct sk_buff *skb); - struct sk_buff *(*gso_segment)(struct sk_buff *skb, - netdev_features_t features); - struct sk_buff **(*gro_receive)(struct sk_buff **head, - struct sk_buff *skb); - int (*gro_complete)(struct sk_buff *skb); unsigned int no_policy:1, netns_ok:1; }; @@ -60,23 +56,20 @@ struct inet6_protocol { struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info); - - int (*gso_send_check)(struct sk_buff *skb); - struct sk_buff *(*gso_segment)(struct sk_buff *skb, - netdev_features_t features); - struct sk_buff **(*gro_receive)(struct sk_buff **head, - struct sk_buff *skb); - int (*gro_complete)(struct sk_buff *skb); - unsigned int flags; /* INET6_PROTO_xxx */ }; #define INET6_PROTO_NOPOLICY 0x1 #define INET6_PROTO_FINAL 0x2 -/* This should be set for any extension header which is compatible with GSO. */ -#define INET6_PROTO_GSO_EXTHDR 0x4 #endif +struct net_offload { + struct offload_callbacks callbacks; + unsigned int flags; /* Flags used by IPv6 for now */ +}; +/* This should be set for any extension header which is compatible with GSO. */ +#define INET6_PROTO_GSO_EXTHDR 0x1 + /* This is used to register socket interfaces for IP protocols. */ struct inet_protosw { struct list_head list; @@ -96,6 +89,8 @@ struct inet_protosw { #define INET_PROTOSW_ICSK 0x04 /* Is this an inet_connection_sock? 
*/ extern const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS]; +extern const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS]; +extern const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS]; #if IS_ENABLED(CONFIG_IPV6) extern const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS]; @@ -103,6 +98,8 @@ extern const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS]; extern int inet_add_protocol(const struct net_protocol *prot, unsigned char num); extern int inet_del_protocol(const struct net_protocol *prot, unsigned char num); +extern int inet_add_offload(const struct net_offload *prot, unsigned char num); +extern int inet_del_offload(const struct net_offload *prot, unsigned char num); extern void inet_register_protosw(struct inet_protosw *p); extern void inet_unregister_protosw(struct inet_protosw *p); @@ -112,5 +109,7 @@ extern int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char n extern int inet6_register_protosw(struct inet_protosw *p); extern void inet6_unregister_protosw(struct inet_protosw *p); #endif +extern int inet6_add_offload(const struct net_offload *prot, unsigned char num); +extern int inet6_del_offload(const struct net_offload *prot, unsigned char num); #endif /* _PROTOCOL_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 83232a1be1e7..cf105e886cca 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -176,8 +176,10 @@ #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) static DEFINE_SPINLOCK(ptype_lock); +static DEFINE_SPINLOCK(offload_lock); static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; static struct list_head ptype_all __read_mostly; /* Taps */ +static struct list_head offload_base __read_mostly; /* * The @dev_base_head list is protected by @dev_base_lock and the rtnl @@ -470,6 +472,82 @@ void dev_remove_pack(struct packet_type *pt) } EXPORT_SYMBOL(dev_remove_pack); + +/** + * dev_add_offload - register offload handlers + * @po: protocol offload declaration + * + * 
Add protocol offload handlers to the networking stack. The passed + * &packet_offload is linked into kernel lists and may not be freed until + * it has been removed from the kernel lists. + * + * This call does not sleep therefore it can not + * guarantee all CPU's that are in middle of receiving packets + * will see the new offload handlers (until the next received packet). + */ +void dev_add_offload(struct packet_offload *po) +{ + struct list_head *head = &offload_base; + + spin_lock(&offload_lock); + list_add_rcu(&po->list, head); + spin_unlock(&offload_lock); +} +EXPORT_SYMBOL(dev_add_offload); + +/** + * __dev_remove_offload - remove offload handler + * @po: packet offload declaration + * + * Remove a protocol offload handler that was previously added to the + * kernel offload handlers by dev_add_offload(). The passed &packet_offload + * is removed from the kernel lists. + * + * The packet offload might still be in use by receivers + * and must not be freed until after all the CPU's have gone + * through a quiescent state. + */ +void __dev_remove_offload(struct packet_offload *po) +{ + struct list_head *head = &offload_base; + struct packet_offload *po1; + + /* offload_base is guarded by offload_lock, as in dev_add_offload() */ + spin_lock(&offload_lock); + + list_for_each_entry(po1, head, list) { + if (po == po1) { + list_del_rcu(&po->list); + goto out; + } + } + + pr_warn("dev_remove_offload: %p not found\n", po); +out: + spin_unlock(&offload_lock); +} +EXPORT_SYMBOL(__dev_remove_offload); + +/** + * dev_remove_offload - remove packet offload handler + * @po: packet offload declaration + * + * Remove a packet offload handler that was previously added to the kernel + * offload handlers by dev_add_offload(). The passed &packet_offload is + * removed from the kernel lists and can be freed or reused once this + * function returns. + * + * This call sleeps to guarantee that no CPU is looking at the packet + * type after return.
+ */ +void dev_remove_offload(struct packet_offload *po) +{ + __dev_remove_offload(po); + + synchronize_net(); +} +EXPORT_SYMBOL(dev_remove_offload); + /****************************************************************************** Device Boot-time Settings Routines @@ -1994,7 +2072,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_type *ptype; + struct packet_offload *ptype; __be16 type = skb->protocol; int vlan_depth = ETH_HLEN; int err; @@ -2023,18 +2101,17 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, } rcu_read_lock(); - list_for_each_entry_rcu(ptype, - &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && !ptype->dev && ptype->gso_segment) { + list_for_each_entry_rcu(ptype, &offload_base, list) { + if (ptype->type == type && ptype->callbacks.gso_segment) { if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { - err = ptype->gso_send_check(skb); + err = ptype->callbacks.gso_send_check(skb); segs = ERR_PTR(err); if (err || skb_gso_ok(skb, features)) break; __skb_push(skb, (skb->data - skb_network_header(skb))); } - segs = ptype->gso_segment(skb, features); + segs = ptype->callbacks.gso_segment(skb, features); break; } } @@ -3444,9 +3521,9 @@ static void flush_backlog(void *arg) static int napi_gro_complete(struct sk_buff *skb) { - struct packet_type *ptype; + struct packet_offload *ptype; __be16 type = skb->protocol; - struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; + struct list_head *head = &offload_base; int err = -ENOENT; if (NAPI_GRO_CB(skb)->count == 1) { @@ -3456,10 +3533,10 @@ static int napi_gro_complete(struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - if (ptype->type != type || ptype->dev || !ptype->gro_complete) + if (ptype->type != type || !ptype->callbacks.gro_complete) continue; - err = ptype->gro_complete(skb); + err = ptype->callbacks.gro_complete(skb); 
break; } rcu_read_unlock(); @@ -3506,9 +3583,9 @@ EXPORT_SYMBOL(napi_gro_flush); enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; - struct packet_type *ptype; + struct packet_offload *ptype; __be16 type = skb->protocol; - struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; + struct list_head *head = &offload_base; int same_flow; int mac_len; enum gro_result ret; @@ -3521,7 +3598,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - if (ptype->type != type || ptype->dev || !ptype->gro_receive) + if (ptype->type != type || !ptype->callbacks.gro_receive) continue; skb_set_network_header(skb, skb_gro_offset(skb)); @@ -3531,7 +3608,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; - pp = ptype->gro_receive(&napi->gro_list, skb); + pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; } rcu_read_unlock(); @@ -6661,6 +6738,8 @@ static int __init net_dev_init(void) for (i = 0; i < PTYPE_HASH_SIZE; i++) INIT_LIST_HEAD(&ptype_base[i]); + INIT_LIST_HEAD(&offload_base); + if (register_pernet_subsys(&netdev_net_ops)) goto out; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 766c59658563..d5e5a054123c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1251,7 +1251,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); static int inet_gso_send_check(struct sk_buff *skb) { - const struct net_protocol *ops; + const struct net_offload *ops; const struct iphdr *iph; int proto; int ihl; @@ -1275,9 +1275,9 @@ static int inet_gso_send_check(struct sk_buff *skb) err = -EPROTONOSUPPORT; rcu_read_lock(); - ops = rcu_dereference(inet_protos[proto]); - if (likely(ops && ops->gso_send_check)) - err = ops->gso_send_check(skb); + ops = rcu_dereference(inet_offloads[proto]); + if (likely(ops && 
ops->callbacks.gso_send_check)) + err = ops->callbacks.gso_send_check(skb); rcu_read_unlock(); out: @@ -1288,7 +1288,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); - const struct net_protocol *ops; + const struct net_offload *ops; struct iphdr *iph; int proto; int ihl; @@ -1325,9 +1325,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); rcu_read_lock(); - ops = rcu_dereference(inet_protos[proto]); - if (likely(ops && ops->gso_segment)) - segs = ops->gso_segment(skb, features); + ops = rcu_dereference(inet_offloads[proto]); + if (likely(ops && ops->callbacks.gso_segment)) + segs = ops->callbacks.gso_segment(skb, features); rcu_read_unlock(); if (!segs || IS_ERR(segs)) @@ -1356,7 +1356,7 @@ out: static struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - const struct net_protocol *ops; + const struct net_offload *ops; struct sk_buff **pp = NULL; struct sk_buff *p; const struct iphdr *iph; @@ -1378,8 +1378,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, proto = iph->protocol; rcu_read_lock(); - ops = rcu_dereference(inet_protos[proto]); - if (!ops || !ops->gro_receive) + ops = rcu_dereference(inet_offloads[proto]); + if (!ops || !ops->callbacks.gro_receive) goto out_unlock; if (*(u8 *)iph != 0x45) @@ -1420,7 +1420,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); - pp = ops->gro_receive(head, skb); + pp = ops->callbacks.gro_receive(head, skb); out_unlock: rcu_read_unlock(); @@ -1435,7 +1435,7 @@ static int inet_gro_complete(struct sk_buff *skb) { __be16 newlen = htons(skb->len - skb_network_offset(skb)); struct iphdr *iph = ip_hdr(skb); - const struct net_protocol *ops; + const struct net_offload *ops; int proto = iph->protocol; int err = -ENOSYS; @@ -1443,11 +1443,11 @@ static int 
inet_gro_complete(struct sk_buff *skb) iph->tot_len = newlen; rcu_read_lock(); - ops = rcu_dereference(inet_protos[proto]); - if (WARN_ON(!ops || !ops->gro_complete)) + ops = rcu_dereference(inet_offloads[proto]); + if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out_unlock; - err = ops->gro_complete(skb); + err = ops->callbacks.gro_complete(skb); out_unlock: rcu_read_unlock(); @@ -1558,23 +1558,33 @@ static const struct net_protocol tcp_protocol = { .early_demux = tcp_v4_early_demux, .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, - .gso_send_check = tcp_v4_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp4_gro_receive, - .gro_complete = tcp4_gro_complete, .no_policy = 1, .netns_ok = 1, }; +static const struct net_offload tcp_offload = { + .callbacks = { + .gso_send_check = tcp_v4_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, + }, +}; + static const struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, - .gso_send_check = udp4_ufo_send_check, - .gso_segment = udp4_ufo_fragment, .no_policy = 1, .netns_ok = 1, }; +static const struct net_offload udp_offload = { + .callbacks = { + .gso_send_check = udp4_ufo_send_check, + .gso_segment = udp4_ufo_fragment, + }, +}; + static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, .err_handler = ping_err, @@ -1659,13 +1669,35 @@ static int ipv4_proc_init(void); * IP protocol layer initialiser */ +static struct packet_offload ip_packet_offload __read_mostly = { + .type = cpu_to_be16(ETH_P_IP), + .callbacks = { + .gso_send_check = inet_gso_send_check, + .gso_segment = inet_gso_segment, + .gro_receive = inet_gro_receive, + .gro_complete = inet_gro_complete, + }, +}; + +static int __init ipv4_offload_init(void) +{ + /* + * Add offloads + */ + if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0) + pr_crit("%s: Cannot add UDP protocol offload\n", __func__); + if 
(inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0) + pr_crit("%s: Cannot add TCP protocol offload\n", __func__); + + dev_add_offload(&ip_packet_offload); + return 0; +} + +fs_initcall(ipv4_offload_init); + static struct packet_type ip_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IP), .func = ip_rcv, - .gso_send_check = inet_gso_send_check, - .gso_segment = inet_gso_segment, - .gro_receive = inet_gro_receive, - .gro_complete = inet_gro_complete, }; static int __init inet_init(void) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 64686e1f54d9..c26c1717c1db 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -864,10 +864,10 @@ static void ipip_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); if (data[IFLA_IPTUN_LOCAL]) - parms->iph.saddr = nla_get_u32(data[IFLA_IPTUN_LOCAL]); + parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]); if (data[IFLA_IPTUN_REMOTE]) - parms->iph.daddr = nla_get_u32(data[IFLA_IPTUN_REMOTE]); + parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]); if (data[IFLA_IPTUN_TTL]) { parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]); diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 8918eff1426d..0f9d09f54bd9 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -29,6 +29,7 @@ #include <net/protocol.h> const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; +const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly; /* * Add a protocol handler to the hash tables @@ -41,6 +42,13 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) } EXPORT_SYMBOL(inet_add_protocol); +int inet_add_offload(const struct net_offload *prot, unsigned char protocol) +{ + return !cmpxchg((const struct net_offload **)&inet_offloads[protocol], + NULL, prot) ? 0 : -1; +} +EXPORT_SYMBOL(inet_add_offload); + /* * Remove a protocol from the hash tables.
*/ @@ -57,3 +65,16 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) return ret; } EXPORT_SYMBOL(inet_del_protocol); + +int inet_del_offload(const struct net_offload *prot, unsigned char protocol) +{ + int ret; + + ret = (cmpxchg((const struct net_offload **)&inet_offloads[protocol], + prot, NULL) == prot) ? 0 : -1; + + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL(inet_del_offload); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index b6d3f79151e2..04a475df98ad 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -7,9 +7,11 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ - raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ + raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o +ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o + ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o @@ -38,6 +40,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o -obj-y += addrconf_core.o exthdrs_core.o +obj-y += addrconf_core.o exthdrs_core.o output_core.o protocol.o +obj-y += $(ipv6-offload) obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a974247a9ae4..7bafc51cda11 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -699,249 +699,9 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb) } EXPORT_SYMBOL_GPL(ipv6_opt_accepted); -static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) -{ - const struct inet6_protocol *ops = NULL; - - for (;;) { - struct ipv6_opt_hdr *opth; - int len; - - if (proto != NEXTHDR_HOP) { - ops = rcu_dereference(inet6_protos[proto]); - - if (unlikely(!ops)) - break; - - if (!(ops->flags & 
INET6_PROTO_GSO_EXTHDR)) - break; - } - - if (unlikely(!pskb_may_pull(skb, 8))) - break; - - opth = (void *)skb->data; - len = ipv6_optlen(opth); - - if (unlikely(!pskb_may_pull(skb, len))) - break; - - proto = opth->nexthdr; - __skb_pull(skb, len); - } - - return proto; -} - -static int ipv6_gso_send_check(struct sk_buff *skb) -{ - const struct ipv6hdr *ipv6h; - const struct inet6_protocol *ops; - int err = -EINVAL; - - if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) - goto out; - - ipv6h = ipv6_hdr(skb); - __skb_pull(skb, sizeof(*ipv6h)); - err = -EPROTONOSUPPORT; - - rcu_read_lock(); - ops = rcu_dereference(inet6_protos[ - ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]); - - if (likely(ops && ops->gso_send_check)) { - skb_reset_transport_header(skb); - err = ops->gso_send_check(skb); - } - rcu_read_unlock(); - -out: - return err; -} - -static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - struct ipv6hdr *ipv6h; - const struct inet6_protocol *ops; - int proto; - struct frag_hdr *fptr; - unsigned int unfrag_ip6hlen; - u8 *prevhdr; - int offset = 0; - - if (!(features & NETIF_F_V6_CSUM)) - features &= ~NETIF_F_SG; - - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCPV6 | - 0))) - goto out; - - if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) - goto out; - - ipv6h = ipv6_hdr(skb); - __skb_pull(skb, sizeof(*ipv6h)); - segs = ERR_PTR(-EPROTONOSUPPORT); - - proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); - rcu_read_lock(); - ops = rcu_dereference(inet6_protos[proto]); - if (likely(ops && ops->gso_segment)) { - skb_reset_transport_header(skb); - segs = ops->gso_segment(skb, features); - } - rcu_read_unlock(); - - if (IS_ERR(segs)) - goto out; - - for (skb = segs; skb; skb = skb->next) { - ipv6h = ipv6_hdr(skb); - ipv6h->payload_len = htons(skb->len - skb->mac_len - - sizeof(*ipv6h)); - if (proto == IPPROTO_UDP) { - 
unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); - fptr = (struct frag_hdr *)(skb_network_header(skb) + - unfrag_ip6hlen); - fptr->frag_off = htons(offset); - if (skb->next != NULL) - fptr->frag_off |= htons(IP6_MF); - offset += (ntohs(ipv6h->payload_len) - - sizeof(struct frag_hdr)); - } - } - -out: - return segs; -} - -static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, - struct sk_buff *skb) -{ - const struct inet6_protocol *ops; - struct sk_buff **pp = NULL; - struct sk_buff *p; - struct ipv6hdr *iph; - unsigned int nlen; - unsigned int hlen; - unsigned int off; - int flush = 1; - int proto; - __wsum csum; - - off = skb_gro_offset(skb); - hlen = off + sizeof(*iph); - iph = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - iph = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!iph)) - goto out; - } - - skb_gro_pull(skb, sizeof(*iph)); - skb_set_transport_header(skb, skb_gro_offset(skb)); - - flush += ntohs(iph->payload_len) != skb_gro_len(skb); - - rcu_read_lock(); - proto = iph->nexthdr; - ops = rcu_dereference(inet6_protos[proto]); - if (!ops || !ops->gro_receive) { - __pskb_pull(skb, skb_gro_offset(skb)); - proto = ipv6_gso_pull_exthdrs(skb, proto); - skb_gro_pull(skb, -skb_transport_offset(skb)); - skb_reset_transport_header(skb); - __skb_push(skb, skb_gro_offset(skb)); - - ops = rcu_dereference(inet6_protos[proto]); - if (!ops || !ops->gro_receive) - goto out_unlock; - - iph = ipv6_hdr(skb); - } - - NAPI_GRO_CB(skb)->proto = proto; - - flush--; - nlen = skb_network_header_len(skb); - - for (p = *head; p; p = p->next) { - const struct ipv6hdr *iph2; - __be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */ - - if (!NAPI_GRO_CB(p)->same_flow) - continue; - - iph2 = ipv6_hdr(p); - first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ; - - /* All fields must match except length and Traffic Class. 
*/ - if (nlen != skb_network_header_len(p) || - (first_word & htonl(0xF00FFFFF)) || - memcmp(&iph->nexthdr, &iph2->nexthdr, - nlen - offsetof(struct ipv6hdr, nexthdr))) { - NAPI_GRO_CB(p)->same_flow = 0; - continue; - } - /* flush if Traffic Class fields are different */ - NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000)); - NAPI_GRO_CB(p)->flush |= flush; - } - - NAPI_GRO_CB(skb)->flush |= flush; - - csum = skb->csum; - skb_postpull_rcsum(skb, iph, skb_network_header_len(skb)); - - pp = ops->gro_receive(head, skb); - - skb->csum = csum; - -out_unlock: - rcu_read_unlock(); - -out: - NAPI_GRO_CB(skb)->flush |= flush; - - return pp; -} - -static int ipv6_gro_complete(struct sk_buff *skb) -{ - const struct inet6_protocol *ops; - struct ipv6hdr *iph = ipv6_hdr(skb); - int err = -ENOSYS; - - iph->payload_len = htons(skb->len - skb_network_offset(skb) - - sizeof(*iph)); - - rcu_read_lock(); - ops = rcu_dereference(inet6_protos[NAPI_GRO_CB(skb)->proto]); - if (WARN_ON(!ops || !ops->gro_complete)) - goto out_unlock; - - err = ops->gro_complete(skb); - -out_unlock: - rcu_read_unlock(); - - return err; -} - static struct packet_type ipv6_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), .func = ipv6_rcv, - .gso_send_check = ipv6_gso_send_check, - .gso_segment = ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, - .gro_complete = ipv6_gro_complete, }; static int __init ipv6_packet_init(void) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index f005acc58b2a..473f628f9f20 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -49,50 +49,6 @@ #include <asm/uaccess.h> -int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) -{ - const unsigned char *nh = skb_network_header(skb); - int packet_len = skb->tail - skb->network_header; - struct ipv6_opt_hdr *hdr; - int len; - - if (offset + 2 > packet_len) - goto bad; - hdr = (struct ipv6_opt_hdr *)(nh + offset); - len = ((hdr->hdrlen + 1) << 3); - - if (offset + len > packet_len) - goto bad; - 
- offset += 2; - len -= 2; - - while (len > 0) { - int opttype = nh[offset]; - int optlen; - - if (opttype == type) - return offset; - - switch (opttype) { - case IPV6_TLV_PAD1: - optlen = 1; - break; - default: - optlen = nh[offset + 1] + 2; - if (optlen > len) - goto bad; - break; - } - offset += optlen; - len -= optlen; - } - /* not_found */ - bad: - return -1; -} -EXPORT_SYMBOL_GPL(ipv6_find_tlv); - /* * Parsing tlv encoded headers. * @@ -528,12 +484,12 @@ unknown_rh: static const struct inet6_protocol rthdr_protocol = { .handler = ipv6_rthdr_rcv, - .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, + .flags = INET6_PROTO_NOPOLICY, }; static const struct inet6_protocol destopt_protocol = { .handler = ipv6_destopt_rcv, - .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, + .flags = INET6_PROTO_NOPOLICY, }; static const struct inet6_protocol nodata_protocol = { @@ -559,10 +515,10 @@ int __init ipv6_exthdrs_init(void) out: return ret; -out_rthdr: - inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING); out_destopt: inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS); +out_rthdr: + inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING); goto out; }; diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index f73d59a14131..e7d756e19d1d 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -111,3 +111,47 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, return start; } EXPORT_SYMBOL(ipv6_skip_exthdr); + +int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) +{ + const unsigned char *nh = skb_network_header(skb); + int packet_len = skb->tail - skb->network_header; + struct ipv6_opt_hdr *hdr; + int len; + + if (offset + 2 > packet_len) + goto bad; + hdr = (struct ipv6_opt_hdr *)(nh + offset); + len = ((hdr->hdrlen + 1) << 3); + + if (offset + len > packet_len) + goto bad; + + offset += 2; + len -= 2; + + while (len > 0) { + int opttype = nh[offset]; + int optlen; + + if (opttype == type) + return 
offset; + + switch (opttype) { + case IPV6_TLV_PAD1: + optlen = 1; + break; + default: + optlen = nh[offset + 1] + 2; + if (optlen > len) + goto bad; + break; + } + offset += optlen; + len -= optlen; + } + /* not_found */ + bad: + return -1; +} +EXPORT_SYMBOL_GPL(ipv6_find_tlv); diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c new file mode 100644 index 000000000000..cf77f3abfd06 --- /dev/null +++ b/net/ipv6/exthdrs_offload.c @@ -0,0 +1,41 @@ +/* + * IPV6 GSO/GRO offload support + * Linux INET6 implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * IPV6 Extension Header GSO/GRO support + */ +#include <net/protocol.h> +#include "ip6_offload.h" + +static const struct net_offload rthdr_offload = { + .flags = INET6_PROTO_GSO_EXTHDR, +}; + +static const struct net_offload dstopt_offload = { + .flags = INET6_PROTO_GSO_EXTHDR, +}; + +int __init ipv6_exthdrs_offload_init(void) +{ + int ret; + + ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING); + if (ret) /* non-zero: slot already taken, nothing to undo */ + goto out; + + ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS); + if (ret) /* non-zero: roll back the routing-header offload */ + goto out_rt; + +out: + return ret; + +out_rt: + inet6_del_offload(&rthdr_offload, IPPROTO_ROUTING); + goto out; +} diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c new file mode 100644 index 000000000000..f26f0da7f095 --- /dev/null +++ b/net/ipv6/ip6_offload.c @@ -0,0 +1,282 @@ +/* + * IPV6 GSO/GRO offload support + * Linux INET6 implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version.
+ */ + +#include <linux/kernel.h> +#include <linux/socket.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/printk.h> + +#include <net/protocol.h> +#include <net/ipv6.h> + +#include "ip6_offload.h" + +static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) +{ + const struct net_offload *ops = NULL; + + for (;;) { + struct ipv6_opt_hdr *opth; + int len; + + if (proto != NEXTHDR_HOP) { + ops = rcu_dereference(inet6_offloads[proto]); + + if (unlikely(!ops)) + break; + + if (!(ops->flags & INET6_PROTO_GSO_EXTHDR)) + break; + } + + if (unlikely(!pskb_may_pull(skb, 8))) + break; + + opth = (void *)skb->data; + len = ipv6_optlen(opth); + + if (unlikely(!pskb_may_pull(skb, len))) + break; + + proto = opth->nexthdr; + __skb_pull(skb, len); + } + + return proto; +} + +static int ipv6_gso_send_check(struct sk_buff *skb) +{ + const struct ipv6hdr *ipv6h; + const struct net_offload *ops; + int err = -EINVAL; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + goto out; + + ipv6h = ipv6_hdr(skb); + __skb_pull(skb, sizeof(*ipv6h)); + err = -EPROTONOSUPPORT; + + rcu_read_lock(); + ops = rcu_dereference(inet6_offloads[ + ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]); + + if (likely(ops && ops->callbacks.gso_send_check)) { + skb_reset_transport_header(skb); + err = ops->callbacks.gso_send_check(skb); + } + rcu_read_unlock(); + +out: + return err; +} + +static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct ipv6hdr *ipv6h; + const struct net_offload *ops; + int proto; + struct frag_hdr *fptr; + unsigned int unfrag_ip6hlen; + u8 *prevhdr; + int offset = 0; + + if (!(features & NETIF_F_V6_CSUM)) + features &= ~NETIF_F_SG; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_TCPV6 | + 0))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + goto out; + + ipv6h = ipv6_hdr(skb); + 
__skb_pull(skb, sizeof(*ipv6h)); + segs = ERR_PTR(-EPROTONOSUPPORT); + + proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + rcu_read_lock(); + ops = rcu_dereference(inet6_offloads[proto]); + if (likely(ops && ops->callbacks.gso_segment)) { + skb_reset_transport_header(skb); + segs = ops->callbacks.gso_segment(skb, features); + } + rcu_read_unlock(); + + if (IS_ERR(segs)) + goto out; + + for (skb = segs; skb; skb = skb->next) { + ipv6h = ipv6_hdr(skb); + ipv6h->payload_len = htons(skb->len - skb->mac_len - + sizeof(*ipv6h)); + if (proto == IPPROTO_UDP) { + unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); + fptr = (struct frag_hdr *)(skb_network_header(skb) + + unfrag_ip6hlen); + fptr->frag_off = htons(offset); + if (skb->next != NULL) + fptr->frag_off |= htons(IP6_MF); + offset += (ntohs(ipv6h->payload_len) - + sizeof(struct frag_hdr)); + } + } + +out: + return segs; +} + +static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + const struct net_offload *ops; + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct ipv6hdr *iph; + unsigned int nlen; + unsigned int hlen; + unsigned int off; + int flush = 1; + int proto; + __wsum csum; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*iph); + iph = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + iph = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!iph)) + goto out; + } + + skb_gro_pull(skb, sizeof(*iph)); + skb_set_transport_header(skb, skb_gro_offset(skb)); + + flush += ntohs(iph->payload_len) != skb_gro_len(skb); + + rcu_read_lock(); + proto = iph->nexthdr; + ops = rcu_dereference(inet6_offloads[proto]); + if (!ops || !ops->callbacks.gro_receive) { + __pskb_pull(skb, skb_gro_offset(skb)); + proto = ipv6_gso_pull_exthdrs(skb, proto); + skb_gro_pull(skb, -skb_transport_offset(skb)); + skb_reset_transport_header(skb); + __skb_push(skb, skb_gro_offset(skb)); + + ops = rcu_dereference(inet6_offloads[proto]); + if (!ops || 
!ops->callbacks.gro_receive) + goto out_unlock; + + iph = ipv6_hdr(skb); + } + + NAPI_GRO_CB(skb)->proto = proto; + + flush--; + nlen = skb_network_header_len(skb); + + for (p = *head; p; p = p->next) { + const struct ipv6hdr *iph2; + __be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */ + + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + iph2 = ipv6_hdr(p); + first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ; + + /* All fields must match except length and Traffic Class. */ + if (nlen != skb_network_header_len(p) || + (first_word & htonl(0xF00FFFFF)) || + memcmp(&iph->nexthdr, &iph2->nexthdr, + nlen - offsetof(struct ipv6hdr, nexthdr))) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + /* flush if Traffic Class fields are different */ + NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000)); + NAPI_GRO_CB(p)->flush |= flush; + } + + NAPI_GRO_CB(skb)->flush |= flush; + + csum = skb->csum; + skb_postpull_rcsum(skb, iph, skb_network_header_len(skb)); + + pp = ops->callbacks.gro_receive(head, skb); + + skb->csum = csum; + +out_unlock: + rcu_read_unlock(); + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int ipv6_gro_complete(struct sk_buff *skb) +{ + const struct net_offload *ops; + struct ipv6hdr *iph = ipv6_hdr(skb); + int err = -ENOSYS; + + iph->payload_len = htons(skb->len - skb_network_offset(skb) - + sizeof(*iph)); + + rcu_read_lock(); + ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]); + if (WARN_ON(!ops || !ops->callbacks.gro_complete)) + goto out_unlock; + + err = ops->callbacks.gro_complete(skb); + +out_unlock: + rcu_read_unlock(); + + return err; +} + +static struct packet_offload ipv6_packet_offload __read_mostly = { + .type = cpu_to_be16(ETH_P_IPV6), + .callbacks = { + .gso_send_check = ipv6_gso_send_check, + .gso_segment = ipv6_gso_segment, + .gro_receive = ipv6_gro_receive, + .gro_complete = ipv6_gro_complete, + }, +}; + +static int __init ipv6_offload_init(void) +{ + + if (tcpv6_offload_init() < 
0) + pr_crit("%s: Cannot add TCP protocol offload\n", __func__); + if (udp_offload_init() < 0) + pr_crit("%s: Cannot add UDP protocol offload\n", __func__); + if (ipv6_exthdrs_offload_init() < 0) + pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__); + + dev_add_offload(&ipv6_packet_offload); + return 0; +} + +fs_initcall(ipv6_offload_init); diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h new file mode 100644 index 000000000000..2e155c651b35 --- /dev/null +++ b/net/ipv6/ip6_offload.h @@ -0,0 +1,18 @@ +/* + * IPV6 GSO/GRO offload support + * Linux INET6 implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __ip6_offload_h +#define __ip6_offload_h + +int ipv6_exthdrs_offload_init(void); +int udp_offload_init(void); +int tcpv6_offload_init(void); + +#endif diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3deaa4e2e8e2..5552d13ae92f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -544,71 +544,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } -int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) -{ - u16 offset = sizeof(struct ipv6hdr); - struct ipv6_opt_hdr *exthdr = - (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); - unsigned int packet_len = skb->tail - skb->network_header; - int found_rhdr = 0; - *nexthdr = &ipv6_hdr(skb)->nexthdr; - - while (offset + 1 <= packet_len) { - - switch (**nexthdr) { - - case NEXTHDR_HOP: - break; - case NEXTHDR_ROUTING: - found_rhdr = 1; - break; - case NEXTHDR_DEST: -#if IS_ENABLED(CONFIG_IPV6_MIP6) - if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) - break; -#endif - if (found_rhdr) - return offset; - break; - default : - return offset; - } - - offset += ipv6_optlen(exthdr); - *nexthdr = 
&exthdr->nexthdr; - exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + - offset); - } - - return offset; -} - -void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) -{ - static atomic_t ipv6_fragmentation_id; - int old, new; - - if (rt && !(rt->dst.flags & DST_NOPEER)) { - struct inet_peer *peer; - struct net *net; - - net = dev_net(rt->dst.dev); - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); - if (peer) { - fhdr->identification = htonl(inet_getid(peer, 0)); - inet_putpeer(peer); - return; - } - } - do { - old = atomic_read(&ipv6_fragmentation_id); - new = old + 1; - if (!new) - new = 1; - } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); - fhdr->identification = htonl(new); -} - int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ab4d05633bfd..bf3a549267d3 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1568,7 +1568,7 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]); if (data[IFLA_IPTUN_FLOWINFO]) - parms->flowinfo = nla_get_u32(data[IFLA_IPTUN_FLOWINFO]); + parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]); if (data[IFLA_IPTUN_FLAGS]) parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c new file mode 100644 index 000000000000..c2e73e647e44 --- /dev/null +++ b/net/ipv6/output_core.c @@ -0,0 +1,76 @@ +/* + * IPv6 library code, needed by static components when full IPv6 support is + * not configured or static. These functions are needed by GSO/GRO implementation. 
+ */ +#include <linux/export.h> +#include <net/ipv6.h> +#include <net/ip6_fib.h> + +void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +{ + static atomic_t ipv6_fragmentation_id; + int old, new; + +#if IS_ENABLED(CONFIG_IPV6) + if (rt && !(rt->dst.flags & DST_NOPEER)) { + struct inet_peer *peer; + struct net *net; + + net = dev_net(rt->dst.dev); + peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); + if (peer) { + fhdr->identification = htonl(inet_getid(peer, 0)); + inet_putpeer(peer); + return; + } + } +#endif + do { + old = atomic_read(&ipv6_fragmentation_id); + new = old + 1; + if (!new) + new = 1; + } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); + fhdr->identification = htonl(new); +} +EXPORT_SYMBOL(ipv6_select_ident); + +int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) +{ + u16 offset = sizeof(struct ipv6hdr); + struct ipv6_opt_hdr *exthdr = + (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); + unsigned int packet_len = skb->tail - skb->network_header; + int found_rhdr = 0; + *nexthdr = &ipv6_hdr(skb)->nexthdr; + + while (offset + 1 <= packet_len) { + + switch (**nexthdr) { + + case NEXTHDR_HOP: + break; + case NEXTHDR_ROUTING: + found_rhdr = 1; + break; + case NEXTHDR_DEST: +#if IS_ENABLED(CONFIG_IPV6_MIP6) + if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) + break; +#endif + if (found_rhdr) + return offset; + break; + default : + return offset; + } + + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + + offset); + } + + return offset; +} +EXPORT_SYMBOL(ip6_find_1stfragopt); diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 053082dfc93e..22d1bd4670da 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -25,7 +25,9 @@ #include <linux/spinlock.h> #include <net/protocol.h> +#if IS_ENABLED(CONFIG_IPV6) const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; +EXPORT_SYMBOL(inet6_protos); int 
inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) { @@ -50,3 +52,26 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol return ret; } EXPORT_SYMBOL(inet6_del_protocol); +#endif + +const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS] __read_mostly; + +int inet6_add_offload(const struct net_offload *prot, unsigned char protocol) +{ + return !cmpxchg((const struct net_offload **)&inet6_offloads[protocol], + NULL, prot) ? 0 : -1; +} +EXPORT_SYMBOL(inet6_add_offload); + +int inet6_del_offload(const struct net_offload *prot, unsigned char protocol) +{ + int ret; + + ret = (cmpxchg((const struct net_offload **)&inet6_offloads[protocol], + prot, NULL) == prot) ? 0 : -1; + + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL(inet6_del_offload); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 7bd2a061e511..ca6c2c8e71d2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -228,7 +228,7 @@ static int ipip6_tunnel_create(struct net_device *dev) goto out; ipip6_tunnel_clone_6rd(dev, sitn); - if (t->parms.i_flags & SIT_ISATAP) + if ((__force u16)t->parms.i_flags & SIT_ISATAP) dev->priv_flags |= IFF_ISATAP; err = register_netdevice(dev); @@ -1240,10 +1240,10 @@ static void ipip6_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); if (data[IFLA_IPTUN_LOCAL]) - parms->iph.saddr = nla_get_u32(data[IFLA_IPTUN_LOCAL]); + parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]); if (data[IFLA_IPTUN_REMOTE]) - parms->iph.daddr = nla_get_u32(data[IFLA_IPTUN_REMOTE]); + parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]); if (data[IFLA_IPTUN_TTL]) { parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]); @@ -1258,7 +1258,7 @@ static void ipip6_netlink_parms(struct nlattr *data[], parms->iph.frag_off = htons(IP_DF); if (data[IFLA_IPTUN_FLAGS]) - parms->i_flags = nla_get_u16(data[IFLA_IPTUN_FLAGS]); + parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); } static int 
ipip6_newlink(struct net *src_net, struct net_device *dev, @@ -1337,7 +1337,7 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, !!(parm->iph.frag_off & htons(IP_DF))) || - nla_put_u16(skb, IFLA_IPTUN_FLAGS, parm->i_flags)) + nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags)) goto nla_put_failure; return 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c73d0ebde9c8..6c0f2526f3f1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -77,9 +77,6 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); -static void __tcp_v6_send_check(struct sk_buff *skb, - const struct in6_addr *saddr, - const struct in6_addr *daddr); static const struct inet_connection_sock_af_ops ipv6_mapped; static const struct inet_connection_sock_af_ops ipv6_specific; @@ -119,14 +116,6 @@ static void tcp_v6_hash(struct sock *sk) } } -static __inline__ __sum16 tcp_v6_check(int len, - const struct in6_addr *saddr, - const struct in6_addr *daddr, - __wsum base) -{ - return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base); -} - static __u32 tcp_v6_init_sequence(const struct sk_buff *skb) { return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, @@ -722,94 +711,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { }; #endif -static void __tcp_v6_send_check(struct sk_buff *skb, - const struct in6_addr *saddr, const struct in6_addr *daddr) -{ - struct tcphdr *th = tcp_hdr(skb); - - if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); - } else { - th->check = tcp_v6_check(skb->len, saddr, daddr, - csum_partial(th, th->doff << 2, - skb->csum)); - } -} - -static void 
tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - - __tcp_v6_send_check(skb, &np->saddr, &np->daddr); -} - -static int tcp_v6_gso_send_check(struct sk_buff *skb) -{ - const struct ipv6hdr *ipv6h; - struct tcphdr *th; - - if (!pskb_may_pull(skb, sizeof(*th))) - return -EINVAL; - - ipv6h = ipv6_hdr(skb); - th = tcp_hdr(skb); - - th->check = 0; - skb->ip_summed = CHECKSUM_PARTIAL; - __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr); - return 0; -} - -static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, - struct sk_buff *skb) -{ - const struct ipv6hdr *iph = skb_gro_network_header(skb); - __wsum wsum; - __sum16 sum; - - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, - skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } -flush: - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - - case CHECKSUM_NONE: - wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, - skb_gro_len(skb), - IPPROTO_TCP, 0)); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } - - return tcp_gro_receive(head, skb); -} - -static int tcp6_gro_complete(struct sk_buff *skb) -{ - const struct ipv6hdr *iph = ipv6_hdr(skb); - struct tcphdr *th = tcp_hdr(skb); - - th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb), - &iph->saddr, &iph->daddr, 0); - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; - - return tcp_gro_complete(skb); -} - static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass) { @@ -2066,10 +1967,6 @@ static const struct inet6_protocol tcpv6_protocol = { .early_demux = tcp_v6_early_demux, .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, - .gso_send_check = tcp_v6_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = 
tcp6_gro_receive, - .gro_complete = tcp6_gro_complete, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; @@ -2124,10 +2021,10 @@ int __init tcpv6_init(void) out: return ret; -out_tcpv6_protocol: - inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); out_tcpv6_protosw: inet6_unregister_protosw(&tcpv6_protosw); +out_tcpv6_protocol: + inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); goto out; } diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c new file mode 100644 index 000000000000..2ec6bf6a0aa0 --- /dev/null +++ b/net/ipv6/tcpv6_offload.c @@ -0,0 +1,95 @@ +/* + * IPV6 GSO/GRO offload support + * Linux INET6 implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * TCPv6 GSO/GRO support + */ +#include <linux/skbuff.h> +#include <net/protocol.h> +#include <net/tcp.h> +#include <net/ip6_checksum.h> +#include "ip6_offload.h" + +static int tcp_v6_gso_send_check(struct sk_buff *skb) +{ + const struct ipv6hdr *ipv6h; + struct tcphdr *th; + + if (!pskb_may_pull(skb, sizeof(*th))) + return -EINVAL; + + ipv6h = ipv6_hdr(skb); + th = tcp_hdr(skb); + + th->check = 0; + skb->ip_summed = CHECKSUM_PARTIAL; + __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr); + return 0; +} + +static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + const struct ipv6hdr *iph = skb_gro_network_header(skb); + __wsum wsum; + __sum16 sum; + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, + skb->csum)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + + case CHECKSUM_NONE: + wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, + skb_gro_len(skb), + IPPROTO_TCP, 0)); + sum = 
csum_fold(skb_checksum(skb, + skb_gro_offset(skb), + skb_gro_len(skb), + wsum)); + if (sum) + goto flush; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } + + return tcp_gro_receive(head, skb); +} + +static int tcp6_gro_complete(struct sk_buff *skb) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + struct tcphdr *th = tcp_hdr(skb); + + th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb), + &iph->saddr, &iph->daddr, 0); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + + return tcp_gro_complete(skb); +} + +static const struct net_offload tcpv6_offload = { + .callbacks = { + .gso_send_check = tcp_v6_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp6_gro_receive, + .gro_complete = tcp6_gro_complete, + }, +}; + +int __init tcpv6_offload_init(void) +{ + return inet6_add_offload(&tcpv6_offload, IPPROTO_TCP); +} diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index fc9997260a6b..dfaa29b8b293 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1343,103 +1343,9 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, } #endif -static int udp6_ufo_send_check(struct sk_buff *skb) -{ - const struct ipv6hdr *ipv6h; - struct udphdr *uh; - - if (!pskb_may_pull(skb, sizeof(*uh))) - return -EINVAL; - - ipv6h = ipv6_hdr(skb); - uh = udp_hdr(skb); - - uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, - IPPROTO_UDP, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; - return 0; -} - -static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - unsigned int mss; - unsigned int unfrag_ip6hlen, unfrag_len; - struct frag_hdr *fptr; - u8 *mac_start, *prevhdr; - u8 nexthdr; - u8 frag_hdr_sz = sizeof(struct frag_hdr); - int offset; - __wsum csum; - - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if 
(skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || - !(type & (SKB_GSO_UDP)))) - goto out; - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - segs = NULL; - goto out; - } - - /* Do software UFO. Complete and fill in the UDP checksum as HW cannot - * do checksum of UDP packets sent as multiple IP fragments. - */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - - /* Check if there is enough headroom to insert fragment header. */ - if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) && - pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC)) - goto out; - - /* Find the unfragmentable header and shift it left by frag_hdr_sz - * bytes to insert fragment header. - */ - unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); - nexthdr = *prevhdr; - *prevhdr = NEXTHDR_FRAGMENT; - unfrag_len = skb_network_header(skb) - skb_mac_header(skb) + - unfrag_ip6hlen; - mac_start = skb_mac_header(skb); - memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len); - - skb->mac_header -= frag_hdr_sz; - skb->network_header -= frag_hdr_sz; - - fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); - fptr->nexthdr = nexthdr; - fptr->reserved = 0; - ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb)); - - /* Fragment the skb. 
ipv6 header and the remaining fields of the - * fragment header are updated in ipv6_gso_segment() - */ - segs = skb_segment(skb, features); - -out: - return segs; -} - static const struct inet6_protocol udpv6_protocol = { .handler = udpv6_rcv, .err_handler = udpv6_err, - .gso_send_check = udp6_ufo_send_check, - .gso_segment = udp6_ufo_fragment, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c new file mode 100644 index 000000000000..8e01c44a987c --- /dev/null +++ b/net/ipv6/udp_offload.c @@ -0,0 +1,119 @@ +/* + * IPV6 GSO/GRO offload support + * Linux INET6 implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * UDPv6 GSO support + */ +#include <linux/skbuff.h> +#include <net/protocol.h> +#include <net/ipv6.h> +#include <net/udp.h> +#include "ip6_offload.h" + +static int udp6_ufo_send_check(struct sk_buff *skb) +{ + const struct ipv6hdr *ipv6h; + struct udphdr *uh; + + if (!pskb_may_pull(skb, sizeof(*uh))) + return -EINVAL; + + ipv6h = ipv6_hdr(skb); + uh = udp_hdr(skb); + + uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, + IPPROTO_UDP, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + return 0; +} + +static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int mss; + unsigned int unfrag_ip6hlen, unfrag_len; + struct frag_hdr *fptr; + u8 *mac_start, *prevhdr; + u8 nexthdr; + u8 frag_hdr_sz = sizeof(struct frag_hdr); + int offset; + __wsum csum; + + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; + + if (skb_gso_ok(skb, features | 
NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + int type = skb_shinfo(skb)->gso_type; + + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + !(type & (SKB_GSO_UDP)))) + goto out; + + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + + segs = NULL; + goto out; + } + + /* Do software UFO. Complete and fill in the UDP checksum as HW cannot + * do checksum of UDP packets sent as multiple IP fragments. + */ + offset = skb_checksum_start_offset(skb); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + + /* Check if there is enough headroom to insert fragment header. */ + if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) && + pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC)) + goto out; + + /* Find the unfragmentable header and shift it left by frag_hdr_sz + * bytes to insert fragment header. + */ + unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); + nexthdr = *prevhdr; + *prevhdr = NEXTHDR_FRAGMENT; + unfrag_len = skb_network_header(skb) - skb_mac_header(skb) + + unfrag_ip6hlen; + mac_start = skb_mac_header(skb); + memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len); + + skb->mac_header -= frag_hdr_sz; + skb->network_header -= frag_hdr_sz; + + fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); + fptr->nexthdr = nexthdr; + fptr->reserved = 0; + ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb)); + + /* Fragment the skb. ipv6 header and the remaining fields of the + * fragment header are updated in ipv6_gso_segment() + */ + segs = skb_segment(skb, features); + +out: + return segs; +} +static const struct net_offload udpv6_offload = { + .callbacks = { + .gso_send_check = udp6_ufo_send_check, + .gso_segment = udp6_ufo_fragment, + }, +}; + +int __init udp_offload_init(void) +{ + return inet6_add_offload(&udpv6_offload, IPPROTO_UDP); +} |