diff options
30 files changed, 795 insertions, 378 deletions
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index 54a7c20d9fa0..84e7e0909def 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -111,184 +111,168 @@ static bool igb_sgmii_uses_mdio_82575(struct e1000_hw *hw) return ext_mdio; } -static s32 igb_get_invariants_82575(struct e1000_hw *hw) +/** + * igb_init_phy_params_82575 - Init PHY func ptrs. + * @hw: pointer to the HW structure + **/ +static s32 igb_init_phy_params_82575(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; - struct e1000_nvm_info *nvm = &hw->nvm; - struct e1000_mac_info *mac = &hw->mac; - struct e1000_dev_spec_82575 * dev_spec = &hw->dev_spec._82575; - u32 eecd; - s32 ret_val; - u16 size; - u32 ctrl_ext = 0; + s32 ret_val = 0; + u32 ctrl_ext; - switch (hw->device_id) { - case E1000_DEV_ID_82575EB_COPPER: - case E1000_DEV_ID_82575EB_FIBER_SERDES: - case E1000_DEV_ID_82575GB_QUAD_COPPER: - mac->type = e1000_82575; - break; - case E1000_DEV_ID_82576: - case E1000_DEV_ID_82576_NS: - case E1000_DEV_ID_82576_NS_SERDES: - case E1000_DEV_ID_82576_FIBER: - case E1000_DEV_ID_82576_SERDES: - case E1000_DEV_ID_82576_QUAD_COPPER: - case E1000_DEV_ID_82576_QUAD_COPPER_ET2: - case E1000_DEV_ID_82576_SERDES_QUAD: - mac->type = e1000_82576; - break; - case E1000_DEV_ID_82580_COPPER: - case E1000_DEV_ID_82580_FIBER: - case E1000_DEV_ID_82580_QUAD_FIBER: - case E1000_DEV_ID_82580_SERDES: - case E1000_DEV_ID_82580_SGMII: - case E1000_DEV_ID_82580_COPPER_DUAL: - case E1000_DEV_ID_DH89XXCC_SGMII: - case E1000_DEV_ID_DH89XXCC_SERDES: - case E1000_DEV_ID_DH89XXCC_BACKPLANE: - case E1000_DEV_ID_DH89XXCC_SFP: - mac->type = e1000_82580; - break; - case E1000_DEV_ID_I350_COPPER: - case E1000_DEV_ID_I350_FIBER: - case E1000_DEV_ID_I350_SERDES: - case E1000_DEV_ID_I350_SGMII: - mac->type = e1000_i350; - break; - case E1000_DEV_ID_I210_COPPER: - case E1000_DEV_ID_I210_COPPER_OEM1: - case E1000_DEV_ID_I210_COPPER_IT: - case E1000_DEV_ID_I210_FIBER: - case E1000_DEV_ID_I210_SERDES: - case E1000_DEV_ID_I210_SGMII: - mac->type = e1000_i210; - break; - case E1000_DEV_ID_I211_COPPER: - mac->type = e1000_i211; - break; - default: - return -E1000_ERR_MAC_INIT; - break; + if (hw->phy.media_type != e1000_media_type_copper) { + phy->type = e1000_phy_none; + goto out; } - /* Set media type */ - /* - * The 82575 uses bits 22:23 for link mode. The mode can be changed - * based on the EEPROM. We cannot rely upon device ID. There - * is no distinguishable difference between fiber and internal - * SerDes mode on the 82575. There can be an external PHY attached - * on the SGMII interface. For this, we'll set sgmii_active to true. - */ - phy->media_type = e1000_media_type_copper; - dev_spec->sgmii_active = false; + phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; + phy->reset_delay_us = 100; ctrl_ext = rd32(E1000_CTRL_EXT); - switch (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK) { - case E1000_CTRL_EXT_LINK_MODE_SGMII: - dev_spec->sgmii_active = true; - break; - case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX: - case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES: - hw->phy.media_type = e1000_media_type_internal_serdes; - break; - default: - break; + + if (igb_sgmii_active_82575(hw)) { + phy->ops.reset = igb_phy_hw_reset_sgmii_82575; + ctrl_ext |= E1000_CTRL_I2C_ENA; + } else { + phy->ops.reset = igb_phy_hw_reset; + ctrl_ext &= ~E1000_CTRL_I2C_ENA; } - /* Set mta register count */ - mac->mta_reg_count = 128; - /* Set rar entry count */ - switch (mac->type) { - case e1000_82576: - mac->rar_entry_count = E1000_RAR_ENTRIES_82576; + wr32(E1000_CTRL_EXT, ctrl_ext); + igb_reset_mdicnfg_82580(hw); + + if (igb_sgmii_active_82575(hw) && !igb_sgmii_uses_mdio_82575(hw)) { + phy->ops.read_reg = igb_read_phy_reg_sgmii_82575; + phy->ops.write_reg = igb_write_phy_reg_sgmii_82575; + } else { + switch (hw->mac.type) { + case e1000_82580: + case e1000_i350: + phy->ops.read_reg = igb_read_phy_reg_82580; + phy->ops.write_reg = igb_write_phy_reg_82580; + break; + case e1000_i210: + case e1000_i211: + phy->ops.read_reg = igb_read_phy_reg_gs40g; + phy->ops.write_reg = igb_write_phy_reg_gs40g; + break; + default: + phy->ops.read_reg = igb_read_phy_reg_igp; + phy->ops.write_reg = igb_write_phy_reg_igp; + } + } + + /* set lan id */ + hw->bus.func = (rd32(E1000_STATUS) & E1000_STATUS_FUNC_MASK) >> + E1000_STATUS_FUNC_SHIFT; + + /* Set phy->phy_addr and phy->id. */ + ret_val = igb_get_phy_id_82575(hw); + if (ret_val) + return ret_val; + + /* Verify phy id and set remaining function pointers */ + switch (phy->id) { + case I347AT4_E_PHY_ID: + case M88E1112_E_PHY_ID: + case M88E1111_I_PHY_ID: + phy->type = e1000_phy_m88; + phy->ops.get_phy_info = igb_get_phy_info_m88; + if (phy->id == I347AT4_E_PHY_ID || + phy->id == M88E1112_E_PHY_ID) + phy->ops.get_cable_length = + igb_get_cable_length_m88_gen2; + else + phy->ops.get_cable_length = igb_get_cable_length_m88; + phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_m88; break; - case e1000_82580: - mac->rar_entry_count = E1000_RAR_ENTRIES_82580; + case IGP03E1000_E_PHY_ID: + phy->type = e1000_phy_igp_3; + phy->ops.get_phy_info = igb_get_phy_info_igp; + phy->ops.get_cable_length = igb_get_cable_length_igp_2; + phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_igp; + phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82575; + phy->ops.set_d3_lplu_state = igb_set_d3_lplu_state; break; - case e1000_i350: - mac->rar_entry_count = E1000_RAR_ENTRIES_I350; + case I82580_I_PHY_ID: + case I350_I_PHY_ID: + phy->type = e1000_phy_82580; + phy->ops.force_speed_duplex = + igb_phy_force_speed_duplex_82580; + phy->ops.get_cable_length = igb_get_cable_length_82580; + phy->ops.get_phy_info = igb_get_phy_info_82580; + phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82580; + phy->ops.set_d3_lplu_state = igb_set_d3_lplu_state_82580; break; - default: - mac->rar_entry_count = E1000_RAR_ENTRIES_82575; + case I210_I_PHY_ID: + phy->type = e1000_phy_i210; + phy->ops.check_polarity = igb_check_polarity_m88; + phy->ops.get_phy_info = igb_get_phy_info_m88; + phy->ops.get_cable_length = igb_get_cable_length_m88_gen2; + phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82580; + phy->ops.set_d3_lplu_state = igb_set_d3_lplu_state_82580; + phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_m88; break; + default: + ret_val = -E1000_ERR_PHY; + goto out; } - /* reset */ - if (mac->type >= e1000_82580) - mac->ops.reset_hw = igb_reset_hw_82580; - else - mac->ops.reset_hw = igb_reset_hw_82575; - if (mac->type >= e1000_i210) { - mac->ops.acquire_swfw_sync = igb_acquire_swfw_sync_i210; - mac->ops.release_swfw_sync = igb_release_swfw_sync_i210; - } else { - mac->ops.acquire_swfw_sync = igb_acquire_swfw_sync_82575; - mac->ops.release_swfw_sync = igb_release_swfw_sync_82575; - } +out: + return ret_val; +} - /* Set if part includes ASF firmware */ - mac->asf_firmware_present = true; - /* Set if manageability features are enabled. */ - mac->arc_subsystem_valid = - (rd32(E1000_FWSM) & E1000_FWSM_MODE_MASK) - ? true : false; - /* enable EEE on i350 parts and later parts */ - if (mac->type >= e1000_i350) - dev_spec->eee_disable = false; - else - dev_spec->eee_disable = true; - /* physical interface link setup */ - mac->ops.setup_physical_interface = - (hw->phy.media_type == e1000_media_type_copper) - ? igb_setup_copper_link_82575 - : igb_setup_serdes_link_82575; +/** + * igb_init_nvm_params_82575 - Init NVM func ptrs. + * @hw: pointer to the HW structure + **/ +s32 igb_init_nvm_params_82575(struct e1000_hw *hw) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + u32 eecd = rd32(E1000_EECD); + u16 size; - /* NVM initialization */ - eecd = rd32(E1000_EECD); size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> E1000_EECD_SIZE_EX_SHIFT); - - /* - * Added to a constant, "size" becomes the left-shift value + /* Added to a constant, "size" becomes the left-shift value * for setting word_size. */ size += NVM_WORD_SIZE_BASE_SHIFT; - /* - * Check for invalid size + /* Just in case size is out of range, cap it to the largest + * EEPROM size supported */ - if ((hw->mac.type == e1000_82576) && (size > 15)) { - pr_notice("The NVM size is not valid, defaulting to 32K\n"); + if (size > 15) size = 15; - } nvm->word_size = 1 << size; if (hw->mac.type < e1000_i210) { - nvm->opcode_bits = 8; - nvm->delay_usec = 1; + nvm->opcode_bits = 8; + nvm->delay_usec = 1; + switch (nvm->override) { case e1000_nvm_override_spi_large: - nvm->page_size = 32; + nvm->page_size = 32; nvm->address_bits = 16; break; case e1000_nvm_override_spi_small: - nvm->page_size = 8; + nvm->page_size = 8; nvm->address_bits = 8; break; default: - nvm->page_size = eecd - & E1000_EECD_ADDR_BITS ? 32 : 8; - nvm->address_bits = eecd - & E1000_EECD_ADDR_BITS ? 16 : 8; + nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; + nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? + 16 : 8; break; } if (nvm->word_size == (1 << 15)) nvm->page_size = 128; nvm->type = e1000_nvm_eeprom_spi; - } else + } else { nvm->type = e1000_nvm_flash_hw; + } /* NVM Function Pointers */ switch (hw->mac.type) { @@ -345,118 +329,176 @@ static s32 igb_get_invariants_82575(struct e1000_hw *hw) break; } - /* if part supports SR-IOV then initialize mailbox parameters */ + return 0; +} + +/** + * igb_init_mac_params_82575 - Init MAC func ptrs. + * @hw: pointer to the HW structure + **/ +static s32 igb_init_mac_params_82575(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; + + /* Set mta register count */ + mac->mta_reg_count = 128; + /* Set rar entry count */ switch (mac->type) { case e1000_82576: + mac->rar_entry_count = E1000_RAR_ENTRIES_82576; + break; + case e1000_82580: + mac->rar_entry_count = E1000_RAR_ENTRIES_82580; + break; case e1000_i350: - igb_init_mbx_params_pf(hw); + mac->rar_entry_count = E1000_RAR_ENTRIES_I350; break; default: + mac->rar_entry_count = E1000_RAR_ENTRIES_82575; break; } + /* reset */ + if (mac->type >= e1000_82580) + mac->ops.reset_hw = igb_reset_hw_82580; + else + mac->ops.reset_hw = igb_reset_hw_82575; - /* setup PHY parameters */ - if (phy->media_type != e1000_media_type_copper) { - phy->type = e1000_phy_none; - return 0; - } - - phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; - phy->reset_delay_us = 100; - - ctrl_ext = rd32(E1000_CTRL_EXT); + if (mac->type >= e1000_i210) { + mac->ops.acquire_swfw_sync = igb_acquire_swfw_sync_i210; + mac->ops.release_swfw_sync = igb_release_swfw_sync_i210; - /* PHY function pointers */ - if (igb_sgmii_active_82575(hw)) { - phy->ops.reset = igb_phy_hw_reset_sgmii_82575; - ctrl_ext |= E1000_CTRL_I2C_ENA; } else { - phy->ops.reset = igb_phy_hw_reset; - ctrl_ext &= ~E1000_CTRL_I2C_ENA; + mac->ops.acquire_swfw_sync = igb_acquire_swfw_sync_82575; + mac->ops.release_swfw_sync = igb_release_swfw_sync_82575; } - wr32(E1000_CTRL_EXT, ctrl_ext); - igb_reset_mdicnfg_82580(hw); - - if (igb_sgmii_active_82575(hw) && !igb_sgmii_uses_mdio_82575(hw)) { - phy->ops.read_reg = igb_read_phy_reg_sgmii_82575; - phy->ops.write_reg = igb_write_phy_reg_sgmii_82575; - } else if ((hw->mac.type == e1000_82580) - || (hw->mac.type == e1000_i350)) { - phy->ops.read_reg = igb_read_phy_reg_82580; - phy->ops.write_reg = igb_write_phy_reg_82580; - } else if (hw->phy.type >= e1000_phy_i210) { - phy->ops.read_reg = igb_read_phy_reg_gs40g; - phy->ops.write_reg = igb_write_phy_reg_gs40g; - } else { - phy->ops.read_reg = igb_read_phy_reg_igp; - phy->ops.write_reg = igb_write_phy_reg_igp; - } + /* Set if part includes ASF firmware */ + mac->asf_firmware_present = true; + /* Set if manageability features are enabled. */ + mac->arc_subsystem_valid = + (rd32(E1000_FWSM) & E1000_FWSM_MODE_MASK) + ? true : false; + /* enable EEE on i350 parts and later parts */ + if (mac->type >= e1000_i350) + dev_spec->eee_disable = false; + else + dev_spec->eee_disable = true; + /* physical interface link setup */ + mac->ops.setup_physical_interface = + (hw->phy.media_type == e1000_media_type_copper) + ? igb_setup_copper_link_82575 + : igb_setup_serdes_link_82575; - /* set lan id */ - hw->bus.func = (rd32(E1000_STATUS) & E1000_STATUS_FUNC_MASK) >> - E1000_STATUS_FUNC_SHIFT; + return 0; +} - /* Set phy->phy_addr and phy->id. */ - ret_val = igb_get_phy_id_82575(hw); - if (ret_val) - return ret_val; +static s32 igb_get_invariants_82575(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + struct e1000_dev_spec_82575 * dev_spec = &hw->dev_spec._82575; + s32 ret_val; + u32 ctrl_ext = 0; - /* Verify phy id and set remaining function pointers */ - switch (phy->id) { - case I347AT4_E_PHY_ID: - case M88E1112_E_PHY_ID: - case M88E1111_I_PHY_ID: - phy->type = e1000_phy_m88; - phy->ops.get_phy_info = igb_get_phy_info_m88; + switch (hw->device_id) { + case E1000_DEV_ID_82575EB_COPPER: + case E1000_DEV_ID_82575EB_FIBER_SERDES: + case E1000_DEV_ID_82575GB_QUAD_COPPER: + mac->type = e1000_82575; + break; + case E1000_DEV_ID_82576: + case E1000_DEV_ID_82576_NS: + case E1000_DEV_ID_82576_NS_SERDES: + case E1000_DEV_ID_82576_FIBER: + case E1000_DEV_ID_82576_SERDES: + case E1000_DEV_ID_82576_QUAD_COPPER: + case E1000_DEV_ID_82576_QUAD_COPPER_ET2: + case E1000_DEV_ID_82576_SERDES_QUAD: + mac->type = e1000_82576; + break; + case E1000_DEV_ID_82580_COPPER: + case E1000_DEV_ID_82580_FIBER: + case E1000_DEV_ID_82580_QUAD_FIBER: + case E1000_DEV_ID_82580_SERDES: + case E1000_DEV_ID_82580_SGMII: + case E1000_DEV_ID_82580_COPPER_DUAL: + case E1000_DEV_ID_DH89XXCC_SGMII: + case E1000_DEV_ID_DH89XXCC_SERDES: + case E1000_DEV_ID_DH89XXCC_BACKPLANE: + case E1000_DEV_ID_DH89XXCC_SFP: + mac->type = e1000_82580; + break; + case E1000_DEV_ID_I350_COPPER: + case E1000_DEV_ID_I350_FIBER: + case E1000_DEV_ID_I350_SERDES: + case E1000_DEV_ID_I350_SGMII: + mac->type = e1000_i350; + break; + case E1000_DEV_ID_I210_COPPER: + case E1000_DEV_ID_I210_COPPER_OEM1: + case E1000_DEV_ID_I210_COPPER_IT: + case E1000_DEV_ID_I210_FIBER: + case E1000_DEV_ID_I210_SERDES: + case E1000_DEV_ID_I210_SGMII: + mac->type = e1000_i210; + break; + case E1000_DEV_ID_I211_COPPER: + mac->type = e1000_i211; + break; + default: + return -E1000_ERR_MAC_INIT; + break; + } - if (phy->id == I347AT4_E_PHY_ID || - phy->id == M88E1112_E_PHY_ID) - phy->ops.get_cable_length = igb_get_cable_length_m88_gen2; - else - phy->ops.get_cable_length = igb_get_cable_length_m88; + /* Set media type */ + /* + * The 82575 uses bits 22:23 for link mode. The mode can be changed + * based on the EEPROM. We cannot rely upon device ID. There + * is no distinguishable difference between fiber and internal + * SerDes mode on the 82575. There can be an external PHY attached + * on the SGMII interface. For this, we'll set sgmii_active to true. + */ + hw->phy.media_type = e1000_media_type_copper; + dev_spec->sgmii_active = false; - if (phy->id == I210_I_PHY_ID) { - phy->ops.get_cable_length = - igb_get_cable_length_m88_gen2; - phy->ops.set_d0_lplu_state = - igb_set_d0_lplu_state_82580; - phy->ops.set_d3_lplu_state = - igb_set_d3_lplu_state_82580; - } - phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_m88; + ctrl_ext = rd32(E1000_CTRL_EXT); + switch (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK) { + case E1000_CTRL_EXT_LINK_MODE_SGMII: + dev_spec->sgmii_active = true; break; - case IGP03E1000_E_PHY_ID: - phy->type = e1000_phy_igp_3; - phy->ops.get_phy_info = igb_get_phy_info_igp; - phy->ops.get_cable_length = igb_get_cable_length_igp_2; - phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_igp; - phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82575; - phy->ops.set_d3_lplu_state = igb_set_d3_lplu_state; + case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX: + case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES: + hw->phy.media_type = e1000_media_type_internal_serdes; break; - case I82580_I_PHY_ID: - case I350_I_PHY_ID: - phy->type = e1000_phy_82580; - phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_82580; - phy->ops.get_cable_length = igb_get_cable_length_82580; - phy->ops.get_phy_info = igb_get_phy_info_82580; - phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82580; - phy->ops.set_d3_lplu_state = igb_set_d3_lplu_state_82580; + default: break; - case I210_I_PHY_ID: - phy->type = e1000_phy_i210; - phy->ops.get_phy_info = igb_get_phy_info_m88; - phy->ops.check_polarity = igb_check_polarity_m88; - phy->ops.get_cable_length = igb_get_cable_length_m88_gen2; - phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82580; - phy->ops.set_d3_lplu_state = igb_set_d3_lplu_state_82580; - phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_m88; + } + + /* mac initialization and operations */ + ret_val = igb_init_mac_params_82575(hw); + if (ret_val) + goto out; + + /* NVM initialization */ + ret_val = igb_init_nvm_params_82575(hw); + if (ret_val) + goto out; + + /* if part supports SR-IOV then initialize mailbox parameters */ + switch (mac->type) { + case e1000_82576: + case e1000_i350: + igb_init_mbx_params_pf(hw); break; default: - return -E1000_ERR_PHY; + break; } - return 0; + /* setup PHY parameters */ + ret_val = igb_init_phy_params_82575(hw); + +out: + return ret_val; } /** diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 4b78053592ba..d27edbc63923 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -139,8 +139,6 @@ struct vf_data_storage { #define IGB_RX_HDR_LEN IGB_RXBUFFER_256 #define IGB_RX_BUFSZ IGB_RXBUFFER_2048 -/* How many Tx Descriptors do we need to call netif_wake_queue ? */ -#define IGB_TX_QUEUE_WAKE 16 /* How many Rx Buffers do we bundle into one write to the hardware ? */ #define IGB_RX_BUFFER_WRITE 16 /* Must be power of 2 */ @@ -169,6 +167,17 @@ enum igb_tx_flags { #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000 #define IGB_TX_FLAGS_VLAN_SHIFT 16 +/* + * The largest size we can write to the descriptor is 65535. In order to + * maintain a power of two alignment we have to limit ourselves to 32K. + */ +#define IGB_MAX_TXD_PWR 15 +#define IGB_MAX_DATA_PER_TXD (1 << IGB_MAX_TXD_PWR) + +/* Tx Descriptors needed, worst case */ +#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGB_MAX_DATA_PER_TXD) +#define DESC_NEEDED (MAX_SKB_FRAGS + 4) + /* wrapper around a pointer to a socket buffer, * so a DMA handle can be stored along with the buffer */ struct igb_tx_buffer { @@ -275,10 +284,18 @@ struct igb_q_vector { enum e1000_ring_flags_t { IGB_RING_FLAG_RX_SCTP_CSUM, IGB_RING_FLAG_RX_LB_VLAN_BSWAP, + IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, IGB_RING_FLAG_TX_CTX_IDX, IGB_RING_FLAG_TX_DETECT_HANG }; +#define ring_uses_build_skb(ring) \ + test_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags) +#define set_ring_build_skb_enabled(ring) \ + set_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags) +#define clear_ring_build_skb_enabled(ring) \ + clear_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags) + #define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS) #define IGB_RX_DESC(R, i) \ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 1aaf19351863..ed79a1c53b59 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3354,6 +3354,20 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, wr32(E1000_RXDCTL(reg_idx), rxdctl); } +static void igb_set_rx_buffer_len(struct igb_adapter *adapter, + struct igb_ring *rx_ring) +{ +#define IGB_MAX_BUILD_SKB_SIZE \ + (SKB_WITH_OVERHEAD(IGB_RX_BUFSZ) - \ + (NET_SKB_PAD + NET_IP_ALIGN + IGB_TS_HDR_LEN)) + + /* set build_skb flag */ + if (adapter->max_frame_size <= IGB_MAX_BUILD_SKB_SIZE) + set_ring_build_skb_enabled(rx_ring); + else + clear_ring_build_skb_enabled(rx_ring); +} + /** * igb_configure_rx - Configure receive Unit after Reset * @adapter: board private structure @@ -3373,8 +3387,11 @@ static void igb_configure_rx(struct igb_adapter *adapter) /* Setup the HW Rx Head and Tail Descriptor Pointers and * the Base and Length of the Rx Descriptor Ring */ - for (i = 0; i < adapter->num_rx_queues; i++) - igb_configure_rx_ring(adapter, adapter->rx_ring[i]); + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igb_ring *rx_ring = adapter->rx_ring[i]; + igb_set_rx_buffer_len(adapter, rx_ring); + igb_configure_rx_ring(adapter, rx_ring); + } } /** @@ -4417,13 +4434,6 @@ static void igb_tx_olinfo_status(struct igb_ring *tx_ring, tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); } -/* - * The largest size we can write to the descriptor is 65535. In order to - * maintain a power of two alignment we have to limit ourselves to 32K. - */ -#define IGB_MAX_TXD_PWR 15 -#define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR) - static void igb_tx_map(struct igb_ring *tx_ring, struct igb_tx_buffer *first, const u8 hdr_len) @@ -4592,15 +4602,25 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, struct igb_tx_buffer *first; int tso; u32 tx_flags = 0; + u16 count = TXD_USE_COUNT(skb_headlen(skb)); __be16 protocol = vlan_get_protocol(skb); u8 hdr_len = 0; - /* need: 1 descriptor per page, + /* need: 1 descriptor per page * PAGE_SIZE/IGB_MAX_DATA_PER_TXD, + * + 1 desc for skb_headlen/IGB_MAX_DATA_PER_TXD, * + 2 desc gap to keep tail from touching head, - * + 1 desc for skb->data, * + 1 desc for context descriptor, - * otherwise try next time */ - if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) { + * otherwise try next time + */ + if (NETDEV_FRAG_PAGE_MAX_SIZE > IGB_MAX_DATA_PER_TXD) { + unsigned short f; + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) + count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); + } else { + count += skb_shinfo(skb)->nr_frags; + } + + if (igb_maybe_stop_tx(tx_ring, count + 3)) { /* this is a hard error */ return NETDEV_TX_BUSY; } @@ -4642,7 +4662,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, igb_tx_map(tx_ring, first, hdr_len); /* Make sure there is space in the ring for the next send. */ - igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4); + igb_maybe_stop_tx(tx_ring, DESC_NEEDED); return NETDEV_TX_OK; @@ -6046,9 +6066,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) } } +#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && - igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) { + igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { /* Make sure that anybody stopping the queue after this * sees the new next_to_clean. */ @@ -6097,6 +6118,41 @@ static void igb_reuse_rx_page(struct igb_ring *rx_ring, DMA_FROM_DEVICE); } +static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, + struct page *page, + unsigned int truesize) +{ + /* avoid re-using remote pages */ + if (unlikely(page_to_nid(page) != numa_node_id())) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= IGB_RX_BUFSZ; + + /* since we are the only owner of the page and we need to + * increment it, just set the value to 2 in order to avoid + * an unnecessary locked operation + */ + atomic_set(&page->_count, 2); +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; + + if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ)) + return false; + + /* bump ref count on page before it is given to the stack */ + get_page(page); +#endif + + return true; +} + /** * igb_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on @@ -6119,6 +6175,11 @@ static bool igb_add_rx_frag(struct igb_ring *rx_ring, { struct page *page = rx_buffer->page; unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); +#if (PAGE_SIZE < 8192) + unsigned int truesize = IGB_RX_BUFSZ; +#else + unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); +#endif if ((size <= IGB_RX_HDR_LEN) && !skb_is_nonlinear(skb)) { unsigned char *va = page_address(page) + rx_buffer->page_offset; @@ -6141,38 +6202,88 @@ static bool igb_add_rx_frag(struct igb_ring *rx_ring, } skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - rx_buffer->page_offset, size, IGB_RX_BUFSZ); + rx_buffer->page_offset, size, truesize); - /* avoid re-using remote pages */ - if (unlikely(page_to_nid(page) != numa_node_id())) - return false; + return igb_can_reuse_rx_page(rx_buffer, page, truesize); +} +static struct sk_buff *igb_build_rx_buffer(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc) +{ + struct igb_rx_buffer *rx_buffer; + struct sk_buff *skb; + struct page *page; + void *page_addr; + unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); #if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) - return false; + unsigned int truesize = IGB_RX_BUFSZ; +#else + unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + + SKB_DATA_ALIGN(NET_SKB_PAD + + NET_IP_ALIGN + + size); +#endif - /* flip page offset to other buffer */ - rx_buffer->page_offset ^= IGB_RX_BUFSZ; + /* If we spanned a buffer we have a huge mess so test for it */ + BUG_ON(unlikely(!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP))); - /* - * since we are the only owner of the page and we need to - * increment it, just set the value to 2 in order to avoid - * an unnecessary locked operation - */ - atomic_set(&page->_count, 2); -#else - /* move offset up to the next cache line */ - rx_buffer->page_offset += SKB_DATA_ALIGN(size); + /* Guarantee this function can be used by verifying buffer sizes */ + BUILD_BUG_ON(SKB_WITH_OVERHEAD(IGB_RX_BUFSZ) < (NET_SKB_PAD + + NET_IP_ALIGN + + IGB_TS_HDR_LEN + + ETH_FRAME_LEN + + ETH_FCS_LEN)); - if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ)) - return false; + rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + page = rx_buffer->page; + prefetchw(page); - /* bump ref count on page before it is given to the stack */ - get_page(page); + page_addr = page_address(page) + rx_buffer->page_offset; + + /* prefetch first cache line of first page */ + prefetch(page_addr + NET_SKB_PAD + NET_IP_ALIGN); +#if L1_CACHE_BYTES < 128 + prefetch(page_addr + L1_CACHE_BYTES + NET_SKB_PAD + NET_IP_ALIGN); #endif - return true; + /* build an skb to around the page buffer */ + skb = build_skb(page_addr, truesize); + if (unlikely(!skb)) { + rx_ring->rx_stats.alloc_failed++; + return NULL; + } + + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + IGB_RX_BUFSZ, + DMA_FROM_DEVICE); + + /* update pointers within the skb to store the data */ + skb_reserve(skb, NET_IP_ALIGN + NET_SKB_PAD); + __skb_put(skb, size); + + /* pull timestamp out of packet data */ + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { + igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb); + __skb_pull(skb, IGB_TS_HDR_LEN); + } + + if (igb_can_reuse_rx_page(rx_buffer, page, truesize)) { + /* hand second half of page back to the ring */ + igb_reuse_rx_page(rx_ring, rx_buffer); + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page(rx_ring->dev, rx_buffer->dma, + PAGE_SIZE, DMA_FROM_DEVICE); + } + + /* clear contents of buffer_info */ + rx_buffer->dma = 0; + rx_buffer->page = NULL; + + return skb; } static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, @@ -6184,13 +6295,6 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - /* - * This memory barrier is needed to keep us from reading - * any other fields out of the rx_desc until we know the - * RXD_STAT_DD bit is set - */ - rmb(); - page = rx_buffer->page; prefetchw(page); @@ -6590,8 +6694,17 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) break; + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we know the + * RXD_STAT_DD bit is set + */ + rmb(); + /* retrieve a buffer from the ring */ - skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb); + if (ring_uses_build_skb(rx_ring)) + skb = igb_build_rx_buffer(rx_ring, rx_desc); + else + skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb); /* exit if we failed to retrieve a buffer */ if (!skb) @@ -6678,6 +6791,14 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, return true; } +static inline unsigned int igb_rx_offset(struct igb_ring *rx_ring) +{ + if (ring_uses_build_skb(rx_ring)) + return NET_SKB_PAD + NET_IP_ALIGN; + else + return 0; +} + /** * igb_alloc_rx_buffers - Replace used receive buffers; packet split * @adapter: address of board private structure @@ -6704,7 +6825,9 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) * Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + + bi->page_offset + + igb_rx_offset(rx_ring)); rx_desc++; bi++; @@ -7608,7 +7731,7 @@ static DEFINE_SPINLOCK(i2c_clients_lock); * @adapter: adapter struct * @dev_addr: device address of i2c needed. */ -struct i2c_client * +static struct i2c_client * igb_get_i2c_client(struct igb_adapter *adapter, u8 dev_addr) { ulong flags; @@ -7631,13 +7754,8 @@ igb_get_i2c_client(struct igb_adapter *adapter, u8 dev_addr) } } - /* no client_list found, create a new one as long as - * irqs are not disabled - */ - if (unlikely(irqs_disabled())) - goto exit; - - client_list = kzalloc(sizeof(*client_list), GFP_KERNEL); + /* no client_list found, create a new one */ + client_list = kzalloc(sizeof(*client_list), GFP_ATOMIC); if (client_list == NULL) goto exit; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 1c0efcb7920f..1d5e093e988a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4480,38 +4480,56 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter) hw->subsystem_vendor_id = pdev->subsystem_vendor; hw->subsystem_device_id = pdev->subsystem_device; - /* Set capability flags */ + /* Set common capability flags and settings */ rss = min_t(int, IXGBE_MAX_RSS_INDICES, num_online_cpus()); adapter->ring_feature[RING_F_RSS].limit = rss; + adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE; + adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; + adapter->ring_feature[RING_F_FDIR].limit = IXGBE_MAX_FDIR_INDICES; + adapter->max_q_vectors = MAX_Q_VECTORS_82599; + adapter->atr_sample_rate = 20; + adapter->fdir_pballoc = IXGBE_FDIR_PBALLOC_64K; +#ifdef CONFIG_IXGBE_DCA + adapter->flags |= IXGBE_FLAG_DCA_CAPABLE; +#endif +#ifdef IXGBE_FCOE + adapter->flags |= IXGBE_FLAG_FCOE_CAPABLE; + adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED; +#ifdef CONFIG_IXGBE_DCB + /* Default traffic class to use for FCoE */ + adapter->fcoe.up = IXGBE_FCOE_DEFTC; +#endif /* CONFIG_IXGBE_DCB */ +#endif /* IXGBE_FCOE */ + + /* Set MAC specific capability flags and exceptions */ switch (hw->mac.type) { case ixgbe_mac_82598EB: + adapter->flags2 &= ~IXGBE_FLAG2_RSC_CAPABLE; + adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED; + if (hw->device_id == IXGBE_DEV_ID_82598AT) adapter->flags |= IXGBE_FLAG_FAN_FAIL_CAPABLE; + adapter->max_q_vectors = MAX_Q_VECTORS_82598; + adapter->ring_feature[RING_F_FDIR].limit = 0; + adapter->atr_sample_rate = 0; + adapter->fdir_pballoc = 0; +#ifdef IXGBE_FCOE + adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE; + adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED; +#ifdef CONFIG_IXGBE_DCB + adapter->fcoe.up = 0; +#endif /* IXGBE_DCB */ +#endif /* IXGBE_FCOE */ + break; + case ixgbe_mac_82599EB: + if (hw->device_id == IXGBE_DEV_ID_82599_T3_LOM) + adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; break; case ixgbe_mac_X540: fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM); if (fwsm & IXGBE_FWSM_TS_ENABLED) adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; - case ixgbe_mac_82599EB: - adapter->max_q_vectors = MAX_Q_VECTORS_82599; - adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE; - adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; - if (hw->device_id == IXGBE_DEV_ID_82599_T3_LOM) - adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; - /* Flow Director hash filters enabled */ - adapter->atr_sample_rate = 20; - adapter->ring_feature[RING_F_FDIR].limit = - IXGBE_MAX_FDIR_INDICES; - adapter->fdir_pballoc = IXGBE_FDIR_PBALLOC_64K; -#ifdef IXGBE_FCOE - adapter->flags |= IXGBE_FLAG_FCOE_CAPABLE; - adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED; -#ifdef CONFIG_IXGBE_DCB - /* Default traffic class to use for FCoE */ - adapter->fcoe.up = IXGBE_FCOE_DEFTC; -#endif -#endif /* IXGBE_FCOE */ break; default: break; diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index a4a62e170ec0..fc1687ea4a42 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -751,16 +751,6 @@ static int at86rf230_hw_init(struct at86rf230_local *lp) return 0; } -static int at86rf230_suspend(struct spi_device *spi, pm_message_t message) -{ - return 0; -} - -static int at86rf230_resume(struct spi_device *spi) -{ - return 0; -} - static int at86rf230_fill_data(struct spi_device *spi) { struct at86rf230_local *lp = spi_get_drvdata(spi); @@ -948,8 +938,6 @@ static struct spi_driver at86rf230_driver = { }, .probe = at86rf230_probe, .remove = at86rf230_remove, - .suspend = at86rf230_suspend, - .resume = at86rf230_resume, }; module_spi_driver(at86rf230_driver); diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 0b2706abe3e3..4fd754e74eb2 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1805,8 +1805,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) /* the filter instructions are constructed assuming a four-byte PPP header on each packet */ if (ppp->pass_filter || ppp->active_filter) { - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto err; *skb_push(skb, 2) = 0; diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 5ac32123035a..3dd39340430e 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -41,7 +41,7 @@ enum { NETIF_F_TSO_ECN_BIT, /* ... TCP ECN support */ NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */ NETIF_F_FSO_BIT, /* ... FCoE segmentation */ - NETIF_F_GSO_RESERVED1, /* ... free (fill GSO_MASK to 8 bits) */ + NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ /**/NETIF_F_GSO_LAST, /* [can't be last bit, see GSO_MASK] */ NETIF_F_GSO_RESERVED2 /* ... free (fill GSO_MASK to 8 bits) */ = NETIF_F_GSO_LAST, @@ -102,6 +102,7 @@ enum { #define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) #define NETIF_F_RXFCS __NETIF_F(RXFCS) #define NETIF_F_RXALL __NETIF_F(RXALL) +#define NETIF_F_GRE_GSO __NETIF_F(GSO_GRE) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9deb672d999f..920361bc27e7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2671,6 +2671,8 @@ extern void netdev_upper_dev_unlink(struct net_device *dev, extern int skb_checksum_help(struct sk_buff *skb); extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); +extern struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features); static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9da99520ccd5..821c7f45d2a7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -314,6 +314,8 @@ enum { SKB_GSO_TCPV6 = 1 << 4, SKB_GSO_FCOE = 1 << 5, + + SKB_GSO_GRE = 1 << 6, }; #if BITS_PER_LONG > 32 @@ -804,6 +806,16 @@ static inline int skb_cloned(const struct sk_buff *skb) (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1; } +static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) +{ + might_sleep_if(pri & __GFP_WAIT); + + if (skb_cloned(skb)) + return pskb_expand_head(skb, 0, 0, pri); + + return 0; +} + /** * skb_header_cloned - is the header a clone * @skb: buffer to check @@ -2722,6 +2734,21 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb) } #endif +/* Keeps track of mac header offset relative to skb->head. + * It is useful for TSO of Tunneling protocol. e.g. GRE. + * For non-tunnel skb it points to skb_mac_header() and for + * tunnel skb it points to outer mac header. */ +struct skb_gso_cb { + int mac_offset; +}; +#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb) + +static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) +{ + return (skb_mac_header(inner_skb) - inner_skb->head) - + SKB_GSO_CB(inner_skb)->mac_offset; +} + static inline bool skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; diff --git a/net/core/dev.c b/net/core/dev.c index f44473696b8b..6ad37896a324 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2327,37 +2327,20 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -/* openvswitch calls this on rx path, so we need a different check. - */ -static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) -{ - if (tx_path) - return skb->ip_summed != CHECKSUM_PARTIAL; - else - return skb->ip_summed == CHECKSUM_NONE; -} - /** - * __skb_gso_segment - Perform segmentation on skb. + * skb_mac_gso_segment - mac layer segmentation handler. * @skb: buffer to segment * @features: features for the output path (see dev->features) - * @tx_path: whether it is called in TX path - * - * This function segments the given skb and returns a list of segments. - * - * It may return NULL if the skb requires no segmentation. This is - * only possible when GSO is used for verifying header integrity. */ -struct sk_buff *__skb_gso_segment(struct sk_buff *skb, - netdev_features_t features, bool tx_path) +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_offload *ptype; __be16 type = skb->protocol; - int vlan_depth = ETH_HLEN; - int err; while (type == htons(ETH_P_8021Q)) { + int vlan_depth = ETH_HLEN; struct vlan_hdr *vh; if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) @@ -2368,22 +2351,14 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, vlan_depth += VLAN_HLEN; } - skb_reset_mac_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); - if (unlikely(skb_needs_check(skb, tx_path))) { - skb_warn_bad_offload(skb); - - if (skb_header_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) - return ERR_PTR(err); - } - rcu_read_lock(); list_for_each_entry_rcu(ptype, &offload_base, list) { if (ptype->type == type && ptype->callbacks.gso_segment) { if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + int err; + err = ptype->callbacks.gso_send_check(skb); segs = ERR_PTR(err); if (err || skb_gso_ok(skb, features)) @@ -2401,6 +2376,49 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, return segs; } +EXPORT_SYMBOL(skb_mac_gso_segment); + + +/* openvswitch calls this on rx path, so we need a different check. + */ +static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) +{ + if (tx_path) + return skb->ip_summed != CHECKSUM_PARTIAL; + else + return skb->ip_summed == CHECKSUM_NONE; +} + +/** + * __skb_gso_segment - Perform segmentation on skb. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * @tx_path: whether it is called in TX path + * + * This function segments the given skb and returns a list of segments. + * + * It may return NULL if the skb requires no segmentation. This is + * only possible when GSO is used for verifying header integrity. + */ +struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path) +{ + if (unlikely(skb_needs_check(skb, tx_path))) { + int err; + + skb_warn_bad_offload(skb); + + if (skb_header_cloned(skb) && + (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + return ERR_PTR(err); + } + + SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + return skb_mac_gso_segment(skb, features); +} EXPORT_SYMBOL(__skb_gso_segment); /* Take action when hardware reception checksum errors are detected. */ @@ -3784,7 +3802,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff __be16 type = skb->protocol; struct list_head *head = &offload_base; int same_flow; - int mac_len; enum gro_result ret; if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) @@ -3801,8 +3818,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff continue; skb_set_network_header(skb, skb_gro_offset(skb)); - mac_len = skb->network_header - skb->mac_header; - skb->mac_len = mac_len; + skb_reset_mac_len(skb); NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d9d55209db67..3e9b2c3e30f0 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -77,6 +77,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", + [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6c1ad09f8796..2a3ca33c30aa 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2738,6 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; + unsigned int tnl_hlen = skb_tnl_header_len(skb); unsigned int headroom; unsigned int len; int sg = !!(features & NETIF_F_SG); @@ -2814,7 +2815,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_set_network_header(nskb, skb->mac_len); nskb->transport_header = (nskb->network_header + skb_network_header_len(skb)); - skb_copy_from_linear_data(skb, nskb->data, doffset); + + skb_copy_from_linear_data_offset(skb, -tnl_hlen, + nskb->data - tnl_hlen, + doffset + tnl_hlen); if (fskb != skb_shinfo(skb)->frag_list) continue; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e6e5d8506336..e225a4e5b572 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1287,6 +1287,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_GRE | 0))) goto out; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index a69b4e4a02b5..2e7f1948216f 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -321,8 +321,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) /* We are going to _remove_ AH header to keep sockets happy, * so... Later this can change. */ - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 42a491055c76..7a4c710c4cdd 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -19,6 +19,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/netdevice.h> +#include <linux/if_tunnel.h> #include <linux/spinlock.h> #include <net/protocol.h> #include <net/gre.h> @@ -26,6 +27,11 @@ static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; static DEFINE_SPINLOCK(gre_proto_lock); +struct gre_base_hdr { + __be16 flags; + __be16 protocol; +}; +#define GRE_HEADER_SECTION 4 int gre_add_protocol(const struct gre_protocol *proto, u8 version) { @@ -112,12 +118,117 @@ static void gre_err(struct sk_buff *skb, u32 info) rcu_read_unlock(); } +static struct sk_buff *gre_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t enc_features; + int ghl = GRE_HEADER_SECTION; + struct gre_base_hdr *greh; + int mac_len = skb->mac_len; + int tnl_hlen; + bool csum; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_TCPV6 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_GRE))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) + goto out; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + + if (greh->flags & GRE_KEY) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_SEQ) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_CSUM) { + ghl += GRE_HEADER_SECTION; + csum = true; + } else + csum = false; + + /* setup inner skb. */ + if (greh->protocol == htons(ETH_P_TEB)) { + struct ethhdr *eth = eth_hdr(skb); + skb->protocol = eth->h_proto; + } else { + skb->protocol = greh->protocol; + } + + skb->encapsulation = 0; + + if (unlikely(!pskb_may_pull(skb, ghl))) + goto out; + __skb_pull(skb, ghl); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + + /* segment inner packet. */ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) + goto out; + + skb = segs; + tnl_hlen = skb_tnl_header_len(skb); + do { + __skb_push(skb, ghl); + if (csum) { + __be32 *pcsum; + + if (skb_has_shared_frag(skb)) { + int err; + + err = __skb_linearize(skb); + if (err) { + kfree_skb(segs); + segs = ERR_PTR(err); + goto out; + } + } + + greh = (struct gre_base_hdr *)(skb->data); + pcsum = (__be32 *)(greh + 1); + *pcsum = 0; + *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); + } + __skb_push(skb, tnl_hlen - ghl); + + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb->mac_len = mac_len; + } while ((skb = skb->next)); +out: + return segs; +} + +static int gre_gso_send_check(struct sk_buff *skb) +{ + if (!skb->encapsulation) + return -EINVAL; + return 0; +} + static const struct net_protocol net_gre_protocol = { .handler = gre_rcv, .err_handler = gre_err, .netns_ok = 1, }; +static const struct net_offload gre_offload = { + .callbacks = { + .gso_send_check = gre_gso_send_check, + .gso_segment = gre_gso_segment, + }, +}; + static int __init gre_init(void) { pr_info("GRE over IPv4 demultiplexor driver\n"); @@ -127,11 +238,18 @@ static int __init gre_init(void) return -EAGAIN; } + if (inet_add_offload(&gre_offload, IPPROTO_GRE)) { + pr_err("can't add protocol offload\n"); + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); + return -EAGAIN; + } + return 0; } static void __exit gre_exit(void) { + inet_del_offload(&gre_offload, IPPROTO_GRE); inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1211613c6c34..b6d30acb600c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -590,7 +590,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, goto out_oversize; /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_nomem; /* If the first fragment is fragmented itself, we split diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 00a14b9864ea..a56f1182c176 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -735,8 +735,33 @@ drop: return 0; } +static struct sk_buff *handle_offloads(struct sk_buff *skb) +{ + int err; + + if (skb_is_gso(skb)) { + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + goto error; + skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; + return skb; + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); + if (unlikely(err)) + goto error; + } + skb->ip_summed = CHECKSUM_NONE; + + return skb; + +error: + kfree_skb(skb); + return ERR_PTR(err); +} + static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { + struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *old_iph; const struct iphdr *tiph; @@ -751,10 +776,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev __be32 dst; int mtu; u8 ttl; + int err; + int pkt_len; - if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb)) - goto tx_error; + skb = handle_offloads(skb); + if (IS_ERR(skb)) { + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + if (!skb->encapsulation) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } old_iph = ip_hdr(skb); @@ -855,7 +889,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (skb->protocol == htons(ETH_P_IP)) { df |= (old_iph->frag_off&htons(IP_DF)); - if ((old_iph->frag_off&htons(IP_DF)) && + if (!skb_is_gso(skb) && + (old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); @@ -875,7 +910,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } } - if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { + if (!skb_is_gso(skb) && + mtu >= IPV6_MIN_MTU && + mtu < skb->len - tunnel->hlen + gre_hlen) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; @@ -936,6 +973,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev iph->daddr = fl4.daddr; iph->saddr = fl4.saddr; iph->ttl = ttl; + iph->id = 0; if (ttl == 0) { if (skb->protocol == htons(ETH_P_IP)) @@ -964,9 +1002,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev *ptr = tunnel->parms.o_key; ptr--; } - if (tunnel->parms.o_flags&GRE_CSUM) { + /* Skip GRE checksum if skb is getting offloaded. */ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) && + (tunnel->parms.o_flags&GRE_CSUM)) { int offset = skb_transport_offset(skb); + if (skb_has_shared_frag(skb)) { + err = __skb_linearize(skb); + if (err) { + ip_rt_put(rt); + goto tx_error; + } + } + *ptr = 0; *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, skb->len - offset, @@ -974,7 +1022,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } } - iptunnel_xmit(skb, dev); + nf_reset(skb); + + pkt_len = skb->len - skb_transport_offset(skb); + err = ip_local_out(skb); + if (likely(net_xmit_eval(err) == 0)) { + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + dev->stats.tx_errors++; + dev->stats.tx_aborted_errors++; + } return NETDEV_TX_OK; #if IS_ENABLED(CONFIG_IPV6) @@ -1044,6 +1104,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) mtu = 68; tunnel->hlen = addend; + /* TCP offload with GRE SEQ is not supported. */ + if (!(tunnel->parms.o_flags & GRE_SEQ)) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } return mtu; } @@ -1593,6 +1658,9 @@ static void ipgre_tap_setup(struct net_device *dev) dev->iflink = 0; dev->features |= NETIF_F_NETNS_LOCAL; + + dev->features |= GRE_FEATURES; + dev->hw_features |= GRE_FEATURES; } static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1f0bedb8622f..7a5ba48c2cc9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3043,6 +3043,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | + SKB_GSO_GRE | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6182d90e97b0..fd0cea114b5d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1331,7 +1331,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) /* Remove acked data from a packet in the transmit queue. */ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) { - if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; __pskb_trim_head(skb, len); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6791aac06ea9..39a5e7a9a77f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2305,7 +2305,8 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 06814b6216dc..1f12c8b45864 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -132,7 +132,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) * header and optional ESP marker bytes) and then modify the * protocol to ESP, and then call into the transform receiver. */ - if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto drop; /* Now we can update and verify the packet length... */ diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index ddee0a099a2c..1162ace30838 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -142,8 +142,7 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) for_each_input_rcu(rcv_notify_handlers, handler) handler->handler(skb); - if (skb_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + if (err = skb_unclone(skb, GFP_ATOMIC)) goto out; if (x->props.flags & XFRM_STATE_DECAP_DSCP) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 384233188ac1..bb02e176cb70 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -521,8 +521,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) /* We are going to _remove_ AH header to keep sockets happy, * so... Later this can change. */ - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index f26f0da7f095..8234c1dcdf72 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -99,6 +99,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_GRE | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index c674f158efa8..b89a8c3186cd 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -368,7 +368,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) } /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) { + if (skb_unclone(head, GFP_ATOMIC)) { pr_debug("skb is cloned but can't expand head"); goto out_oom; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index bab2c270f292..e354743ed426 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -404,7 +404,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, goto out_oversize; /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_oom; /* If the first fragment is fragmented itself, we split diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 0c8934a317c2..cf05cf073c51 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -56,7 +56,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 9f2095b19ad0..93c41a81c4c3 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -69,8 +69,7 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; - if (skb_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + if (err = skb_unclone(skb, GFP_ATOMIC)) goto out; if (x->props.flags & XFRM_STATE_DECAP_DSCP) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 0fb9e3f567e6..e0f6de64afec 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -207,10 +207,8 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, struct tcf_ipt *ipt = a->priv; struct xt_action_param par; - if (skb_cloned(skb)) { - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return TC_ACT_UNSPEC; - } + if (skb_unclone(skb, GFP_ATOMIC)) + return TC_ACT_UNSPEC; spin_lock(&ipt->tcf_lock); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 0c3faddf3f2c..7ed78c9e505c 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -131,8 +131,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, int i, munged = 0; unsigned int off; - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return p->tcf_action; off = skb_network_offset(skb); |