diff options
107 files changed, 3049 insertions, 638 deletions
diff --git a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt index 3a9d67951606..72d82d684342 100644 --- a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt +++ b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt @@ -11,6 +11,8 @@ Required properties: designware version numbers documented in stmmac.txt - altr,sysmgr-syscon : Should be the phandle to the system manager node that encompasses the glue register, the register offset, and the register shift. + - altr,f2h_ptp_ref_clk use f2h_ptp_ref_clk instead of default eosc1 clock + for ptp ref clk. This affects all emacs as the clock is common. Optional properties: altr,emac-splitter: Should be the phandle to the emac splitter soft IP node if diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt index f34fc3c81a75..e862a922bd3f 100644 --- a/Documentation/devicetree/bindings/net/stmmac.txt +++ b/Documentation/devicetree/bindings/net/stmmac.txt @@ -35,18 +35,18 @@ Optional properties: - reset-names: Should contain the reset signal name "stmmaceth", if a reset phandle is given - max-frame-size: See ethernet.txt file in the same directory -- clocks: If present, the first clock should be the GMAC main clock and - the second clock should be peripheral's register interface clock. Further - clocks may be specified in derived bindings. -- clock-names: One name for each entry in the clocks property, the - first one should be "stmmaceth" and the second one should be "pclk". -- clk_ptp_ref: this is the PTP reference clock; in case of the PTP is - available this clock is used for programming the Timestamp Addend Register. - If not passed then the system clock will be used and this is fine on some - platforms. +- clocks: If present, the first clock should be the GMAC main clock + The optional second clock should be peripheral's register interface clock. + The third optional clock should be the ptp reference clock. + Further clocks may be specified in derived bindings. +- clock-names: One name for each entry in the clocks property. + The first one should be "stmmaceth". + The optional second one should be "pclk". + The optional third one should be "clk_ptp_ref". - snps,burst_len: The AXI burst lenth value of the AXI BUS MODE register. - tx-fifo-depth: See ethernet.txt file in the same directory - rx-fifo-depth: See ethernet.txt file in the same directory +- mdio: with compatible = "snps,dwmac-mdio", create and register mdio bus. Examples: @@ -65,4 +65,11 @@ Examples: tx-fifo-depth = <16384>; clocks = <&clock>; clock-names = "stmmaceth"; + mdio0 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,dwmac-mdio"; + phy1: ethernet-phy@0 { + }; + }; }; diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 2ea4c45cf1c8..5de632ed0ec0 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1723,6 +1723,25 @@ addip_enable - BOOLEAN Default: 0 +pf_enable - INTEGER + Enable or disable pf (pf is short for potentially failed) state. A value + of pf_retrans > path_max_retrans also disables pf state. That is, one of + both pf_enable and pf_retrans > path_max_retrans can disable pf state. + Since pf_retrans and path_max_retrans can be changed by userspace + application, sometimes user expects to disable pf state by the value of + pf_retrans > path_max_retrans, but occasionally the value of pf_retrans + or path_max_retrans is changed by the user application, this pf state is + enabled. As such, it is necessary to add this to dynamically enable + and disable pf state. See: + https://datatracker.ietf.org/doc/draft-ietf-tsvwg-sctp-failover for + details. + + 1: Enable pf. + + 0: Disable pf. + + Default: 1 + addip_noauth_enable - BOOLEAN Dynamic Address Reconfiguration (ADD-IP) requires the use of authentication to protect the operations of adding or removing new @@ -1799,7 +1818,9 @@ pf_retrans - INTEGER having to reduce path_max_retrans to a very low value. See: http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt for details. Note also that a value of pf_retrans > path_max_retrans - disables this feature + disables this feature. Since both pf_retrans and path_max_retrans can + be changed by userspace application, a variable pf_enable is used to + disable pf state. Default: 0 diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index fe0e7a6f4d72..cab99fd44c8e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1067,12 +1067,12 @@ static netdev_features_t bond_fix_features(struct net_device *dev, return features; } -#define BOND_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \ +#define BOND_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ NETIF_F_HIGHDMA | NETIF_F_LRO) -#define BOND_ENC_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_RXCSUM |\ - NETIF_F_ALL_TSO) +#define BOND_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_RXCSUM | NETIF_F_ALL_TSO) static void bond_compute_features(struct bonding *bond) { @@ -4182,7 +4182,6 @@ void bond_setup(struct net_device *bond_dev) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; - bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; bond_dev->features |= bond_dev->hw_features; } diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 4cab8879f5ae..34e324f20d80 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5289,7 +5289,7 @@ static netdev_features_t be_features_check(struct sk_buff *skb, skb->inner_protocol != htons(ETH_P_TEB) || skb_inner_mac_header(skb) - skb_transport_header(skb) != sizeof(struct udphdr) + sizeof(struct vxlanhdr)) - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 7af870a3c549..6691b5a45b9d 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -763,7 +763,7 @@ static netdev_features_t ibmveth_fix_features(struct net_device *dev, */ if (!(features & NETIF_F_RXCSUM)) - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; return features; } @@ -928,7 +928,8 @@ static int ibmveth_set_features(struct net_device *dev, rc1 = ibmveth_set_csum_offload(dev, rx_csum); if (rc1 && !adapter->rx_csum) dev->features = - features & ~(NETIF_F_ALL_CSUM | NETIF_F_RXCSUM); + features & ~(NETIF_F_CSUM_MASK | + NETIF_F_RXCSUM); } if (large_send != adapter->large_send) { diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 4163b16489b3..fa593dd3efe1 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -280,6 +280,16 @@ config I40E_VXLAN Say Y here if you want to use Virtual eXtensible Local Area Network (VXLAN) in the driver. +config I40E_GENEVE + bool "Generic Network Virtualization Encapsulation (GENEVE) Support" + depends on I40E && GENEVE && !(I40E=y && GENEVE=m) + default n + ---help--- + This allows one to create GENEVE virtual interfaces that provide + Layer 2 Networks over Layer 3 Networks. GENEVE is often used + to tunnel virtual network infrastructure in virtualized environments. + Say Y here if you want to use GENEVE in the driver. + config I40E_DCB bool "Data Center Bridging (DCB) Support" default n diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index 133d4074dbe4..f7c7804d79e5 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -441,12 +441,13 @@ #define E1000_IMS_RXQ1 E1000_ICR_RXQ1 /* Rx Queue 1 Interrupt */ #define E1000_IMS_TXQ0 E1000_ICR_TXQ0 /* Tx Queue 0 Interrupt */ #define E1000_IMS_TXQ1 E1000_ICR_TXQ1 /* Tx Queue 1 Interrupt */ -#define E1000_IMS_OTHER E1000_ICR_OTHER /* Other Interrupts */ +#define E1000_IMS_OTHER E1000_ICR_OTHER /* Other Interrupt */ /* Interrupt Cause Set */ #define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ #define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ #define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */ +#define E1000_ICS_OTHER E1000_ICR_OTHER /* Other Interrupt */ /* Transmit Descriptor Control */ #define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */ diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index 0b748d1959d9..1dc293bad87b 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -480,7 +480,7 @@ extern const char e1000e_driver_version[]; void e1000e_check_options(struct e1000_adapter *adapter); void e1000e_set_ethtool_ops(struct net_device *netdev); -int e1000e_up(struct e1000_adapter *adapter); +void e1000e_up(struct e1000_adapter *adapter); void e1000e_down(struct e1000_adapter *adapter, bool reset); void e1000e_reinit_locked(struct e1000_adapter *adapter); void e1000e_reset(struct e1000_adapter *adapter); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 775e38910681..c71ba1bfc1ec 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1905,30 +1905,15 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) struct net_device *netdev = data; struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - u32 icr = er32(ICR); - - if (!(icr & E1000_ICR_INT_ASSERTED)) { - if (!test_bit(__E1000_DOWN, &adapter->state)) - ew32(IMS, E1000_IMS_OTHER); - return IRQ_NONE; - } - if (icr & adapter->eiac_mask) - ew32(ICS, (icr & adapter->eiac_mask)); + hw->mac.get_link_status = true; - if (icr & E1000_ICR_OTHER) { - if (!(icr & E1000_ICR_LSC)) - goto no_link_interrupt; - hw->mac.get_link_status = true; - /* guard against interrupt when we're going down */ - if (!test_bit(__E1000_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); + /* guard against interrupt when we're going down */ + if (!test_bit(__E1000_DOWN, &adapter->state)) { + mod_timer(&adapter->watchdog_timer, jiffies + 1); + ew32(IMS, E1000_IMS_OTHER); } -no_link_interrupt: - if (!test_bit(__E1000_DOWN, &adapter->state)) - ew32(IMS, E1000_IMS_LSC | E1000_IMS_OTHER); - return IRQ_HANDLED; } @@ -1946,6 +1931,9 @@ static irqreturn_t e1000_intr_msix_tx(int __always_unused irq, void *data) /* Ring was not completely cleaned, so fire another interrupt */ ew32(ICS, tx_ring->ims_val); + if (!test_bit(__E1000_DOWN, &adapter->state)) + ew32(IMS, adapter->tx_ring->ims_val); + return IRQ_HANDLED; } @@ -2027,6 +2015,7 @@ static void e1000_configure_msix(struct e1000_adapter *adapter) hw->hw_addr + E1000_EITR_82574(vector)); else writel(1, hw->hw_addr + E1000_EITR_82574(vector)); + adapter->eiac_mask |= E1000_IMS_OTHER; /* Cause Tx interrupts on every write back */ ivar |= (1 << 31); @@ -2034,12 +2023,8 @@ static void e1000_configure_msix(struct e1000_adapter *adapter) ew32(IVAR, ivar); /* enable MSI-X PBA support */ - ctrl_ext = er32(CTRL_EXT); - ctrl_ext |= E1000_CTRL_EXT_PBA_CLR; - - /* Auto-Mask Other interrupts upon ICR read */ - ew32(IAM, ~E1000_EIAC_MASK_82574 | E1000_IMS_OTHER); - ctrl_ext |= E1000_CTRL_EXT_EIAME; + ctrl_ext = er32(CTRL_EXT) & ~E1000_CTRL_EXT_IAME; + ctrl_ext |= E1000_CTRL_EXT_PBA_CLR | E1000_CTRL_EXT_EIAME; ew32(CTRL_EXT, ctrl_ext); e1e_flush(); } @@ -2255,7 +2240,7 @@ static void e1000_irq_enable(struct e1000_adapter *adapter) if (adapter->msix_entries) { ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574); - ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC); + ew32(IMS, adapter->eiac_mask | E1000_IMS_LSC); } else if ((hw->mac.type == e1000_pch_lpt) || (hw->mac.type == e1000_pch_spt)) { ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER); @@ -4146,10 +4131,24 @@ void e1000e_reset(struct e1000_adapter *adapter) } -int e1000e_up(struct e1000_adapter *adapter) +/** + * e1000e_trigger_lsc - trigger an LSC interrupt + * @adapter: + * + * Fire a link status change interrupt to start the watchdog. + **/ +static void e1000e_trigger_lsc(struct e1000_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + if (adapter->msix_entries) + ew32(ICS, E1000_ICS_OTHER); + else + ew32(ICS, E1000_ICS_LSC); +} + +void e1000e_up(struct e1000_adapter *adapter) +{ /* hardware has been reset, we need to reload some things */ e1000_configure(adapter); @@ -4161,13 +4160,7 @@ int e1000e_up(struct e1000_adapter *adapter) netif_start_queue(adapter->netdev); - /* fire a link change interrupt to start the watchdog */ - if (adapter->msix_entries) - ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER); - else - ew32(ICS, E1000_ICS_LSC); - - return 0; + e1000e_trigger_lsc(adapter); } static void e1000e_flush_descriptors(struct e1000_adapter *adapter) @@ -4592,11 +4585,7 @@ static int e1000_open(struct net_device *netdev) hw->mac.get_link_status = true; pm_runtime_put(&pdev->dev); - /* fire a link status change interrupt to start the watchdog */ - if (adapter->msix_entries) - ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER); - else - ew32(ICS, E1000_ICS_LSC); + e1000e_trigger_lsc(adapter); return 0; @@ -6633,7 +6622,7 @@ static int e1000e_pm_runtime_resume(struct device *dev) return rc; if (netdev->flags & IFF_UP) - rc = e1000e_up(adapter); + e1000e_up(adapter); return rc; } @@ -6824,13 +6813,8 @@ static void e1000_io_resume(struct pci_dev *pdev) e1000_init_manageability_pt(adapter); - if (netif_running(netdev)) { - if (e1000e_up(adapter)) { - dev_err(&pdev->dev, - "can't bring device back up after reset\n"); - return; - } - } + if (netif_running(netdev)) + e1000e_up(adapter); netif_device_attach(netdev); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index d9854d39576d..83ddf362ea77 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1357,7 +1357,7 @@ static netdev_features_t fm10k_features_check(struct sk_buff *skb, if (!skb->encapsulation || fm10k_tx_encap_offload(skb)) return features; - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } static const struct net_device_ops fm10k_netdev_ops = { diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index b7bc014ae00b..c202f9b9386a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -245,6 +245,11 @@ struct i40e_tc_configuration { struct i40e_tc_info tc_info[I40E_MAX_TRAFFIC_CLASS]; }; +struct i40e_udp_port_config { + __be16 index; + u8 type; +}; + /* struct that defines the Ethernet device */ struct i40e_pf { struct pci_dev *pdev; @@ -281,11 +286,9 @@ struct i40e_pf { u32 fd_atr_cnt; u32 fd_tcp_rule; -#ifdef CONFIG_I40E_VXLAN - __be16 vxlan_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS]; - u16 pending_vxlan_bitmap; + struct i40e_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS]; + u16 pending_udp_bitmap; -#endif enum i40e_interrupt_policy int_policy; u16 rx_itr_default; u16 tx_itr_default; @@ -322,9 +325,7 @@ struct i40e_pf { #define I40E_FLAG_FD_ATR_ENABLED BIT_ULL(22) #define I40E_FLAG_PTP BIT_ULL(25) #define I40E_FLAG_MFP_ENABLED BIT_ULL(26) -#ifdef CONFIG_I40E_VXLAN -#define I40E_FLAG_VXLAN_FILTER_SYNC BIT_ULL(27) -#endif +#define I40E_FLAG_UDP_FILTER_SYNC BIT_ULL(27) #define I40E_FLAG_PORT_ID_VALID BIT_ULL(28) #define I40E_FLAG_DCB_CAPABLE BIT_ULL(29) #define I40E_FLAG_RSS_AQ_CAPABLE BIT_ULL(31) @@ -336,6 +337,7 @@ struct i40e_pf { #define I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE BIT_ULL(38) #define I40E_FLAG_LINK_POLLING_ENABLED BIT_ULL(39) #define I40E_FLAG_VEB_MODE_ENABLED BIT_ULL(40) +#define I40E_FLAG_GENEVE_OFFLOAD_CAPABLE BIT_ULL(41) #define I40E_FLAG_NO_PCI_LINK_CHECK BIT_ULL(42) /* tracks features that get auto disabled by errors */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b118deb08ce6..23211e08eecb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -27,9 +27,12 @@ /* Local includes */ #include "i40e.h" #include "i40e_diag.h" -#ifdef CONFIG_I40E_VXLAN +#if IS_ENABLED(CONFIG_VXLAN) #include <net/vxlan.h> #endif +#if IS_ENABLED(CONFIG_GENEVE) +#include <net/geneve.h> +#endif const char i40e_driver_name[] = "i40e"; static const char i40e_driver_string[] = @@ -5336,6 +5339,9 @@ int i40e_open(struct net_device *netdev) #ifdef CONFIG_I40E_VXLAN vxlan_get_rx_port(netdev); #endif +#ifdef CONFIG_I40E_GENEVE + geneve_get_rx_port(netdev); +#endif return 0; } @@ -7036,30 +7042,30 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) i40e_flush(hw); } -#ifdef CONFIG_I40E_VXLAN /** - * i40e_sync_vxlan_filters_subtask - Sync the VSI filter list with HW + * i40e_sync_udp_filters_subtask - Sync the VSI filter list with HW * @pf: board private structure **/ -static void i40e_sync_vxlan_filters_subtask(struct i40e_pf *pf) +static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) { +#if IS_ENABLED(CONFIG_VXLAN) || IS_ENABLED(CONFIG_GENEVE) struct i40e_hw *hw = &pf->hw; i40e_status ret; __be16 port; int i; - if (!(pf->flags & I40E_FLAG_VXLAN_FILTER_SYNC)) + if (!(pf->flags & I40E_FLAG_UDP_FILTER_SYNC)) return; - pf->flags &= ~I40E_FLAG_VXLAN_FILTER_SYNC; + pf->flags &= ~I40E_FLAG_UDP_FILTER_SYNC; for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { - if (pf->pending_vxlan_bitmap & BIT_ULL(i)) { - pf->pending_vxlan_bitmap &= ~BIT_ULL(i); - port = pf->vxlan_ports[i]; + if (pf->pending_udp_bitmap & BIT_ULL(i)) { + pf->pending_udp_bitmap &= ~BIT_ULL(i); + port = pf->udp_ports[i].index; if (port) ret = i40e_aq_add_udp_tunnel(hw, ntohs(port), - I40E_AQC_TUNNEL_TYPE_VXLAN, + pf->udp_ports[i].type, NULL, NULL); else ret = i40e_aq_del_udp_tunnel(hw, i, NULL); @@ -7072,13 +7078,13 @@ static void i40e_sync_vxlan_filters_subtask(struct i40e_pf *pf) i40e_stat_str(&pf->hw, ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); - pf->vxlan_ports[i] = 0; + pf->udp_ports[i].index = 0; } } } +#endif } -#endif /** * i40e_service_task - Run the driver's async subtasks * @work: pointer to work_struct containing our data @@ -7103,8 +7109,8 @@ static void i40e_service_task(struct work_struct *work) i40e_watchdog_subtask(pf); i40e_fdir_reinit_subtask(pf); i40e_sync_filters_subtask(pf); -#ifdef CONFIG_I40E_VXLAN - i40e_sync_vxlan_filters_subtask(pf); +#if IS_ENABLED(CONFIG_VXLAN) || IS_ENABLED(CONFIG_GENEVE) + i40e_sync_udp_filters_subtask(pf); #endif i40e_clean_adminq_subtask(pf); @@ -8380,7 +8386,8 @@ static int i40e_sw_init(struct i40e_pf *pf) I40E_FLAG_HW_ATR_EVICT_CAPABLE | I40E_FLAG_OUTER_UDP_CSUM_CAPABLE | I40E_FLAG_WB_ON_ITR_CAPABLE | - I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE; + I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE | + I40E_FLAG_GENEVE_OFFLOAD_CAPABLE; } pf->eeprom_version = 0xDEAD; pf->lan_veb = I40E_NO_VEB; @@ -8479,26 +8486,27 @@ static int i40e_set_features(struct net_device *netdev, return 0; } -#ifdef CONFIG_I40E_VXLAN +#if IS_ENABLED(CONFIG_VXLAN) || IS_ENABLED(CONFIG_GENEVE) /** - * i40e_get_vxlan_port_idx - Lookup a possibly offloaded for Rx UDP port + * i40e_get_udp_port_idx - Lookup a possibly offloaded for Rx UDP port * @pf: board private structure * @port: The UDP port to look up * * Returns the index number or I40E_MAX_PF_UDP_OFFLOAD_PORTS if port not found **/ -static u8 i40e_get_vxlan_port_idx(struct i40e_pf *pf, __be16 port) +static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, __be16 port) { u8 i; for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { - if (pf->vxlan_ports[i] == port) + if (pf->udp_ports[i].index == port) return i; } return i; } +#endif /** * i40e_add_vxlan_port - Get notifications about VXLAN ports that come up * @netdev: This physical port's netdev @@ -8508,6 +8516,7 @@ static u8 i40e_get_vxlan_port_idx(struct i40e_pf *pf, __be16 port) static void i40e_add_vxlan_port(struct net_device *netdev, sa_family_t sa_family, __be16 port) { +#if IS_ENABLED(CONFIG_VXLAN) struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; @@ -8517,7 +8526,7 @@ static void i40e_add_vxlan_port(struct net_device *netdev, if (sa_family == AF_INET6) return; - idx = i40e_get_vxlan_port_idx(pf, port); + idx = i40e_get_udp_port_idx(pf, port); /* Check if port already exists */ if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) { @@ -8527,7 +8536,7 @@ static void i40e_add_vxlan_port(struct net_device *netdev, } /* Now check if there is space to add the new port */ - next_idx = i40e_get_vxlan_port_idx(pf, 0); + next_idx = i40e_get_udp_port_idx(pf, 0); if (next_idx == I40E_MAX_PF_UDP_OFFLOAD_PORTS) { netdev_info(netdev, "maximum number of vxlan UDP ports reached, not adding port %d\n", @@ -8536,9 +8545,11 @@ static void i40e_add_vxlan_port(struct net_device *netdev, } /* New port: add it and mark its index in the bitmap */ - pf->vxlan_ports[next_idx] = port; - pf->pending_vxlan_bitmap |= BIT_ULL(next_idx); - pf->flags |= I40E_FLAG_VXLAN_FILTER_SYNC; + pf->udp_ports[next_idx].index = port; + pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_VXLAN; + pf->pending_udp_bitmap |= BIT_ULL(next_idx); + pf->flags |= I40E_FLAG_UDP_FILTER_SYNC; +#endif } /** @@ -8550,6 +8561,7 @@ static void i40e_add_vxlan_port(struct net_device *netdev, static void i40e_del_vxlan_port(struct net_device *netdev, sa_family_t sa_family, __be16 port) { +#if IS_ENABLED(CONFIG_VXLAN) struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; @@ -8558,23 +8570,108 @@ static void i40e_del_vxlan_port(struct net_device *netdev, if (sa_family == AF_INET6) return; - idx = i40e_get_vxlan_port_idx(pf, port); + idx = i40e_get_udp_port_idx(pf, port); /* Check if port already exists */ if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) { /* if port exists, set it to 0 (mark for deletion) * and make it pending */ - pf->vxlan_ports[idx] = 0; - pf->pending_vxlan_bitmap |= BIT_ULL(idx); - pf->flags |= I40E_FLAG_VXLAN_FILTER_SYNC; + pf->udp_ports[idx].index = 0; + pf->pending_udp_bitmap |= BIT_ULL(idx); + pf->flags |= I40E_FLAG_UDP_FILTER_SYNC; } else { netdev_warn(netdev, "vxlan port %d was not found, not deleting\n", ntohs(port)); } +#endif } +/** + * i40e_add_geneve_port - Get notifications about GENEVE ports that come up + * @netdev: This physical port's netdev + * @sa_family: Socket Family that GENEVE is notifying us about + * @port: New UDP port number that GENEVE started listening to + **/ +static void i40e_add_geneve_port(struct net_device *netdev, + sa_family_t sa_family, __be16 port) +{ +#if IS_ENABLED(CONFIG_GENEVE) + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + u8 next_idx; + u8 idx; + + if (sa_family == AF_INET6) + return; + + idx = i40e_get_udp_port_idx(pf, port); + + /* Check if port already exists */ + if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) { + netdev_info(netdev, "udp port %d already offloaded\n", + ntohs(port)); + return; + } + + /* Now check if there is space to add the new port */ + next_idx = i40e_get_udp_port_idx(pf, 0); + + if (next_idx == I40E_MAX_PF_UDP_OFFLOAD_PORTS) { + netdev_info(netdev, "maximum number of UDP ports reached, not adding port %d\n", + ntohs(port)); + return; + } + + /* New port: add it and mark its index in the bitmap */ + pf->udp_ports[next_idx].index = port; + pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_NGE; + pf->pending_udp_bitmap |= BIT_ULL(next_idx); + pf->flags |= I40E_FLAG_UDP_FILTER_SYNC; + + dev_info(&pf->pdev->dev, "adding geneve port %d\n", ntohs(port)); #endif +} + +/** + * i40e_del_geneve_port - Get notifications about GENEVE ports that go away + * @netdev: This physical port's netdev + * @sa_family: Socket Family that GENEVE is notifying us about + * @port: UDP port number that GENEVE stopped listening to + **/ +static void i40e_del_geneve_port(struct net_device *netdev, + sa_family_t sa_family, __be16 port) +{ +#if IS_ENABLED(CONFIG_GENEVE) + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + u8 idx; + + if (sa_family == AF_INET6) + return; + + idx = i40e_get_udp_port_idx(pf, port); + + /* Check if port already exists */ + if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) { + /* if port exists, set it to 0 (mark for deletion) + * and make it pending + */ + pf->udp_ports[idx].index = 0; + pf->pending_udp_bitmap |= BIT_ULL(idx); + pf->flags |= I40E_FLAG_UDP_FILTER_SYNC; + + dev_info(&pf->pdev->dev, "deleting geneve port %d\n", + ntohs(port)); + } else { + netdev_warn(netdev, "geneve port %d was not found, not deleting\n", + ntohs(port)); + } +#endif +} + static int i40e_get_phys_port_id(struct net_device *netdev, struct netdev_phys_item_id *ppid) { @@ -8752,7 +8849,10 @@ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, nlflags, 0, 0, filter_mask, NULL); } -#define I40E_MAX_TUNNEL_HDR_LEN 80 +/* Hardware supports L4 tunnel length of 128B (=2^7) which includes + * inner mac plus all inner ethertypes. + */ +#define I40E_MAX_TUNNEL_HDR_LEN 128 /** * i40e_features_check - Validate encapsulated packet conforms to limits * @skb: skb buff @@ -8764,9 +8864,9 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb, netdev_features_t features) { if (skb->encapsulation && - (skb_inner_mac_header(skb) - skb_transport_header(skb) > + ((skb_inner_network_header(skb) - skb_transport_header(skb)) > I40E_MAX_TUNNEL_HDR_LEN)) - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } @@ -8799,10 +8899,14 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_get_vf_config = i40e_ndo_get_vf_config, .ndo_set_vf_link_state = i40e_ndo_set_vf_link_state, .ndo_set_vf_spoofchk = i40e_ndo_set_vf_spoofchk, -#ifdef CONFIG_I40E_VXLAN +#if IS_ENABLED(CONFIG_VXLAN) .ndo_add_vxlan_port = i40e_add_vxlan_port, .ndo_del_vxlan_port = i40e_del_vxlan_port, #endif +#if IS_ENABLED(CONFIG_GENEVE) + .ndo_add_geneve_port = i40e_add_geneve_port, + .ndo_del_geneve_port = i40e_del_geneve_port, +#endif .ndo_get_phys_port_id = i40e_get_phys_port_id, .ndo_fdb_add = i40e_ndo_fdb_add, .ndo_features_check = i40e_features_check, @@ -8836,13 +8940,14 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) np->vsi = vsi; netdev->hw_enc_features |= NETIF_F_IP_CSUM | + NETIF_F_RXCSUM | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | NETIF_F_TSO; netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_SCTP_CSUM | + NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | @@ -10349,6 +10454,9 @@ static void i40e_print_features(struct i40e_pf *pf) #if IS_ENABLED(CONFIG_VXLAN) i += snprintf(&buf[i], REMAIN(i), " VxLAN"); #endif +#if IS_ENABLED(CONFIG_GENEVE) + i += snprintf(&buf[i], REMAIN(i), " Geneve"); +#endif if (pf->flags & I40E_FLAG_PTP) i += snprintf(&buf[i], REMAIN(i), " PTP"); #ifdef I40E_FCOE diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index b0ae3e695783..e9e9a37ee274 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1380,7 +1380,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT)) return; - /* If VXLAN traffic has an outer UDPv4 checksum we need to check + /* If VXLAN/GENEVE traffic has an outer UDPv4 checksum we need to check * it in the driver, hardware does not do it for us. * Since L3L4P bit was set we assume a valid IHL value (>=5) * so the total length of IPv4 header is IHL*4 bytes @@ -2001,7 +2001,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6))) return; - if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL)) { + if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL)) { /* snag network header to get L4 type and address */ hdr.network = skb_network_header(skb); @@ -2086,7 +2086,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT; dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; - if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL)) + if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL)) dtype_cmd |= ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & @@ -2319,7 +2319,7 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, oudph = udp_hdr(skb); oiph = ip_hdr(skb); l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING; - *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; + *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; break; case IPPROTO_GRE: l4_tunnel = I40E_TXD_CTX_GRE_TUNNELING; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index dccc1eb576f2..3f081e25e097 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -163,7 +163,7 @@ enum i40e_dyn_idx_t { #define I40E_TX_FLAGS_FSO BIT(7) #define I40E_TX_FLAGS_TSYN BIT(8) #define I40E_TX_FLAGS_FD_SB BIT(9) -#define I40E_TX_FLAGS_VXLAN_TUNNEL BIT(10) +#define I40E_TX_FLAGS_UDP_TUNNEL BIT(10) #define I40E_TX_FLAGS_VLAN_MASK 0xffff0000 #define I40E_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000 #define I40E_TX_FLAGS_VLAN_PRIO_SHIFT 29 diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 455394cf7f80..4d05ff6f0423 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -2321,7 +2321,7 @@ int i40evf_process_config(struct i40evf_adapter *adapter) netdev->features |= NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_SCTP_CSUM | + NETIF_F_SCTP_CRC | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index 362911d024b5..adb33e2a0137 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -45,8 +45,6 @@ static s32 igb_get_cfg_done_82575(struct e1000_hw *); static s32 igb_init_hw_82575(struct e1000_hw *); static s32 igb_phy_hw_reset_sgmii_82575(struct e1000_hw *); static s32 igb_read_phy_reg_sgmii_82575(struct e1000_hw *, u32, u16 *); -static s32 igb_read_phy_reg_82580(struct e1000_hw *, u32, u16 *); -static s32 igb_write_phy_reg_82580(struct e1000_hw *, u32, u16); static s32 igb_reset_hw_82575(struct e1000_hw *); static s32 igb_reset_hw_82580(struct e1000_hw *); static s32 igb_set_d0_lplu_state_82575(struct e1000_hw *, bool); @@ -205,13 +203,10 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw) case e1000_82580: case e1000_i350: case e1000_i354: - phy->ops.read_reg = igb_read_phy_reg_82580; - phy->ops.write_reg = igb_write_phy_reg_82580; - break; case e1000_i210: case e1000_i211: - phy->ops.read_reg = igb_read_phy_reg_gs40g; - phy->ops.write_reg = igb_write_phy_reg_gs40g; + phy->ops.read_reg = igb_read_phy_reg_82580; + phy->ops.write_reg = igb_write_phy_reg_82580; break; default: phy->ops.read_reg = igb_read_phy_reg_igp; @@ -2153,7 +2148,7 @@ void igb_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable) * Reads the MDI control register in the PHY at offset and stores the * information read to data. **/ -static s32 igb_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data) +s32 igb_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data) { s32 ret_val; @@ -2177,7 +2172,7 @@ out: * * Writes data to MDI control register in the PHY at offset. **/ -static s32 igb_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data) +s32 igb_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data) { s32 ret_val; diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index a61ee9462dd4..c3c598c347a9 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -927,7 +927,10 @@ /* Intel i347-AT4 Registers */ -#define I347AT4_PCDL 0x10 /* PHY Cable Diagnostics Length */ +#define I347AT4_PCDL0 0x10 /* Pair 0 PHY Cable Diagnostics Length */ +#define I347AT4_PCDL1 0x11 /* Pair 1 PHY Cable Diagnostics Length */ +#define I347AT4_PCDL2 0x12 /* Pair 2 PHY Cable Diagnostics Length */ +#define I347AT4_PCDL3 0x13 /* Pair 3 PHY Cable Diagnostics Length */ #define I347AT4_PCDC 0x15 /* PHY Cable Diagnostics Control */ #define I347AT4_PAGE_SELECT 0x16 diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h index 2003b3756ba2..4034207eb5cc 100644 --- a/drivers/net/ethernet/intel/igb/e1000_hw.h +++ b/drivers/net/ethernet/intel/igb/e1000_hw.h @@ -441,6 +441,7 @@ struct e1000_phy_info { u16 cable_length; u16 max_cable_length; u16 min_cable_length; + u16 pair_length[4]; u8 mdix; diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c index 29f59c76878a..8aa798737d4d 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.c +++ b/drivers/net/ethernet/intel/igb/e1000_i210.c @@ -861,10 +861,10 @@ s32 igb_pll_workaround_i210(struct e1000_hw *hw) if (ret_val) nvm_word = E1000_INVM_DEFAULT_AL; tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL; + igb_write_phy_reg_82580(hw, I347AT4_PAGE_SELECT, E1000_PHY_PLL_FREQ_PAGE); for (i = 0; i < E1000_MAX_PLL_TRIES; i++) { /* check current state directly from internal PHY */ - igb_read_phy_reg_gs40g(hw, (E1000_PHY_PLL_FREQ_PAGE | - E1000_PHY_PLL_FREQ_REG), &phy_word); + igb_read_phy_reg_82580(hw, E1000_PHY_PLL_FREQ_REG, &phy_word); if ((phy_word & E1000_PHY_PLL_UNCONF) != E1000_PHY_PLL_UNCONF) { ret_val = 0; @@ -896,6 +896,7 @@ s32 igb_pll_workaround_i210(struct e1000_hw *hw) /* restore WUC register */ wr32(E1000_WUC, wuc); } + igb_write_phy_reg_82580(hw, I347AT4_PAGE_SELECT, 0); /* restore MDICNFG setting */ wr32(E1000_MDICNFG, mdicnfg); return ret_val; diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h b/drivers/net/ethernet/intel/igb/e1000_i210.h index eaa68a50cb3b..b2964a2a60b1 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.h +++ b/drivers/net/ethernet/intel/igb/e1000_i210.h @@ -85,7 +85,7 @@ enum E1000_INVM_STRUCTURE_TYPE { #define E1000_PCI_PMCSR_D3 0x03 #define E1000_MAX_PLL_TRIES 5 #define E1000_PHY_PLL_UNCONF 0xFF -#define E1000_PHY_PLL_FREQ_PAGE 0xFC0000 +#define E1000_PHY_PLL_FREQ_PAGE 0xFC #define E1000_PHY_PLL_FREQ_REG 0x000E #define E1000_INVM_DEFAULT_AL 0x202F #define E1000_INVM_AUTOLOAD 0x0A diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index c0df40f2b295..5b54254aed4f 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -1717,59 +1717,76 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw) struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_data, phy_data2, index, default_page, is_cm; + int len_tot = 0; + u16 len_min; + u16 len_max; switch (hw->phy.id) { + case M88E1543_E_PHY_ID: + case M88E1512_E_PHY_ID: + case I347AT4_E_PHY_ID: case I210_I_PHY_ID: - /* Get cable length from PHY Cable Diagnostics Control Reg */ - ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) + - (I347AT4_PCDL + phy->addr), - &phy_data); + /* Remember the original page select and set it to 7 */ + ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT, + &default_page); if (ret_val) - return ret_val; + goto out; + + ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x07); + if (ret_val) + goto out; /* Check if the unit of cable length is meters or cm */ - ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) + - I347AT4_PCDC, &phy_data2); + ret_val = phy->ops.read_reg(hw, I347AT4_PCDC, &phy_data2); if (ret_val) - return ret_val; + goto out; is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT); - /* Populate the phy structure with cable length in meters */ - phy->min_cable_length = phy_data / (is_cm ? 100 : 1); - phy->max_cable_length = phy_data / (is_cm ? 100 : 1); - phy->cable_length = phy_data / (is_cm ? 100 : 1); - break; - case M88E1543_E_PHY_ID: - case M88E1512_E_PHY_ID: - case I347AT4_E_PHY_ID: - /* Remember the original page select and set it to 7 */ - ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT, - &default_page); + /* Get cable length from Pair 0 length Regs */ + ret_val = phy->ops.read_reg(hw, I347AT4_PCDL0, &phy_data); if (ret_val) goto out; - ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x07); + phy->pair_length[0] = phy_data / (is_cm ? 100 : 1); + len_tot = phy->pair_length[0]; + len_min = phy->pair_length[0]; + len_max = phy->pair_length[0]; + + /* Get cable length from Pair 1 length Regs */ + ret_val = phy->ops.read_reg(hw, I347AT4_PCDL1, &phy_data); if (ret_val) goto out; - /* Get cable length from PHY Cable Diagnostics Control Reg */ - ret_val = phy->ops.read_reg(hw, (I347AT4_PCDL + phy->addr), - &phy_data); + phy->pair_length[1] = phy_data / (is_cm ? 100 : 1); + len_tot += phy->pair_length[1]; + len_min = min(len_min, phy->pair_length[1]); + len_max = max(len_max, phy->pair_length[1]); + + /* Get cable length from Pair 2 length Regs */ + ret_val = phy->ops.read_reg(hw, I347AT4_PCDL2, &phy_data); if (ret_val) goto out; - /* Check if the unit of cable length is meters or cm */ - ret_val = phy->ops.read_reg(hw, I347AT4_PCDC, &phy_data2); + phy->pair_length[2] = phy_data / (is_cm ? 100 : 1); + len_tot += phy->pair_length[2]; + len_min = min(len_min, phy->pair_length[2]); + len_max = max(len_max, phy->pair_length[2]); + + /* Get cable length from Pair 3 length Regs */ + ret_val = phy->ops.read_reg(hw, I347AT4_PCDL3, &phy_data); if (ret_val) goto out; - is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT); + phy->pair_length[3] = phy_data / (is_cm ? 100 : 1); + len_tot += phy->pair_length[3]; + len_min = min(len_min, phy->pair_length[3]); + len_max = max(len_max, phy->pair_length[3]); /* Populate the phy structure with cable length in meters */ - phy->min_cable_length = phy_data / (is_cm ? 100 : 1); - phy->max_cable_length = phy_data / (is_cm ? 100 : 1); - phy->cable_length = phy_data / (is_cm ? 100 : 1); + phy->min_cable_length = len_min; + phy->max_cable_length = len_max; + phy->cable_length = len_tot / 4; /* Reset the page selec to its original value */ ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, @@ -2588,66 +2605,6 @@ out: } /** - * igb_write_phy_reg_gs40g - Write GS40G PHY register - * @hw: pointer to the HW structure - * @offset: lower half is register offset to write to - * upper half is page to use. - * @data: data to write at register offset - * - * Acquires semaphore, if necessary, then writes the data to PHY register - * at the offset. Release any acquired semaphores before exiting. - **/ -s32 igb_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data) -{ - s32 ret_val; - u16 page = offset >> GS40G_PAGE_SHIFT; - - offset = offset & GS40G_OFFSET_MASK; - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - return ret_val; - - ret_val = igb_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page); - if (ret_val) - goto release; - ret_val = igb_write_phy_reg_mdic(hw, offset, data); - -release: - hw->phy.ops.release(hw); - return ret_val; -} - -/** - * igb_read_phy_reg_gs40g - Read GS40G PHY register - * @hw: pointer to the HW structure - * @offset: lower half is register offset to read to - * upper half is page to use. - * @data: data to read at register offset - * - * Acquires semaphore, if necessary, then reads the data in the PHY register - * at the offset. Release any acquired semaphores before exiting. - **/ -s32 igb_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data) -{ - s32 ret_val; - u16 page = offset >> GS40G_PAGE_SHIFT; - - offset = offset & GS40G_OFFSET_MASK; - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - return ret_val; - - ret_val = igb_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page); - if (ret_val) - goto release; - ret_val = igb_read_phy_reg_mdic(hw, offset, data); - -release: - hw->phy.ops.release(hw); - return ret_val; -} - -/** * igb_set_master_slave_mode - Setup PHY for Master/slave mode * @hw: pointer to the HW structure * diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.h b/drivers/net/ethernet/intel/igb/e1000_phy.h index aa1ae61a61d8..969a6ddafa3b 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.h +++ b/drivers/net/ethernet/intel/igb/e1000_phy.h @@ -72,8 +72,8 @@ s32 igb_copper_link_setup_82580(struct e1000_hw *hw); s32 igb_get_phy_info_82580(struct e1000_hw *hw); s32 igb_phy_force_speed_duplex_82580(struct e1000_hw *hw); s32 igb_get_cable_length_82580(struct e1000_hw *hw); -s32 igb_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data); -s32 igb_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data); +s32 igb_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data); +s32 igb_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data); s32 igb_check_polarity_m88(struct e1000_hw *hw); /* IGP01E1000 Specific Registers */ @@ -144,17 +144,6 @@ s32 igb_check_polarity_m88(struct e1000_hw *hw); #define E1000_CABLE_LENGTH_UNDEFINED 0xFF -/* GS40G - I210 PHY defines */ -#define GS40G_PAGE_SELECT 0x16 -#define GS40G_PAGE_SHIFT 16 -#define GS40G_OFFSET_MASK 0xFFFF -#define GS40G_PAGE_2 0x20000 -#define GS40G_MAC_REG2 0x15 -#define GS40G_MAC_LB 0x4140 -#define GS40G_MAC_SPEED_1G 0X0006 -#define GS40G_COPPER_SPEC 0x0010 -#define GS40G_LINE_LB 0x4000 - /* SFP modules ID memory locations */ #define E1000_SFF_IDENTIFIER_OFFSET 0x00 #define E1000_SFF_IDENTIFIER_SFF 0x02 diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 2529bc625de4..1d329f1d047b 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -127,10 +127,20 @@ static const struct igb_stats igb_gstrings_net_stats[] = { #define IGB_STATS_LEN \ (IGB_GLOBAL_STATS_LEN + IGB_NETDEV_STATS_LEN + IGB_QUEUE_STATS_LEN) +enum igb_diagnostics_results { + TEST_REG = 0, + TEST_EEP, + TEST_IRQ, + TEST_LOOP, + TEST_LINK +}; + static const char igb_gstrings_test[][ETH_GSTRING_LEN] = { - "Register test (offline)", "Eeprom test (offline)", - "Interrupt test (offline)", "Loopback test (offline)", - "Link test (on/offline)" + [TEST_REG] = "Register test (offline)", + [TEST_EEP] = "Eeprom test (offline)", + [TEST_IRQ] = "Interrupt test (offline)", + [TEST_LOOP] = "Loopback test (offline)", + [TEST_LINK] = "Link test (on/offline)" }; #define IGB_TEST_LEN (sizeof(igb_gstrings_test) / ETH_GSTRING_LEN) @@ -2002,7 +2012,7 @@ static void igb_diag_test(struct net_device *netdev, /* Link test performed before hardware reset so autoneg doesn't * interfere with test result */ - if (igb_link_test(adapter, &data[4])) + if (igb_link_test(adapter, &data[TEST_LINK])) eth_test->flags |= ETH_TEST_FL_FAILED; if (if_running) @@ -2011,21 +2021,21 @@ static void igb_diag_test(struct net_device *netdev, else igb_reset(adapter); - if (igb_reg_test(adapter, &data[0])) + if (igb_reg_test(adapter, &data[TEST_REG])) eth_test->flags |= ETH_TEST_FL_FAILED; igb_reset(adapter); - if (igb_eeprom_test(adapter, &data[1])) + if (igb_eeprom_test(adapter, &data[TEST_EEP])) eth_test->flags |= ETH_TEST_FL_FAILED; igb_reset(adapter); - if (igb_intr_test(adapter, &data[2])) + if (igb_intr_test(adapter, &data[TEST_IRQ])) eth_test->flags |= ETH_TEST_FL_FAILED; igb_reset(adapter); /* power up link for loopback test */ igb_power_up_link(adapter); - if (igb_loopback_test(adapter, &data[3])) + if (igb_loopback_test(adapter, &data[TEST_LOOP])) eth_test->flags |= ETH_TEST_FL_FAILED; /* restore speed, duplex, autoneg settings */ @@ -2045,16 +2055,16 @@ static void igb_diag_test(struct net_device *netdev, dev_info(&adapter->pdev->dev, "online testing starting\n"); /* PHY is powered down when interface is down */ - if (if_running && igb_link_test(adapter, &data[4])) + if (if_running && igb_link_test(adapter, &data[TEST_LINK])) eth_test->flags |= ETH_TEST_FL_FAILED; else - data[4] = 0; + data[TEST_LINK] = 0; /* Online tests aren't run; pass by default */ - data[0] = 0; - data[1] = 0; - data[2] = 0; - data[3] = 0; + data[TEST_REG] = 0; + data[TEST_EEP] = 0; + data[TEST_IRQ] = 0; + data[TEST_LOOP] = 0; clear_bit(__IGB_TESTING, &adapter->state); } diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 7afde455326d..31e5f3942839 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2379,8 +2379,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } if (hw->mac.type >= e1000_82576) { - netdev->hw_features |= NETIF_F_SCTP_CSUM; - netdev->features |= NETIF_F_SCTP_CSUM; + netdev->hw_features |= NETIF_F_SCTP_CRC; + netdev->features |= NETIF_F_SCTP_CRC; } netdev->priv_flags |= IFF_UNICAST_FLT; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 66c64a376719..fca35aa90d0f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8598,7 +8598,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, if (unlikely(skb_inner_mac_header(skb) - skb_transport_header(skb) > IXGBE_MAX_TUNNEL_HDR_LEN)) - return features & ~NETIF_F_ALL_CSUM; + return features & ~NETIF_F_CSUM_MASK; return features; } @@ -8995,8 +8995,8 @@ skip_sriov: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: - netdev->features |= NETIF_F_SCTP_CSUM; - netdev->hw_features |= NETIF_F_SCTP_CSUM | + netdev->features |= NETIF_F_SCTP_CRC; + netdev->hw_features |= NETIF_F_SCTP_CRC | NETIF_F_NTUPLE; break; default: diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 060dd3922974..b1de7afd4116 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2753,7 +2753,7 @@ static netdev_features_t jme_fix_features(struct net_device *netdev, netdev_features_t features) { if (netdev->mtu > 1900) - features &= ~(NETIF_F_ALL_TSO | NETIF_F_ALL_CSUM); + features &= ~(NETIF_F_ALL_TSO | NETIF_F_CSUM_MASK); return features; } diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 5606a043063e..ec0a22119e09 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4380,7 +4380,7 @@ static netdev_features_t sky2_fix_features(struct net_device *dev, */ if (dev->mtu > ETH_DATA_LEN && hw->chip_id == CHIP_ID_YUKON_EC_U) { netdev_info(dev, "checksum offload not possible with jumbo frames\n"); - features &= ~(NETIF_F_TSO|NETIF_F_SG|NETIF_F_ALL_CSUM); + features &= ~(NETIF_F_TSO | NETIF_F_SG | NETIF_F_CSUM_MASK); } /* Some hardware requires receive checksum for RSS to work. */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index ec8caf8fedc6..ce26adcb4988 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -41,7 +41,7 @@ config MLXSW_SWITCHX2 config MLXSW_SPECTRUM tristate "Mellanox Technologies Spectrum support" - depends on MLXSW_CORE && NET_SWITCHDEV + depends on MLXSW_CORE && NET_SWITCHDEV && VLAN_8021Q default m ---help--- This driver supports Mellanox Technologies Spectrum Ethernet diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index af631df4603a..66d851d4dfb4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -396,7 +396,7 @@ static inline void mlxsw_reg_sfd_rec_pack(char *payload, int rec_index, static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index, enum mlxsw_reg_sfd_rec_policy policy, - const char *mac, u16 vid, + const char *mac, u16 fid_vid, enum mlxsw_reg_sfd_rec_action action, u8 local_port) { @@ -404,16 +404,16 @@ static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index, MLXSW_REG_SFD_REC_TYPE_UNICAST, policy, mac, action); mlxsw_reg_sfd_uc_sub_port_set(payload, rec_index, 0); - mlxsw_reg_sfd_uc_fid_vid_set(payload, rec_index, vid); + mlxsw_reg_sfd_uc_fid_vid_set(payload, rec_index, fid_vid); mlxsw_reg_sfd_uc_system_port_set(payload, rec_index, local_port); } static inline void mlxsw_reg_sfd_uc_unpack(char *payload, int rec_index, - char *mac, u16 *p_vid, + char *mac, u16 *p_fid_vid, u8 *p_local_port) { mlxsw_reg_sfd_rec_mac_memcpy_from(payload, rec_index, mac); - *p_vid = mlxsw_reg_sfd_uc_fid_vid_get(payload, rec_index); + *p_fid_vid = mlxsw_reg_sfd_uc_fid_vid_get(payload, rec_index); *p_local_port = mlxsw_reg_sfd_uc_system_port_get(payload, rec_index); } @@ -438,6 +438,13 @@ MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8, MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, MLXSW_REG_SFD_REC_LEN, 0x08, false); +/* reg_sfd_uc_lag_lag_vid + * Indicates VID in case of vFIDs. Reserved for FIDs. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_vid, MLXSW_REG_SFD_BASE_LEN, 16, 12, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + /* reg_sfd_uc_lag_lag_id * LAG Identifier - pointer into the LAG descriptor table. * Access: RW @@ -448,15 +455,16 @@ MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_id, MLXSW_REG_SFD_BASE_LEN, 0, 10, static inline void mlxsw_reg_sfd_uc_lag_pack(char *payload, int rec_index, enum mlxsw_reg_sfd_rec_policy policy, - const char *mac, u16 vid, - enum mlxsw_reg_sfd_rec_action action, + const char *mac, u16 fid_vid, + enum mlxsw_reg_sfd_rec_action action, u16 lag_vid, u16 lag_id) { mlxsw_reg_sfd_rec_pack(payload, rec_index, MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG, policy, mac, action); mlxsw_reg_sfd_uc_lag_sub_port_set(payload, rec_index, 0); - mlxsw_reg_sfd_uc_lag_fid_vid_set(payload, rec_index, vid); + mlxsw_reg_sfd_uc_lag_fid_vid_set(payload, rec_index, fid_vid); + mlxsw_reg_sfd_uc_lag_lag_vid_set(payload, rec_index, lag_vid); mlxsw_reg_sfd_uc_lag_lag_id_set(payload, rec_index, lag_id); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 322ed544348f..c588c65e91f5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -48,6 +48,7 @@ #include <linux/workqueue.h> #include <linux/jiffies.h> #include <linux/bitops.h> +#include <linux/list.h> #include <net/switchdev.h> #include <generated/utsrelease.h> @@ -186,33 +187,6 @@ static int mlxsw_sp_port_oper_status_get(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } -static int mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, u16 vfid) -{ - char sfmr_pl[MLXSW_REG_SFMR_LEN]; - int err; - - mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, - MLXSW_SP_VFID_BASE + vfid, 0); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); - - if (err) - return err; - - set_bit(vfid, mlxsw_sp->active_vfids); - return 0; -} - -static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, u16 vfid) -{ - char sfmr_pl[MLXSW_REG_SFMR_LEN]; - - clear_bit(vfid, mlxsw_sp->active_vfids); - - mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_DESTROY_FID, - MLXSW_SP_VFID_BASE + vfid, 0); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); -} - static int mlxsw_sp_port_dev_addr_set(struct mlxsw_sp_port *mlxsw_sp_port, unsigned char *addr) { @@ -549,12 +523,132 @@ static int mlxsw_sp_port_vlan_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } +static struct mlxsw_sp_vfid * +mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp, u16 vid) +{ + struct mlxsw_sp_vfid *vfid; + + list_for_each_entry(vfid, &mlxsw_sp->port_vfids.list, list) { + if (vfid->vid == vid) + return vfid; + } + + return NULL; +} + +static u16 mlxsw_sp_avail_vfid_get(const struct mlxsw_sp *mlxsw_sp) +{ + return find_first_zero_bit(mlxsw_sp->port_vfids.mapped, + MLXSW_SP_VFID_PORT_MAX); +} + +static int __mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, u16 vfid) +{ + u16 fid = mlxsw_sp_vfid_to_fid(vfid); + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); +} + +static void __mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, u16 vfid) +{ + u16 fid = mlxsw_sp_vfid_to_fid(vfid); + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_DESTROY_FID, fid, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); +} + +static struct mlxsw_sp_vfid *mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, + u16 vid) +{ + struct device *dev = mlxsw_sp->bus_info->dev; + struct mlxsw_sp_vfid *vfid; + u16 n_vfid; + int err; + + n_vfid = mlxsw_sp_avail_vfid_get(mlxsw_sp); + if (n_vfid == MLXSW_SP_VFID_PORT_MAX) { + dev_err(dev, "No available vFIDs\n"); + return ERR_PTR(-ERANGE); + } + + err = __mlxsw_sp_vfid_create(mlxsw_sp, n_vfid); + if (err) { + dev_err(dev, "Failed to create vFID=%d\n", n_vfid); + return ERR_PTR(err); + } + + vfid = kzalloc(sizeof(*vfid), GFP_KERNEL); + if (!vfid) + goto err_allocate_vfid; + + vfid->vfid = n_vfid; + vfid->vid = vid; + + list_add(&vfid->list, &mlxsw_sp->port_vfids.list); + set_bit(n_vfid, mlxsw_sp->port_vfids.mapped); + + return vfid; + +err_allocate_vfid: + __mlxsw_sp_vfid_destroy(mlxsw_sp, n_vfid); + return ERR_PTR(-ENOMEM); +} + +static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vfid *vfid) +{ + clear_bit(vfid->vfid, mlxsw_sp->port_vfids.mapped); + list_del(&vfid->list); + + __mlxsw_sp_vfid_destroy(mlxsw_sp, vfid->vfid); + + kfree(vfid); +} + +static struct mlxsw_sp_port * +mlxsw_sp_port_vport_create(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_vfid *vfid) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + + mlxsw_sp_vport = kzalloc(sizeof(*mlxsw_sp_vport), GFP_KERNEL); + if (!mlxsw_sp_vport) + return NULL; + + /* dev will be set correctly after the VLAN device is linked + * with the real device. In case of bridge SELF invocation, dev + * will remain as is. + */ + mlxsw_sp_vport->dev = mlxsw_sp_port->dev; + mlxsw_sp_vport->mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + mlxsw_sp_vport->local_port = mlxsw_sp_port->local_port; + mlxsw_sp_vport->stp_state = BR_STATE_FORWARDING; + mlxsw_sp_vport->lagged = mlxsw_sp_port->lagged; + mlxsw_sp_vport->lag_id = mlxsw_sp_port->lag_id; + mlxsw_sp_vport->vport.vfid = vfid; + mlxsw_sp_vport->vport.vid = vfid->vid; + + list_add(&mlxsw_sp_vport->vport.list, &mlxsw_sp_port->vports_list); + + return mlxsw_sp_vport; +} + +static void mlxsw_sp_port_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_vport) +{ + list_del(&mlxsw_sp_vport->vport.list); + kfree(mlxsw_sp_vport); +} + int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char *sftr_pl; + struct mlxsw_sp_port *mlxsw_sp_vport; + struct mlxsw_sp_vfid *vfid; int err; /* VLAN 0 is added to HW filter when device goes up, but it is @@ -563,100 +657,105 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, if (!vid) return 0; - if (test_bit(vid, mlxsw_sp_port->active_vfids)) { + if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) { netdev_warn(dev, "VID=%d already configured\n", vid); return 0; } - if (!test_bit(vid, mlxsw_sp->active_vfids)) { - err = mlxsw_sp_vfid_create(mlxsw_sp, vid); - if (err) { - netdev_err(dev, "Failed to create vFID=%d\n", - MLXSW_SP_VFID_BASE + vid); - return err; + vfid = mlxsw_sp_vfid_find(mlxsw_sp, vid); + if (!vfid) { + vfid = mlxsw_sp_vfid_create(mlxsw_sp, vid); + if (IS_ERR(vfid)) { + netdev_err(dev, "Failed to create vFID for VID=%d\n", + vid); + return PTR_ERR(vfid); } + } - sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); - if (!sftr_pl) { - err = -ENOMEM; - goto err_flood_table_alloc; - } - mlxsw_reg_sftr_pack(sftr_pl, 0, vid, - MLXSW_REG_SFGC_TABLE_TYPE_FID, 0, - MLXSW_PORT_CPU_PORT, true); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); - kfree(sftr_pl); + mlxsw_sp_vport = mlxsw_sp_port_vport_create(mlxsw_sp_port, vfid); + if (!mlxsw_sp_vport) { + netdev_err(dev, "Failed to create vPort for VID=%d\n", vid); + err = -ENOMEM; + goto err_port_vport_create; + } + + if (!vfid->nr_vports) { + err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, + true, false); if (err) { - netdev_err(dev, "Failed to configure flood table\n"); - goto err_flood_table_config; + netdev_err(dev, "Failed to setup flooding for vFID=%d\n", + vfid->vfid); + goto err_vport_flood_set; } } - /* In case we fail in the following steps, we intentionally do not - * destroy the associated vFID. - */ - /* When adding the first VLAN interface on a bridged port we need to * transition all the active 802.1Q bridge VLANs to use explicit * {Port, VID} to FID mappings and set the port's mode to Virtual mode. */ - if (!mlxsw_sp_port->nr_vfids) { + if (list_is_singular(&mlxsw_sp_port->vports_list)) { err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port); if (err) { netdev_err(dev, "Failed to set to Virtual mode\n"); - return err; + goto err_port_vp_mode_trans; } } - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, - true, MLXSW_SP_VFID_BASE + vid, vid); + true, + mlxsw_sp_vfid_to_fid(vfid->vfid), + vid); if (err) { netdev_err(dev, "Failed to map {Port, VID=%d} to vFID=%d\n", - vid, MLXSW_SP_VFID_BASE + vid); + vid, vfid->vfid); goto err_port_vid_to_fid_set; } - err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false); + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); if (err) { netdev_err(dev, "Failed to disable learning for VID=%d\n", vid); goto err_port_vid_learning_set; } - err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, true, false); + err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, false); if (err) { netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", vid); goto err_port_add_vid; } - err = mlxsw_sp_port_stp_state_set(mlxsw_sp_port, vid, + err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid, MLXSW_REG_SPMS_STATE_FORWARDING); if (err) { netdev_err(dev, "Failed to set STP state for VID=%d\n", vid); goto err_port_stp_state_set; } - mlxsw_sp_port->nr_vfids++; - set_bit(vid, mlxsw_sp_port->active_vfids); + vfid->nr_vports++; return 0; -err_flood_table_config: -err_flood_table_alloc: - mlxsw_sp_vfid_destroy(mlxsw_sp, vid); - return err; - err_port_stp_state_set: - mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); + mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); err_port_add_vid: - mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true); + mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); err_port_vid_learning_set: - mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, + mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, false, - MLXSW_SP_VFID_BASE + vid, vid); + mlxsw_sp_vfid_to_fid(vfid->vfid), vid); err_port_vid_to_fid_set: - mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); + if (list_is_singular(&mlxsw_sp_port->vports_list)) + mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); +err_port_vp_mode_trans: + if (!vfid->nr_vports) + mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, false, + false); +err_vport_flood_set: + mlxsw_sp_port_vport_destroy(mlxsw_sp_vport); +err_port_vport_create: + if (!vfid->nr_vports) + mlxsw_sp_vfid_destroy(mlxsw_sp, vfid); return err; } @@ -664,6 +763,8 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev, __be16 __always_unused proto, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp_port *mlxsw_sp_vport; + struct mlxsw_sp_vfid *vfid; int err; /* VLAN 0 is removed from HW filter when device goes down, but @@ -672,38 +773,42 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev, if (!vid) return 0; - if (!test_bit(vid, mlxsw_sp_port->active_vfids)) { + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + if (!mlxsw_sp_vport) { netdev_warn(dev, "VID=%d does not exist\n", vid); return 0; } - err = mlxsw_sp_port_stp_state_set(mlxsw_sp_port, vid, + vfid = mlxsw_sp_vport->vport.vfid; + + err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid, MLXSW_REG_SPMS_STATE_DISCARDING); if (err) { netdev_err(dev, "Failed to set STP state for VID=%d\n", vid); return err; } - err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); + err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); if (err) { netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", vid); return err; } - err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true); + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); if (err) { netdev_err(dev, "Failed to enable learning for VID=%d\n", vid); return err; } - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, - false, MLXSW_SP_VFID_BASE + vid, + false, + mlxsw_sp_vfid_to_fid(vfid->vfid), vid); if (err) { netdev_err(dev, "Failed to invalidate {Port, VID=%d} to vFID=%d mapping\n", - vid, MLXSW_SP_VFID_BASE + vid); + vid, vfid->vfid); return err; } @@ -711,7 +816,7 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev, * transition all active 802.1Q bridge VLANs to use VID to FID * mappings and set port's mode to VLAN mode. */ - if (mlxsw_sp_port->nr_vfids == 1) { + if (list_is_singular(&mlxsw_sp_port->vports_list)) { err = mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); if (err) { netdev_err(dev, "Failed to set to VLAN mode\n"); @@ -719,8 +824,12 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev, } } - mlxsw_sp_port->nr_vfids--; - clear_bit(vid, mlxsw_sp_port->active_vfids); + vfid->nr_vports--; + mlxsw_sp_port_vport_destroy(mlxsw_sp_vport); + + /* Destroy the vFID if no vPorts are assigned to it anymore. */ + if (!vfid->nr_vports) + mlxsw_sp_vfid_destroy(mlxsw_sp_port->mlxsw_sp, vfid); return 0; } @@ -1245,6 +1354,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port) struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *dev; bool usable; + size_t bytes; int err; dev = alloc_etherdev(sizeof(struct mlxsw_sp_port)); @@ -1258,6 +1368,13 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp_port->learning_sync = 1; mlxsw_sp_port->uc_flood = 1; mlxsw_sp_port->pvid = 1; + bytes = DIV_ROUND_UP(VLAN_N_VID, BITS_PER_BYTE); + mlxsw_sp_port->active_vlans = kzalloc(bytes, GFP_KERNEL); + if (!mlxsw_sp_port->active_vlans) { + err = -ENOMEM; + goto err_port_active_vlans_alloc; + } + INIT_LIST_HEAD(&mlxsw_sp_port->vports_list); mlxsw_sp_port->pcpu_stats = netdev_alloc_pcpu_stats(struct mlxsw_sp_port_pcpu_stats); @@ -1359,16 +1476,27 @@ err_port_module_check: err_dev_addr_init: free_percpu(mlxsw_sp_port->pcpu_stats); err_alloc_stats: + kfree(mlxsw_sp_port->active_vlans); +err_port_active_vlans_alloc: free_netdev(dev); return err; } -static void mlxsw_sp_vfids_fini(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_port_vports_fini(struct mlxsw_sp_port *mlxsw_sp_port) { - u16 vfid; + struct net_device *dev = mlxsw_sp_port->dev; + struct mlxsw_sp_port *mlxsw_sp_vport, *tmp; - for_each_set_bit(vfid, mlxsw_sp->active_vfids, VLAN_N_VID) - mlxsw_sp_vfid_destroy(mlxsw_sp, vfid); + list_for_each_entry_safe(mlxsw_sp_vport, tmp, + &mlxsw_sp_port->vports_list, vport.list) { + u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); + + /* vPorts created for VLAN devices should already be gone + * by now, since we unregistered the port netdev. + */ + WARN_ON(is_vlan_dev(mlxsw_sp_vport->dev)); + mlxsw_sp_port_kill_vid(dev, 0, vid); + } } static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) @@ -1377,10 +1505,11 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) if (!mlxsw_sp_port) return; - mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ + mlxsw_sp_port_vports_fini(mlxsw_sp_port); mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); free_percpu(mlxsw_sp_port->pcpu_stats); + kfree(mlxsw_sp_port->active_vlans); free_netdev(mlxsw_sp_port->dev); } @@ -1662,16 +1791,15 @@ static int __mlxsw_sp_flood_init(struct mlxsw_core *mlxsw_core, enum mlxsw_sp_flood_table flood_table; char sfgc_pl[MLXSW_REG_SFGC_LEN]; - if (bridge_type == MLXSW_REG_SFGC_BRIDGE_TYPE_VFID) { + if (bridge_type == MLXSW_REG_SFGC_BRIDGE_TYPE_VFID) table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID; - flood_table = 0; - } else { + else table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; - if (type == MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST) - flood_table = MLXSW_SP_FLOOD_TABLE_UC; - else - flood_table = MLXSW_SP_FLOOD_TABLE_BM; - } + + if (type == MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST) + flood_table = MLXSW_SP_FLOOD_TABLE_UC; + else + flood_table = MLXSW_SP_FLOOD_TABLE_BM; mlxsw_reg_sfgc_pack(sfgc_pl, type, bridge_type, table_type, flood_table); @@ -1682,9 +1810,6 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp) { int type, err; - /* For non-offloaded netdevs, flood all traffic types to CPU - * port. - */ for (type = 0; type < MLXSW_REG_SFGC_TYPE_MAX; type++) { if (type == MLXSW_REG_SFGC_TYPE_RESERVED) continue; @@ -1693,15 +1818,6 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp) MLXSW_REG_SFGC_BRIDGE_TYPE_VFID); if (err) return err; - } - - /* For bridged ports, use one flooding table for unknown unicast - * traffic and a second table for unregistered multicast and - * broadcast. - */ - for (type = 0; type < MLXSW_REG_SFGC_TYPE_MAX; type++) { - if (type == MLXSW_REG_SFGC_TYPE_RESERVED) - continue; err = __mlxsw_sp_flood_init(mlxsw_sp->core, type, MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID); @@ -1736,6 +1852,8 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, mlxsw_sp->core = mlxsw_core; mlxsw_sp->bus_info = mlxsw_bus_info; + INIT_LIST_HEAD(&mlxsw_sp->port_vfids.list); + INIT_LIST_HEAD(&mlxsw_sp->br_vfids.list); err = mlxsw_sp_base_mac_get(mlxsw_sp); if (err) { @@ -1746,7 +1864,7 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, err = mlxsw_sp_ports_create(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); - goto err_ports_create; + return err; } err = mlxsw_sp_event_register(mlxsw_sp, MLXSW_TRAP_ID_PUDE); @@ -1796,8 +1914,6 @@ err_rx_listener_register: mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); err_event_register: mlxsw_sp_ports_remove(mlxsw_sp); -err_ports_create: - mlxsw_sp_vfids_fini(mlxsw_sp); return err; } @@ -1809,7 +1925,6 @@ static void mlxsw_sp_fini(void *priv) mlxsw_sp_traps_fini(mlxsw_sp); mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); mlxsw_sp_ports_remove(mlxsw_sp); - mlxsw_sp_vfids_fini(mlxsw_sp); } static struct mlxsw_config_profile mlxsw_sp_config_profile = { @@ -1834,8 +1949,8 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .flood_mode = 3, .max_fid_offset_flood_tables = 2, .fid_offset_flood_table_size = VLAN_N_VID - 1, - .max_fid_flood_tables = 1, - .fid_flood_table_size = VLAN_N_VID, + .max_fid_flood_tables = 2, + .fid_flood_table_size = MLXSW_SP_VFID_MAX, .used_max_ib_mc = 1, .max_ib_mc = 0, .used_max_pkey = 1, @@ -2147,6 +2262,54 @@ static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled); } +static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *br_dev); + +static int mlxsw_sp_port_vlan_link(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *vlan_dev) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + u16 vid = vlan_dev_vlan_id(vlan_dev); + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + if (!mlxsw_sp_vport) { + WARN_ON(!mlxsw_sp_vport); + return -EINVAL; + } + + mlxsw_sp_vport->dev = vlan_dev; + + return 0; +} + +static int mlxsw_sp_port_vlan_unlink(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *vlan_dev) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + u16 vid = vlan_dev_vlan_id(vlan_dev); + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + if (!mlxsw_sp_vport) { + WARN_ON(!mlxsw_sp_vport); + return -EINVAL; + } + + /* When removing a VLAN device while still bridged we should first + * remove it from the bridge, as we receive the bridge's notification + * when the vPort is already gone. + */ + if (mlxsw_sp_vport->bridged) { + struct net_device *br_dev; + + br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport); + mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev); + } + + mlxsw_sp_vport->dev = mlxsw_sp_port->dev; + + return 0; +} + static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, unsigned long event, void *ptr) { @@ -2176,9 +2339,23 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; - if (!info->master) - break; - if (netif_is_bridge_master(upper_dev)) { + if (is_vlan_dev(upper_dev)) { + if (info->linking) { + err = mlxsw_sp_port_vlan_link(mlxsw_sp_port, + upper_dev); + if (err) { + netdev_err(dev, "Failed to link VLAN device\n"); + return NOTIFY_BAD; + } + } else { + err = mlxsw_sp_port_vlan_unlink(mlxsw_sp_port, + upper_dev); + if (err) { + netdev_err(dev, "Failed to unlink VLAN device\n"); + return NOTIFY_BAD; + } + } + } else if (netif_is_bridge_master(upper_dev)) { if (info->linking) { err = mlxsw_sp_port_bridge_join(mlxsw_sp_port); if (err) @@ -2271,6 +2448,370 @@ static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, return NOTIFY_DONE; } +static struct mlxsw_sp_vfid * +mlxsw_sp_br_vfid_find(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev) +{ + struct mlxsw_sp_vfid *vfid; + + list_for_each_entry(vfid, &mlxsw_sp->br_vfids.list, list) { + if (vfid->br_dev == br_dev) + return vfid; + } + + return NULL; +} + +static u16 mlxsw_sp_vfid_to_br_vfid(u16 vfid) +{ + return vfid - MLXSW_SP_VFID_PORT_MAX; +} + +static u16 mlxsw_sp_br_vfid_to_vfid(u16 br_vfid) +{ + return MLXSW_SP_VFID_PORT_MAX + br_vfid; +} + +static u16 mlxsw_sp_avail_br_vfid_get(const struct mlxsw_sp *mlxsw_sp) +{ + return find_first_zero_bit(mlxsw_sp->br_vfids.mapped, + MLXSW_SP_VFID_BR_MAX); +} + +static struct mlxsw_sp_vfid *mlxsw_sp_br_vfid_create(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev) +{ + struct device *dev = mlxsw_sp->bus_info->dev; + struct mlxsw_sp_vfid *vfid; + u16 n_vfid; + int err; + + n_vfid = mlxsw_sp_br_vfid_to_vfid(mlxsw_sp_avail_br_vfid_get(mlxsw_sp)); + if (n_vfid == MLXSW_SP_VFID_MAX) { + dev_err(dev, "No available vFIDs\n"); + return ERR_PTR(-ERANGE); + } + + err = __mlxsw_sp_vfid_create(mlxsw_sp, n_vfid); + if (err) { + dev_err(dev, "Failed to create vFID=%d\n", n_vfid); + return ERR_PTR(err); + } + + vfid = kzalloc(sizeof(*vfid), GFP_KERNEL); + if (!vfid) + goto err_allocate_vfid; + + vfid->vfid = n_vfid; + vfid->br_dev = br_dev; + + list_add(&vfid->list, &mlxsw_sp->br_vfids.list); + set_bit(mlxsw_sp_vfid_to_br_vfid(n_vfid), mlxsw_sp->br_vfids.mapped); + + return vfid; + +err_allocate_vfid: + __mlxsw_sp_vfid_destroy(mlxsw_sp, n_vfid); + return ERR_PTR(-ENOMEM); +} + +static void mlxsw_sp_br_vfid_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vfid *vfid) +{ + u16 br_vfid = mlxsw_sp_vfid_to_br_vfid(vfid->vfid); + + clear_bit(br_vfid, mlxsw_sp->br_vfids.mapped); + list_del(&vfid->list); + + __mlxsw_sp_vfid_destroy(mlxsw_sp, vfid->vfid); + + kfree(vfid); +} + +static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *br_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); + struct net_device *dev = mlxsw_sp_vport->dev; + struct mlxsw_sp_vfid *vfid, *new_vfid; + int err; + + vfid = mlxsw_sp_br_vfid_find(mlxsw_sp, br_dev); + if (!vfid) { + WARN_ON(!vfid); + return -EINVAL; + } + + /* We need a vFID to go back to after leaving the bridge's vFID. */ + new_vfid = mlxsw_sp_vfid_find(mlxsw_sp, vid); + if (!new_vfid) { + new_vfid = mlxsw_sp_vfid_create(mlxsw_sp, vid); + if (IS_ERR(new_vfid)) { + netdev_err(dev, "Failed to create vFID for VID=%d\n", + vid); + return PTR_ERR(new_vfid); + } + } + + /* Invalidate existing {Port, VID} to vFID mapping and create a new + * one for the new vFID. + */ + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, + MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, + false, + mlxsw_sp_vfid_to_fid(vfid->vfid), + vid); + if (err) { + netdev_err(dev, "Failed to invalidate {Port, VID} to vFID=%d mapping\n", + vfid->vfid); + goto err_port_vid_to_fid_invalidate; + } + + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, + MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, + true, + mlxsw_sp_vfid_to_fid(new_vfid->vfid), + vid); + if (err) { + netdev_err(dev, "Failed to map {Port, VID} to vFID=%d\n", + new_vfid->vfid); + goto err_port_vid_to_fid_validate; + } + + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); + if (err) { + netdev_err(dev, "Failed to disable learning\n"); + goto err_port_vid_learning_set; + } + + err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, false, + false); + if (err) { + netdev_err(dev, "Failed clear to clear flooding\n"); + goto err_vport_flood_set; + } + + /* Switch between the vFIDs and destroy the old one if needed. */ + new_vfid->nr_vports++; + mlxsw_sp_vport->vport.vfid = new_vfid; + vfid->nr_vports--; + if (!vfid->nr_vports) + mlxsw_sp_br_vfid_destroy(mlxsw_sp, vfid); + + mlxsw_sp_vport->learning = 0; + mlxsw_sp_vport->learning_sync = 0; + mlxsw_sp_vport->uc_flood = 0; + mlxsw_sp_vport->bridged = 0; + + return 0; + +err_vport_flood_set: +err_port_vid_learning_set: +err_port_vid_to_fid_validate: +err_port_vid_to_fid_invalidate: + /* Rollback vFID only if new. */ + if (!new_vfid->nr_vports) + mlxsw_sp_vfid_destroy(mlxsw_sp, new_vfid); + return err; +} + +static int mlxsw_sp_vport_bridge_join(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *br_dev) +{ + struct mlxsw_sp_vfid *old_vfid = mlxsw_sp_vport->vport.vfid; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); + struct net_device *dev = mlxsw_sp_vport->dev; + struct mlxsw_sp_vfid *vfid; + int err; + + vfid = mlxsw_sp_br_vfid_find(mlxsw_sp, br_dev); + if (!vfid) { + vfid = mlxsw_sp_br_vfid_create(mlxsw_sp, br_dev); + if (IS_ERR(vfid)) { + netdev_err(dev, "Failed to create bridge vFID\n"); + return PTR_ERR(vfid); + } + } + + err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, true, false); + if (err) { + netdev_err(dev, "Failed to setup flooding for vFID=%d\n", + vfid->vfid); + goto err_port_flood_set; + } + + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); + if (err) { + netdev_err(dev, "Failed to enable learning\n"); + goto err_port_vid_learning_set; + } + + /* We need to invalidate existing {Port, VID} to vFID mapping and + * create a new one for the bridge's vFID. + */ + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, + MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, + false, + mlxsw_sp_vfid_to_fid(old_vfid->vfid), + vid); + if (err) { + netdev_err(dev, "Failed to invalidate {Port, VID} to vFID=%d mapping\n", + old_vfid->vfid); + goto err_port_vid_to_fid_invalidate; + } + + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, + MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, + true, + mlxsw_sp_vfid_to_fid(vfid->vfid), + vid); + if (err) { + netdev_err(dev, "Failed to map {Port, VID} to vFID=%d\n", + vfid->vfid); + goto err_port_vid_to_fid_validate; + } + + /* Switch between the vFIDs and destroy the old one if needed. */ + vfid->nr_vports++; + mlxsw_sp_vport->vport.vfid = vfid; + old_vfid->nr_vports--; + if (!old_vfid->nr_vports) + mlxsw_sp_vfid_destroy(mlxsw_sp, old_vfid); + + mlxsw_sp_vport->learning = 1; + mlxsw_sp_vport->learning_sync = 1; + mlxsw_sp_vport->uc_flood = 1; + mlxsw_sp_vport->bridged = 1; + + return 0; + +err_port_vid_to_fid_validate: + mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, + MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, false, + mlxsw_sp_vfid_to_fid(old_vfid->vfid), vid); +err_port_vid_to_fid_invalidate: + mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); +err_port_vid_learning_set: + mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, false, false); +err_port_flood_set: + if (!vfid->nr_vports) + mlxsw_sp_br_vfid_destroy(mlxsw_sp, vfid); + return err; +} + +static bool +mlxsw_sp_port_master_bridge_check(const struct mlxsw_sp_port *mlxsw_sp_port, + const struct net_device *br_dev) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + + list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, + vport.list) { + if (mlxsw_sp_vport_br_get(mlxsw_sp_vport) == br_dev) + return false; + } + + return true; +} + +static int mlxsw_sp_netdevice_vport_event(struct net_device *dev, + unsigned long event, void *ptr, + u16 vid) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct netdev_notifier_changeupper_info *info = ptr; + struct mlxsw_sp_port *mlxsw_sp_vport; + struct net_device *upper_dev; + int err; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + + switch (event) { + case NETDEV_PRECHANGEUPPER: + upper_dev = info->upper_dev; + if (!info->master || !info->linking) + break; + if (!netif_is_bridge_master(upper_dev)) + return NOTIFY_BAD; + /* We can't have multiple VLAN interfaces configured on + * the same port and being members in the same bridge. + */ + if (!mlxsw_sp_port_master_bridge_check(mlxsw_sp_port, + upper_dev)) + return NOTIFY_BAD; + break; + case NETDEV_CHANGEUPPER: + upper_dev = info->upper_dev; + if (!info->master) + break; + if (info->linking) { + if (!mlxsw_sp_vport) { + WARN_ON(!mlxsw_sp_vport); + return NOTIFY_BAD; + } + err = mlxsw_sp_vport_bridge_join(mlxsw_sp_vport, + upper_dev); + if (err) { + netdev_err(dev, "Failed to join bridge\n"); + return NOTIFY_BAD; + } + } else { + /* We ignore bridge's unlinking notifications if vPort + * is gone, since we already left the bridge when the + * VLAN device was unlinked from the real device. + */ + if (!mlxsw_sp_vport) + return NOTIFY_DONE; + err = mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, + upper_dev); + if (err) { + netdev_err(dev, "Failed to leave bridge\n"); + return NOTIFY_BAD; + } + } + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_lag_vport_event(struct net_device *lag_dev, + unsigned long event, void *ptr, + u16 vid) +{ + struct net_device *dev; + struct list_head *iter; + int ret; + + netdev_for_each_lower_dev(lag_dev, dev, iter) { + if (mlxsw_sp_port_dev_check(dev)) { + ret = mlxsw_sp_netdevice_vport_event(dev, event, ptr, + vid); + if (ret == NOTIFY_BAD) + return ret; + } + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev, + unsigned long event, void *ptr) +{ + struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); + u16 vid = vlan_dev_vlan_id(vlan_dev); + + if (mlxsw_sp_port_dev_check(real_dev)) + return mlxsw_sp_netdevice_vport_event(real_dev, event, ptr, + vid); + else if (netif_is_lag_master(real_dev)) + return mlxsw_sp_netdevice_lag_vport_event(real_dev, event, ptr, + vid); + + return NOTIFY_DONE; +} + static int mlxsw_sp_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { @@ -2282,6 +2823,9 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, if (netif_is_lag_master(dev)) return mlxsw_sp_netdevice_lag_event(dev, event, ptr); + if (is_vlan_dev(dev)) + return mlxsw_sp_netdevice_vlan_event(dev, event, ptr); + return NOTIFY_DONE; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 48be5a63b9b5..463ed6dcc709 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -41,11 +41,16 @@ #include <linux/netdevice.h> #include <linux/bitops.h> #include <linux/if_vlan.h> +#include <linux/list.h> #include <net/switchdev.h> #include "core.h" #define MLXSW_SP_VFID_BASE VLAN_N_VID +#define MLXSW_SP_VFID_PORT_MAX 512 /* Non-bridged VLAN interfaces */ +#define MLXSW_SP_VFID_BR_MAX 8192 /* Bridged VLAN interfaces */ +#define MLXSW_SP_VFID_MAX (MLXSW_SP_VFID_PORT_MAX + MLXSW_SP_VFID_BR_MAX) + #define MLXSW_SP_LAG_MAX 64 #define MLXSW_SP_PORT_PER_LAG_MAX 16 @@ -56,8 +61,38 @@ struct mlxsw_sp_upper { unsigned int ref_count; }; +struct mlxsw_sp_vfid { + struct list_head list; + u16 nr_vports; + u16 vfid; /* Starting at 0 */ + struct net_device *br_dev; + u16 vid; +}; + +static inline u16 mlxsw_sp_vfid_to_fid(u16 vfid) +{ + return MLXSW_SP_VFID_BASE + vfid; +} + +static inline u16 mlxsw_sp_fid_to_vfid(u16 fid) +{ + return fid - MLXSW_SP_VFID_BASE; +} + +static inline bool mlxsw_sp_fid_is_vfid(u16 fid) +{ + return fid >= MLXSW_SP_VFID_BASE; +} + struct mlxsw_sp { - unsigned long active_vfids[BITS_TO_LONGS(VLAN_N_VID)]; + struct { + struct list_head list; + unsigned long mapped[BITS_TO_LONGS(MLXSW_SP_VFID_PORT_MAX)]; + } port_vfids; + struct { + struct list_head list; + unsigned long mapped[BITS_TO_LONGS(MLXSW_SP_VFID_BR_MAX)]; + } br_vfids; unsigned long active_fids[BITS_TO_LONGS(VLAN_N_VID)]; struct mlxsw_sp_port **ports; struct mlxsw_core *core; @@ -102,11 +137,15 @@ struct mlxsw_sp_port { lagged:1; u16 pvid; u16 lag_id; + struct { + struct list_head list; + struct mlxsw_sp_vfid *vfid; + u16 vid; + } vport; /* 802.1Q bridge VLANs */ - unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + unsigned long *active_vlans; /* VLAN interfaces */ - unsigned long active_vfids[BITS_TO_LONGS(VLAN_N_VID)]; - u16 nr_vfids; + struct list_head vports_list; }; static inline struct mlxsw_sp_port * @@ -121,6 +160,59 @@ mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index) return mlxsw_sp_port && mlxsw_sp_port->lagged ? mlxsw_sp_port : NULL; } +static inline bool +mlxsw_sp_port_is_vport(const struct mlxsw_sp_port *mlxsw_sp_port) +{ + return mlxsw_sp_port->vport.vfid; +} + +static inline struct net_device * +mlxsw_sp_vport_br_get(const struct mlxsw_sp_port *mlxsw_sp_vport) +{ + return mlxsw_sp_vport->vport.vfid->br_dev; +} + +static inline u16 +mlxsw_sp_vport_vid_get(const struct mlxsw_sp_port *mlxsw_sp_vport) +{ + return mlxsw_sp_vport->vport.vid; +} + +static inline u16 +mlxsw_sp_vport_vfid_get(const struct mlxsw_sp_port *mlxsw_sp_vport) +{ + return mlxsw_sp_vport->vport.vfid->vfid; +} + +static inline struct mlxsw_sp_port * +mlxsw_sp_port_vport_find(const struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + + list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, + vport.list) { + if (mlxsw_sp_vport_vid_get(mlxsw_sp_vport) == vid) + return mlxsw_sp_vport; + } + + return NULL; +} + +static inline struct mlxsw_sp_port * +mlxsw_sp_port_vport_find_by_vfid(const struct mlxsw_sp_port *mlxsw_sp_port, + u16 vfid) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + + list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, + vport.list) { + if (mlxsw_sp_vport_vfid_get(mlxsw_sp_vport) == vfid) + return mlxsw_sp_vport; + } + + return NULL; +} + enum mlxsw_sp_flood_table { MLXSW_SP_FLOOD_TABLE_UC, MLXSW_SP_FLOOD_TABLE_BM, @@ -143,5 +235,7 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, u16 vid); int mlxsw_sp_port_kill_vid(struct net_device *dev, __be16 __always_unused proto, u16 vid); +int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid, + bool set, bool only_uc); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 406dab2f6b17..9476ff9237ae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -51,12 +51,33 @@ #include "core.h" #include "reg.h" +static struct mlxsw_sp_port * +mlxsw_sp_port_orig_get(struct net_device *dev, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + u16 vid; + + if (!is_vlan_dev(dev)) + return mlxsw_sp_port; + + vid = vlan_dev_vlan_id(dev); + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + WARN_ON(!mlxsw_sp_vport); + + return mlxsw_sp_vport; +} + static int mlxsw_sp_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + mlxsw_sp_port = mlxsw_sp_port_orig_get(attr->orig_dev, mlxsw_sp_port); + if (!mlxsw_sp_port) + return -EINVAL; + switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: attr->u.ppid.id_len = sizeof(mlxsw_sp->base_mac); @@ -105,8 +126,14 @@ static int mlxsw_sp_port_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, if (!spms_pl) return -ENOMEM; mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port); - for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) + + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); mlxsw_reg_spms_vid_pack(spms_pl, vid, spms_state); + } else { + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) + mlxsw_reg_spms_vid_pack(spms_pl, vid, spms_state); + } err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl); kfree(spms_pl); @@ -124,22 +151,38 @@ static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_sp_port_stp_state_set(mlxsw_sp_port, state); } +static bool mlxsw_sp_vfid_is_vport_br(u16 vfid) +{ + return vfid >= MLXSW_SP_VFID_PORT_MAX; +} + static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 fid_begin, u16 fid_end, bool set, + u16 idx_begin, u16 idx_end, bool set, bool only_uc) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u16 range = fid_end - fid_begin + 1; + u16 local_port = mlxsw_sp_port->local_port; + enum mlxsw_flood_table_type table_type; + u16 range = idx_end - idx_begin + 1; char *sftr_pl; int err; + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID; + if (mlxsw_sp_vfid_is_vport_br(idx_begin)) + local_port = mlxsw_sp_port->local_port; + else + local_port = MLXSW_PORT_CPU_PORT; + } else { + table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; + } + sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); if (!sftr_pl) return -ENOMEM; - mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, fid_begin, - MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST, range, - mlxsw_sp_port->local_port, set); + mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin, + table_type, range, local_port, set); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); if (err) goto buffer_out; @@ -150,9 +193,8 @@ static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, if (only_uc) goto buffer_out; - mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, fid_begin, - MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST, range, - mlxsw_sp_port->local_port, set); + mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, idx_begin, + table_type, range, local_port, set); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); buffer_out: @@ -167,6 +209,13 @@ static int mlxsw_sp_port_uc_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, last_visited_vid; int err; + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_port); + + return __mlxsw_sp_port_flood_set(mlxsw_sp_port, vfid, vfid, + set, true); + } + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) { err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid, vid, set, true); @@ -185,6 +234,16 @@ err_port_flood_set: return err; } +int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid, + bool set, bool only_uc) +{ + /* In case of vFIDs, index into the flooding table is relative to + * the start of the vFIDs range. + */ + return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set, + only_uc); +} + static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_trans *trans, unsigned long brport_flags) @@ -244,6 +303,10 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); int err = 0; + mlxsw_sp_port = mlxsw_sp_port_orig_get(attr->orig_dev, mlxsw_sp_port); + if (!mlxsw_sp_port) + return -EINVAL; + switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: err = mlxsw_sp_port_attr_stp_state_set(mlxsw_sp_port, trans, @@ -304,7 +367,7 @@ static int mlxsw_sp_port_fid_map(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid) { enum mlxsw_reg_svfa_mt mt; - if (mlxsw_sp_port->nr_vfids) + if (!list_empty(&mlxsw_sp_port->vports_list)) mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; else mt = MLXSW_REG_SVFA_MT_VID_TO_FID; @@ -316,7 +379,7 @@ static int mlxsw_sp_port_fid_unmap(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid) { enum mlxsw_reg_svfa_mt mt; - if (!mlxsw_sp_port->nr_vfids) + if (list_empty(&mlxsw_sp_port->vports_list)) return 0; mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; @@ -503,7 +566,7 @@ static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) } static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp_port *mlxsw_sp_port, - const char *mac, u16 vid, bool adding, + const char *mac, u16 fid, bool adding, bool dynamic) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; @@ -516,7 +579,7 @@ static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), - mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP, + mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP, mlxsw_sp_port->local_port); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); kfree(sfd_pl); @@ -525,8 +588,8 @@ static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp_port *mlxsw_sp_port, } static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, - const char *mac, u16 vid, bool adding, - bool dynamic) + const char *mac, u16 fid, u16 lag_vid, + bool adding, bool dynamic) { char *sfd_pl; int err; @@ -537,8 +600,8 @@ static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); mlxsw_reg_sfd_uc_lag_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), - mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP, - lag_id); + mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP, + lag_vid, lag_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); kfree(sfd_pl); @@ -550,21 +613,30 @@ mlxsw_sp_port_fdb_static_add(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { - u16 vid = fdb->vid; + u16 fid = fdb->vid; + u16 lag_vid = 0; if (switchdev_trans_ph_prepare(trans)) return 0; - if (!vid) - vid = mlxsw_sp_port->pvid; + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_port); + + fid = mlxsw_sp_vfid_to_fid(vfid); + lag_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); + } + + if (!fid) + fid = mlxsw_sp_port->pvid; if (!mlxsw_sp_port->lagged) return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, - fdb->addr, vid, true, false); + fdb->addr, fid, true, false); else return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_port->lag_id, - fdb->addr, vid, true, false); + fdb->addr, fid, lag_vid, + true, false); } static int mlxsw_sp_port_obj_add(struct net_device *dev, @@ -574,8 +646,15 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); int err = 0; + mlxsw_sp_port = mlxsw_sp_port_orig_get(obj->orig_dev, mlxsw_sp_port); + if (!mlxsw_sp_port) + return -EINVAL; + switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) + return 0; + err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_VLAN(obj), trans); @@ -679,14 +758,24 @@ static int mlxsw_sp_port_fdb_static_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_fdb *fdb) { + u16 fid = fdb->vid; + u16 lag_vid = 0; + + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_port); + + fid = mlxsw_sp_vfid_to_fid(vfid); + lag_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); + } + if (!mlxsw_sp_port->lagged) return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, - fdb->addr, fdb->vid, + fdb->addr, fid, false, false); else return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_port->lag_id, - fdb->addr, fdb->vid, + fdb->addr, fid, lag_vid, false, false); } @@ -696,8 +785,15 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); int err = 0; + mlxsw_sp_port = mlxsw_sp_port_orig_get(obj->orig_dev, mlxsw_sp_port); + if (!mlxsw_sp_port) + return -EINVAL; + switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) + return 0; + err = mlxsw_sp_port_vlans_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; @@ -732,9 +828,10 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, switchdev_obj_dump_cb_t *cb) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 vport_vid = 0, vport_fid = 0; char *sfd_pl; char mac[ETH_ALEN]; - u16 vid; + u16 fid; u8 local_port; u16 lag_id; u8 num_rec; @@ -746,6 +843,14 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, if (!sfd_pl) return -ENOMEM; + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + u16 tmp; + + tmp = mlxsw_sp_vport_vfid_get(mlxsw_sp_port); + vport_fid = mlxsw_sp_vfid_to_fid(tmp); + vport_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); + } + mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0); do { mlxsw_reg_sfd_num_rec_set(sfd_pl, MLXSW_REG_SFD_REC_MAX_COUNT); @@ -764,12 +869,17 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, for (i = 0; i < num_rec; i++) { switch (mlxsw_reg_sfd_rec_type_get(sfd_pl, i)) { case MLXSW_REG_SFD_REC_TYPE_UNICAST: - mlxsw_reg_sfd_uc_unpack(sfd_pl, i, mac, &vid, + mlxsw_reg_sfd_uc_unpack(sfd_pl, i, mac, &fid, &local_port); if (local_port == mlxsw_sp_port->local_port) { + if (vport_fid && vport_fid != fid) + continue; + else if (vport_fid) + fdb->vid = vport_vid; + else + fdb->vid = fid; ether_addr_copy(fdb->addr, mac); fdb->ndm_state = NUD_REACHABLE; - fdb->vid = vid; err = cb(&fdb->obj); if (err) stored_err = err; @@ -777,12 +887,17 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, break; case MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG: mlxsw_reg_sfd_uc_lag_unpack(sfd_pl, i, - mac, &vid, &lag_id); + mac, &fid, &lag_id); if (mlxsw_sp_port == mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id)) { + if (vport_fid && vport_fid != fid) + continue; + else if (vport_fid) + fdb->vid = vport_vid; + else + fdb->vid = fid; ether_addr_copy(fdb->addr, mac); fdb->ndm_state = NUD_REACHABLE; - fdb->vid = vid; err = cb(&fdb->obj); if (err) stored_err = err; @@ -804,6 +919,13 @@ static int mlxsw_sp_port_vlan_dump(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid; int err = 0; + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + vlan->flags = 0; + vlan->vid_begin = mlxsw_sp_vport_vid_get(mlxsw_sp_port); + vlan->vid_end = mlxsw_sp_vport_vid_get(mlxsw_sp_port); + return cb(&vlan->obj); + } + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) { vlan->flags = 0; if (vid == mlxsw_sp_port->pvid) @@ -824,6 +946,10 @@ static int mlxsw_sp_port_obj_dump(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); int err = 0; + mlxsw_sp_port = mlxsw_sp_port_orig_get(obj->orig_dev, mlxsw_sp_port); + if (!mlxsw_sp_port) + return -EINVAL; + switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: err = mlxsw_sp_port_vlan_dump(mlxsw_sp_port, @@ -871,17 +997,35 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_port *mlxsw_sp_port; char mac[ETH_ALEN]; u8 local_port; - u16 vid; + u16 vid, fid; int err; - mlxsw_reg_sfn_mac_unpack(sfn_pl, rec_index, mac, &vid, &local_port); + mlxsw_reg_sfn_mac_unpack(sfn_pl, rec_index, mac, &fid, &local_port); mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) { dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect local port in FDB notification\n"); return; } - err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, mac, vid, + if (mlxsw_sp_fid_is_vfid(fid)) { + u16 vfid = mlxsw_sp_fid_to_vfid(fid); + struct mlxsw_sp_port *mlxsw_sp_vport; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find_by_vfid(mlxsw_sp_port, + vfid); + if (!mlxsw_sp_vport) { + netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n"); + return; + } + + vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); + /* Override the physical port with the vPort. */ + mlxsw_sp_port = mlxsw_sp_vport; + } else { + vid = fid; + } + + err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, mac, fid, adding && mlxsw_sp_port->learning, true); if (err) { if (net_ratelimit()) @@ -900,18 +1044,38 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_port *mlxsw_sp_port; char mac[ETH_ALEN]; + u16 lag_vid = 0; u16 lag_id; - u16 vid; + u16 vid, fid; int err; - mlxsw_reg_sfn_mac_lag_unpack(sfn_pl, rec_index, mac, &vid, &lag_id); + mlxsw_reg_sfn_mac_lag_unpack(sfn_pl, rec_index, mac, &fid, &lag_id); mlxsw_sp_port = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id); if (!mlxsw_sp_port) { dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Cannot find port representor for LAG\n"); return; } - err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, vid, + if (mlxsw_sp_fid_is_vfid(fid)) { + u16 vfid = mlxsw_sp_fid_to_vfid(fid); + struct mlxsw_sp_port *mlxsw_sp_vport; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find_by_vfid(mlxsw_sp_port, + vfid); + if (!mlxsw_sp_vport) { + netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n"); + return; + } + + vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); + lag_vid = vid; + /* Override the physical port with the vPort. */ + mlxsw_sp_port = mlxsw_sp_vport; + } else { + vid = fid; + } + + err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid, adding && mlxsw_sp_port->learning, true); if (err) { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 038ac6b14a60..7060539d276a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2071,7 +2071,7 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, l4_hdr = ipv6_hdr(skb)->nexthdr; break; default: - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } if (skb->inner_protocol_type != ENCAP_TYPE_ETHER || @@ -2080,7 +2080,7 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, (l4_hdr == IPPROTO_UDP && (skb_inner_mac_header(skb) - skb_transport_header(skb) != sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c index 08d4be616064..e097e6baaac4 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c @@ -500,7 +500,7 @@ void pch_gbe_check_options(struct pch_gbe_adapter *adapter) val = XsumTX; pch_gbe_validate_option(&val, &opt, adapter); if (!val) - dev->features &= ~NETIF_F_ALL_CSUM; + dev->features &= ~NETIF_F_CSUM_MASK; } { /* Flow Control */ static const struct pch_gbe_option opt = { diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 120cc2565d16..3448eb0f8a4a 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -882,6 +882,7 @@ static int ravb_phy_init(struct net_device *ndev) struct ravb_private *priv = netdev_priv(ndev); struct phy_device *phydev; struct device_node *pn; + int err; priv->link = 0; priv->speed = 0; @@ -889,6 +890,17 @@ static int ravb_phy_init(struct net_device *ndev) /* Try connecting to PHY */ pn = of_parse_phandle(np, "phy-handle", 0); + if (!pn) { + /* In the case of a fixed PHY, the DT node associated + * to the PHY is the Ethernet MAC DT node. + */ + if (of_phy_is_fixed_link(np)) { + err = of_phy_register_fixed_link(np); + if (err) + return err; + } + pn = of_node_get(np); + } phydev = of_phy_connect(ndev, pn, ravb_adjust_link, 0, priv->phy_interface); if (!phydev) { diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index b405349a570c..6f697438545d 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -3128,10 +3128,10 @@ static int efx_pci_probe(struct pci_dev *pci_dev, net_dev->features |= (efx->type->offload_features | NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_RXCSUM); - if (efx->type->offload_features & NETIF_F_V6_CSUM) + if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) net_dev->features |= NETIF_F_TSO6; /* Mask for features that also apply to VLAN devices */ - net_dev->vlan_features |= (NETIF_F_ALL_CSUM | NETIF_F_SG | + net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_ALL_TSO | NETIF_F_RXCSUM); /* All offloads can be toggled */ diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index f4518bc2cd28..1e19c8fd8b82 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -439,7 +439,7 @@ struct stmmac_ops { /* PTP and HW Timer helpers */ struct stmmac_hwtimestamp { void (*config_hw_tstamping) (void __iomem *ioaddr, u32 data); - void (*config_sub_second_increment) (void __iomem *ioaddr); + u32 (*config_sub_second_increment) (void __iomem *ioaddr, u32 clk_rate); int (*init_systime) (void __iomem *ioaddr, u32 sec, u32 nsec); int (*config_addend) (void __iomem *ioaddr, u32 addend); int (*adjust_systime) (void __iomem *ioaddr, u32 sec, u32 nsec, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 401383b252a8..f0d797ab74d8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -32,6 +32,7 @@ #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII 0x2 #define SYSMGR_EMACGRP_CTRL_PHYSEL_WIDTH 2 #define SYSMGR_EMACGRP_CTRL_PHYSEL_MASK 0x00000003 +#define SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK 0x00000010 #define EMAC_SPLITTER_CTRL_REG 0x0 #define EMAC_SPLITTER_CTRL_SPEED_MASK 0x3 @@ -47,6 +48,7 @@ struct socfpga_dwmac { struct regmap *sys_mgr_base_addr; struct reset_control *stmmac_rst; void __iomem *splitter_base; + bool f2h_ptp_ref_clk; }; static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) @@ -116,6 +118,8 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device * return -EINVAL; } + dwmac->f2h_ptp_ref_clk = of_property_read_bool(np, "altr,f2h_ptp_ref_clk"); + np_splitter = of_parse_phandle(np, "altr,emac-splitter", 0); if (np_splitter) { if (of_address_to_resource(np_splitter, 0, &res_splitter)) { @@ -171,6 +175,11 @@ static int socfpga_dwmac_setup(struct socfpga_dwmac *dwmac) ctrl &= ~(SYSMGR_EMACGRP_CTRL_PHYSEL_MASK << reg_shift); ctrl |= val << reg_shift; + if (dwmac->f2h_ptp_ref_clk) + ctrl |= SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2); + else + ctrl &= ~(SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2)); + regmap_write(sys_mgr_base_addr, reg_offset, ctrl); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 76ad214b4036..a77f68918010 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -33,22 +33,25 @@ static void stmmac_config_hw_tstamping(void __iomem *ioaddr, u32 data) writel(data, ioaddr + PTP_TCR); } -static void stmmac_config_sub_second_increment(void __iomem *ioaddr) +static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr, + u32 ptp_clock) { u32 value = readl(ioaddr + PTP_TCR); unsigned long data; /* Convert the ptp_clock to nano second - * formula = (1/ptp_clock) * 1000000000 + * formula = (2/ptp_clock) * 1000000000 * where, ptp_clock = 50MHz. */ - data = (1000000000ULL / 50000000); + data = (2000000000ULL / ptp_clock); /* 0.465ns accuracy */ if (!(value & PTP_TCR_TSCTRLSSR)) data = (data * 1000) / 465; writel(data, ioaddr + PTP_SSIR); + + return data; } static int stmmac_init_systime(void __iomem *ioaddr, u32 sec, u32 nsec) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3c6549aee11d..6d4c33a7f0b4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -53,6 +53,7 @@ #include "stmmac.h" #include <linux/reset.h> #include <linux/of_mdio.h> +#include "dwmac1000.h" #define STMMAC_ALIGN(x) L1_CACHE_ALIGN(x) @@ -185,7 +186,7 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv) priv->clk_csr = STMMAC_CSR_100_150M; else if ((clk_rate >= CSR_F_150M) && (clk_rate < CSR_F_250M)) priv->clk_csr = STMMAC_CSR_150_250M; - else if ((clk_rate >= CSR_F_250M) && (clk_rate <= CSR_F_300M)) + else if ((clk_rate >= CSR_F_250M) && (clk_rate < CSR_F_300M)) priv->clk_csr = STMMAC_CSR_250_300M; } } @@ -435,6 +436,7 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) u32 ts_master_en = 0; u32 ts_event_en = 0; u32 value = 0; + u32 sec_inc; if (!(priv->dma_cap.time_stamp || priv->adv_ts)) { netdev_alert(priv->dev, "No support for HW time stamping\n"); @@ -598,24 +600,19 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) tstamp_all | ptp_v2 | ptp_over_ethernet | ptp_over_ipv6_udp | ptp_over_ipv4_udp | ts_event_en | ts_master_en | snap_type_sel); - priv->hw->ptp->config_hw_tstamping(priv->ioaddr, value); /* program Sub Second Increment reg */ - priv->hw->ptp->config_sub_second_increment(priv->ioaddr); + sec_inc = priv->hw->ptp->config_sub_second_increment( + priv->ioaddr, priv->clk_ptp_rate); + temp = div_u64(1000000000ULL, sec_inc); /* calculate default added value: * formula is : * addend = (2^32)/freq_div_ratio; - * where, freq_div_ratio = clk_ptp_ref_i/50MHz - * hence, addend = ((2^32) * 50MHz)/clk_ptp_ref_i; - * NOTE: clk_ptp_ref_i should be >= 50MHz to - * achieve 20ns accuracy. - * - * 2^x * y == (y << x), hence - * 2^32 * 50000000 ==> (50000000 << 32) + * where, freq_div_ratio = 1e9ns/sec_inc */ - temp = (u64) (50000000ULL << 32); + temp = (u64)(temp << 32); priv->default_addend = div_u64(temp, priv->clk_ptp_rate); priv->hw->ptp->config_addend(priv->ioaddr, priv->default_addend); @@ -2402,7 +2399,7 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev, features &= ~NETIF_F_RXCSUM; if (!priv->plat->tx_coe) - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; /* Some GMAC devices have a bugged Jumbo frame support that * needs to have the Tx COE disabled for oversized frames @@ -2410,7 +2407,7 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev, * the TX csum insertionin the TDES and not use SF. */ if (priv->plat->bugged_jumbo && (dev->mtu > ETH_DATA_LEN)) - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; return features; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index bba670c42e37..16c85ccd1762 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -29,7 +29,7 @@ #include <linux/slab.h> #include <linux/of.h> #include <linux/of_gpio.h> - +#include <linux/of_mdio.h> #include <asm/io.h> #include "stmmac.h" @@ -200,10 +200,29 @@ int stmmac_mdio_register(struct net_device *ndev) struct stmmac_priv *priv = netdev_priv(ndev); struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data; int addr, found; + struct device_node *mdio_node = NULL; + struct device_node *child_node = NULL; if (!mdio_bus_data) return 0; + if (IS_ENABLED(CONFIG_OF)) { + for_each_child_of_node(priv->device->of_node, child_node) { + if (of_device_is_compatible(child_node, + "snps,dwmac-mdio")) { + mdio_node = child_node; + break; + } + } + + if (mdio_node) { + netdev_dbg(ndev, "FOUND MDIO subnode\n"); + } else { + netdev_err(ndev, "NO MDIO subnode\n"); + return 0; + } + } + new_bus = mdiobus_alloc(); if (new_bus == NULL) return -ENOMEM; @@ -231,7 +250,8 @@ int stmmac_mdio_register(struct net_device *ndev) new_bus->irq = irqlist; new_bus->phy_mask = mdio_bus_data->phy_mask; new_bus->parent = priv->device; - err = mdiobus_register(new_bus); + + err = of_mdiobus_register(new_bus, mdio_node); if (err != 0) { pr_err("%s: Cannot register as MDIO bus\n", new_bus->name); goto bus_register_fail; @@ -284,7 +304,7 @@ int stmmac_mdio_register(struct net_device *ndev) } } - if (!found) { + if (!found && !mdio_node) { pr_warn("%s: No PHY found\n", ndev->name); mdiobus_unregister(new_bus); mdiobus_free(new_bus); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index d02691ba3d7f..6a52fa18cbf2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -146,7 +146,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) if (of_property_read_u32(np, "snps,phy-addr", &plat->phy_addr) == 0) dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n"); - if (plat->phy_node || plat->phy_bus_name) + if ((plat->phy_node && !of_phy_is_fixed_link(np)) || plat->phy_bus_name) plat->mdio_bus_data = NULL; else plat->mdio_bus_data = diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 0750d7a93878..31b19fdf659d 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -380,8 +380,11 @@ static struct socket *geneve_create_sock(struct net *net, bool ipv6, static void geneve_notify_add_rx_port(struct geneve_sock *gs) { + struct net_device *dev; struct sock *sk = gs->sock->sk; + struct net *net = sock_net(sk); sa_family_t sa_family = sk->sk_family; + __be16 port = inet_sk(sk)->inet_sport; int err; if (sa_family == AF_INET) { @@ -390,6 +393,14 @@ static void geneve_notify_add_rx_port(struct geneve_sock *gs) pr_warn("geneve: udp_add_offload failed with status %d\n", err); } + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + if (dev->netdev_ops->ndo_add_geneve_port) + dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, + port); + } + rcu_read_unlock(); } static int geneve_hlen(struct genevehdr *gh) @@ -530,8 +541,20 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, static void geneve_notify_del_rx_port(struct geneve_sock *gs) { + struct net_device *dev; struct sock *sk = gs->sock->sk; + struct net *net = sock_net(sk); sa_family_t sa_family = sk->sk_family; + __be16 port = inet_sk(sk)->inet_sport; + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + if (dev->netdev_ops->ndo_del_geneve_port) + dev->netdev_ops->ndo_del_geneve_port(dev, sa_family, + port); + } + + rcu_read_unlock(); if (sa_family == AF_INET) udp_del_offload(&gs->udp_offloads); @@ -1086,6 +1109,30 @@ static struct device_type geneve_type = { .name = "geneve", }; +/* Calls the ndo_add_geneve_port of the caller in order to + * supply the listening GENEVE udp ports. Callers are expected + * to implement the ndo_add_geneve_port. + */ +void geneve_get_rx_port(struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct geneve_net *gn = net_generic(net, geneve_net_id); + struct geneve_sock *gs; + sa_family_t sa_family; + struct sock *sk; + __be16 port; + + rcu_read_lock(); + list_for_each_entry_rcu(gs, &gn->sock_list, list) { + sk = gs->sock->sk; + sa_family = sk->sk_family; + port = inet_sk(sk)->inet_sport; + dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, port); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(geneve_get_rx_port); + /* Initialize the device structure. */ static void geneve_setup(struct net_device *dev) { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index a9268db4e349..f94392d07126 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -88,7 +88,7 @@ static struct lock_class_key ipvlan_netdev_xmit_lock_key; static struct lock_class_key ipvlan_netdev_addr_lock_key; #define IPVLAN_FEATURES \ - (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ + (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index dc7d970bd1c0..a400288cb37b 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -175,7 +175,7 @@ static void loopback_setup(struct net_device *dev) | NETIF_F_UFO | NETIF_F_HW_CSUM | NETIF_F_RXCSUM - | NETIF_F_SCTP_CSUM + | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 06c8bfeaccd6..6a57a005e0ca 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -758,11 +758,11 @@ static struct lock_class_key macvlan_netdev_xmit_lock_key; static struct lock_class_key macvlan_netdev_addr_lock_key; #define ALWAYS_ON_FEATURES \ - (NETIF_F_SG | NETIF_F_GEN_CSUM | NETIF_F_GSO_SOFTWARE | NETIF_F_LLTX | \ + (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | NETIF_F_LLTX | \ NETIF_F_GSO_ROBUST) #define MACVLAN_FEATURES \ - (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ + (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_LRO | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 0fc521941c71..d636d051fac8 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -388,7 +388,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) * check, we either support them all or none. */ if (skb->ip_summed == CHECKSUM_PARTIAL && - !(features & NETIF_F_ALL_CSUM) && + !(features & NETIF_F_CSUM_MASK) && skb_checksum_help(skb)) goto drop; skb_queue_tail(&q->sk.sk_receive_queue, skb); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 059c0f60a2b2..915f60fce186 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -981,7 +981,7 @@ static void team_port_disable(struct team *team, team_lower_state_changed(port); } -#define TEAM_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \ +#define TEAM_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ NETIF_F_HIGHDMA | NETIF_F_LRO) @@ -2091,7 +2091,6 @@ static void team_setup(struct net_device *dev) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; - dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); dev->features |= dev->hw_features; } diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d9427ca3dba7..34642a9583e0 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1986,7 +1986,7 @@ rtl8152_features_check(struct sk_buff *skb, struct net_device *dev, int offset = skb_transport_offset(skb); if ((mss || skb->ip_summed == CHECKSUM_PARTIAL) && offset > max_offset) - features &= ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); else if ((skb->len + sizeof(struct tx_desc)) > agg_buf_sz) features &= ~NETIF_F_GSO_MASK; diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index f4424063b860..0efe7112fc1f 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -1625,7 +1625,7 @@ static int fcoe_xmit(struct fc_lport *lport, struct fc_frame *fp) /* crc offload */ if (likely(lport->crc_offload)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb); skb->csum_offset = skb->len; crc = 0; diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c index 679785b0209c..9de4f23910d8 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c @@ -69,7 +69,7 @@ ksocknal_lib_zc_capable(ksock_conn_t *conn) /* ZC if the socket supports scatter/gather and doesn't need software * checksums */ - return ((caps & NETIF_F_SG) != 0 && (caps & NETIF_F_ALL_CSUM) != 0); + return ((caps & NETIF_F_SG) != 0 && (caps & NETIF_F_CSUM_MASK) != 0); } int diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 05f5879821b8..a5f6ce6b578c 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -621,7 +621,7 @@ static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | + NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 0e707f0c1a3e..7c27fa1030e8 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -3,6 +3,7 @@ #include <uapi/linux/inet_diag.h> +struct net; struct sock; struct inet_hashinfo; struct nlattr; @@ -23,6 +24,10 @@ struct inet_diag_handler { void (*idiag_get_info)(struct sock *sk, struct inet_diag_msg *r, void *info); + + int (*destroy)(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req); + __u16 idiag_type; __u16 idiag_info_size; }; @@ -41,6 +46,10 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, const struct nlmsghdr *nlh, const struct inet_diag_req_v2 *req); +struct sock *inet_diag_find_one_icsk(struct net *net, + struct inet_hashinfo *hashinfo, + const struct inet_diag_req_v2 *req); + int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); extern int inet_diag_register(const struct inet_diag_handler *handler); diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index f0d87347df19..d9654f0eecb3 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -52,7 +52,7 @@ enum { NETIF_F_GSO_TUNNEL_REMCSUM_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ - NETIF_F_SCTP_CSUM_BIT, /* SCTP checksum offload */ + NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ NETIF_F_FCOE_MTU_BIT, /* Supports max FCoE MTU, 2158 bytes*/ NETIF_F_NTUPLE_BIT, /* N-tuple filters supported */ NETIF_F_RXHASH_BIT, /* Receive hashing offload */ @@ -103,7 +103,7 @@ enum { #define NETIF_F_NTUPLE __NETIF_F(NTUPLE) #define NETIF_F_RXCSUM __NETIF_F(RXCSUM) #define NETIF_F_RXHASH __NETIF_F(RXHASH) -#define NETIF_F_SCTP_CSUM __NETIF_F(SCTP_CSUM) +#define NETIF_F_SCTP_CRC __NETIF_F(SCTP_CRC) #define NETIF_F_SG __NETIF_F(SG) #define NETIF_F_TSO6 __NETIF_F(TSO6) #define NETIF_F_TSO_ECN __NETIF_F(TSO_ECN) @@ -146,10 +146,12 @@ enum { #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ NETIF_F_TSO6 | NETIF_F_UFO) -#define NETIF_F_GEN_CSUM NETIF_F_HW_CSUM -#define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM) -#define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) -#define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) +/* List of IP checksum features. Note that NETIF_F_ HW_CSUM should not be + * set in features when NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM are set-- + * this would be contradictory + */ +#define NETIF_F_CSUM_MASK (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \ + NETIF_F_HW_CSUM) #define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1bb21ff0fa64..81b26a543a3c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1013,6 +1013,19 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * a new port starts listening. The operation is protected by the * vxlan_net->sock_lock. * + * void (*ndo_add_geneve_port)(struct net_device *dev, + * sa_family_t sa_family, __be16 port); + * Called by geneve to notify a driver about the UDP port and socket + * address family that geneve is listnening to. It is called only when + * a new port starts listening. The operation is protected by the + * geneve_net->sock_lock. + * + * void (*ndo_del_geneve_port)(struct net_device *dev, + * sa_family_t sa_family, __be16 port); + * Called by geneve to notify the driver about a UDP port and socket + * address family that geneve is not listening to anymore. The operation + * is protected by the geneve_net->sock_lock. + * * void (*ndo_del_vxlan_port)(struct net_device *dev, * sa_family_t sa_family, __be16 port); * Called by vxlan to notify the driver about a UDP port and socket @@ -1217,7 +1230,12 @@ struct net_device_ops { void (*ndo_del_vxlan_port)(struct net_device *dev, sa_family_t sa_family, __be16 port); - + void (*ndo_add_geneve_port)(struct net_device *dev, + sa_family_t sa_family, + __be16 port); + void (*ndo_del_geneve_port)(struct net_device *dev, + sa_family_t sa_family, + __be16 port); void* (*ndo_dfwd_add_station)(struct net_device *pdev, struct net_device *dev); void (*ndo_dfwd_del_station)(struct net_device *pdev, @@ -2522,6 +2540,71 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, remcsum_unadjust((__sum16 *)ptr, grc->delta); } +struct skb_csum_offl_spec { + __u16 ipv4_okay:1, + ipv6_okay:1, + encap_okay:1, + ip_options_okay:1, + ext_hdrs_okay:1, + tcp_okay:1, + udp_okay:1, + sctp_okay:1, + vlan_okay:1, + no_encapped_ipv6:1, + no_not_encapped:1; +}; + +bool __skb_csum_offload_chk(struct sk_buff *skb, + const struct skb_csum_offl_spec *spec, + bool *csum_encapped, + bool csum_help); + +static inline bool skb_csum_offload_chk(struct sk_buff *skb, + const struct skb_csum_offl_spec *spec, + bool *csum_encapped, + bool csum_help) +{ + if (skb->ip_summed != CHECKSUM_PARTIAL) + return false; + + return __skb_csum_offload_chk(skb, spec, csum_encapped, csum_help); +} + +static inline bool skb_csum_offload_chk_help(struct sk_buff *skb, + const struct skb_csum_offl_spec *spec) +{ + bool csum_encapped; + + return skb_csum_offload_chk(skb, spec, &csum_encapped, true); +} + +static inline bool skb_csum_off_chk_help_cmn(struct sk_buff *skb) +{ + static const struct skb_csum_offl_spec csum_offl_spec = { + .ipv4_okay = 1, + .ip_options_okay = 1, + .ipv6_okay = 1, + .vlan_okay = 1, + .tcp_okay = 1, + .udp_okay = 1, + }; + + return skb_csum_offload_chk_help(skb, &csum_offl_spec); +} + +static inline bool skb_csum_off_chk_help_cmn_v4_only(struct sk_buff *skb) +{ + static const struct skb_csum_offl_spec csum_offl_spec = { + .ipv4_okay = 1, + .ip_options_okay = 1, + .tcp_okay = 1, + .udp_okay = 1, + .vlan_okay = 1, + }; + + return skb_csum_offload_chk_help(skb, &csum_offl_spec); +} + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, @@ -3691,13 +3774,37 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth); static inline bool can_checksum_protocol(netdev_features_t features, __be16 protocol) { - return ((features & NETIF_F_GEN_CSUM) || - ((features & NETIF_F_V4_CSUM) && - protocol == htons(ETH_P_IP)) || - ((features & NETIF_F_V6_CSUM) && - protocol == htons(ETH_P_IPV6)) || - ((features & NETIF_F_FCOE_CRC) && - protocol == htons(ETH_P_FCOE))); + if (protocol == htons(ETH_P_FCOE)) + return !!(features & NETIF_F_FCOE_CRC); + + /* Assume this is an IP checksum (not SCTP CRC) */ + + if (features & NETIF_F_HW_CSUM) { + /* Can checksum everything */ + return true; + } + + switch (protocol) { + case htons(ETH_P_IP): + return !!(features & NETIF_F_IP_CSUM); + case htons(ETH_P_IPV6): + return !!(features & NETIF_F_IPV6_CSUM); + default: + return false; + } +} + +/* Map an ethertype into IP protocol if possible */ +static inline int eproto_to_ipproto(int eproto) +{ + switch (eproto) { + case htons(ETH_P_IP): + return IPPROTO_IP; + case htons(ETH_P_IPV6): + return IPPROTO_IPV6; + default: + return -1; + } } #ifdef CONFIG_BUG @@ -3762,15 +3869,14 @@ void linkwatch_run_queue(void); static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, netdev_features_t f2) { - if (f1 & NETIF_F_GEN_CSUM) - f1 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); - if (f2 & NETIF_F_GEN_CSUM) - f2 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); - f1 &= f2; - if (f1 & NETIF_F_GEN_CSUM) - f1 &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + if ((f1 ^ f2) & NETIF_F_HW_CSUM) { + if (f1 & NETIF_F_HW_CSUM) + f1 |= (NETIF_F_IP_CSUM|NETIF_F_IP_CSUM); + else + f2 |= (NETIF_F_IP_CSUM|NETIF_F_IP_CSUM); + } - return f1; + return f1 & f2; } static inline netdev_features_t netdev_get_wanted_features( diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 639e9b8b0e4d..0b41959aab9f 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -131,6 +131,7 @@ netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask) struct netlink_callback { struct sk_buff *skb; const struct nlmsghdr *nlh; + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); @@ -153,6 +154,7 @@ struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags); struct netlink_dump_control { + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff *skb, struct netlink_callback *); int (*done)(struct netlink_callback *); void *data; diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 843ceca9a21e..77deece15fb3 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -819,4 +819,86 @@ out: return err; } +/* Internal function, please use rhashtable_replace_fast() instead */ +static inline int __rhashtable_replace_fast( + struct rhashtable *ht, struct bucket_table *tbl, + struct rhash_head *obj_old, struct rhash_head *obj_new, + const struct rhashtable_params params) +{ + struct rhash_head __rcu **pprev; + struct rhash_head *he; + spinlock_t *lock; + unsigned int hash; + int err = -ENOENT; + + /* Minimally, the old and new objects must have same hash + * (which should mean identifiers are the same). + */ + hash = rht_head_hashfn(ht, tbl, obj_old, params); + if (hash != rht_head_hashfn(ht, tbl, obj_new, params)) + return -EINVAL; + + lock = rht_bucket_lock(tbl, hash); + + spin_lock_bh(lock); + + pprev = &tbl->buckets[hash]; + rht_for_each(he, tbl, hash) { + if (he != obj_old) { + pprev = &he->next; + continue; + } + + rcu_assign_pointer(obj_new->next, obj_old->next); + rcu_assign_pointer(*pprev, obj_new); + err = 0; + break; + } + + spin_unlock_bh(lock); + + return err; +} + +/** + * rhashtable_replace_fast - replace an object in hash table + * @ht: hash table + * @obj_old: pointer to hash head inside object being replaced + * @obj_new: pointer to hash head inside object which is new + * @params: hash table parameters + * + * Replacing an object doesn't affect the number of elements in the hash table + * or bucket, so we don't need to worry about shrinking or expanding the + * table here. + * + * Returns zero on success, -ENOENT if the entry could not be found, + * -EINVAL if hash is not the same for the old and new objects. + */ +static inline int rhashtable_replace_fast( + struct rhashtable *ht, struct rhash_head *obj_old, + struct rhash_head *obj_new, + const struct rhashtable_params params) +{ + struct bucket_table *tbl; + int err; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + + /* Because we have already taken (and released) the bucket + * lock in old_tbl, if we find that future_tbl is not yet + * visible then that guarantees the entry to still be in + * the old tbl if it exists. + */ + while ((err = __rhashtable_replace_fast(ht, tbl, obj_old, + obj_new, params)) && + (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) + ; + + rcu_read_unlock(); + + return err; +} + #endif /* _LINUX_RHASHTABLE_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index af4f6ac025b6..6b6bd42d6134 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -39,11 +39,55 @@ #include <linux/in6.h> #include <net/flow.h> -/* A. Checksumming of received packets by device. +/* The interface for checksum offload between the stack and networking drivers + * is as follows... + * + * A. IP checksum related features + * + * Drivers advertise checksum offload capabilities in the features of a device. + * From the stack's point of view these are capabilities offered by the driver, + * a driver typically only advertises features that it is capable of offloading + * to its device. + * + * The checksum related features are: + * + * NETIF_F_HW_CSUM - The driver (or its device) is able to compute one + * IP (one's complement) checksum for any combination + * of protocols or protocol layering. The checksum is + * computed and set in a packet per the CHECKSUM_PARTIAL + * interface (see below). + * + * NETIF_F_IP_CSUM - Driver (device) is only able to checksum plain + * TCP or UDP packets over IPv4. These are specifically + * unencapsulated packets of the form IPv4|TCP or + * IPv4|UDP where the Protocol field in the IPv4 header + * is TCP or UDP. The IPv4 header may contain IP options + * This feature cannot be set in features for a device + * with NETIF_F_HW_CSUM also set. This feature is being + * DEPRECATED (see below). + * + * NETIF_F_IPV6_CSUM - Driver (device) is only able to checksum plain + * TCP or UDP packets over IPv6. These are specifically + * unencapsulated packets of the form IPv6|TCP or + * IPv4|UDP where the Next Header field in the IPv6 + * header is either TCP or UDP. IPv6 extension headers + * are not supported with this feature. This feature + * cannot be set in features for a device with + * NETIF_F_HW_CSUM also set. This feature is being + * DEPRECATED (see below). + * + * NETIF_F_RXCSUM - Driver (device) performs receive checksum offload. + * This flag is used only used to disable the RX checksum + * feature for a device. The stack will accept receive + * checksum indication in packets received on a device + * regardless of whether NETIF_F_RXCSUM is set. + * + * B. Checksumming of received packets by device. Indication of checksum + * verification is in set skb->ip_summed. Possible values are: * * CHECKSUM_NONE: * - * Device failed to checksum this packet e.g. due to lack of capabilities. + * Device did not checksum this packet e.g. due to lack of capabilities. * The packet contains full (though not verified) checksum in packet but * not in skb->csum. Thus, skb->csum is undefined in this case. * @@ -53,9 +97,8 @@ * (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums * for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY * if their checksums are okay. skb->csum is still undefined in this case - * though. It is a bad option, but, unfortunately, nowadays most vendors do - * this. Apparently with the secret goal to sell you new devices, when you - * will add new protocol to your host, f.e. IPv6 8) + * though. A driver or device must never modify the checksum field in the + * packet even if checksum is verified. * * CHECKSUM_UNNECESSARY is applicable to following protocols: * TCP: IPv6 and IPv4. @@ -96,40 +139,77 @@ * packet that are after the checksum being offloaded are not considered to * be verified. * - * B. Checksumming on output. - * - * CHECKSUM_NONE: - * - * The skb was already checksummed by the protocol, or a checksum is not - * required. + * C. Checksumming on transmit for non-GSO. The stack requests checksum offload + * in the skb->ip_summed for a packet. Values are: * * CHECKSUM_PARTIAL: * - * The device is required to checksum the packet as seen by hard_start_xmit() + * The driver is required to checksum the packet as seen by hard_start_xmit() * from skb->csum_start up to the end, and to record/write the checksum at - * offset skb->csum_start + skb->csum_offset. + * offset skb->csum_start + skb->csum_offset. A driver may verify that the + * csum_start and csum_offset values are valid values given the length and + * offset of the packet, however they should not attempt to validate that the + * checksum refers to a legitimate transport layer checksum-- it is the + * purview of the stack to validate that csum_start and csum_offset are set + * correctly. + * + * When the stack requests checksum offload for a packet, the driver MUST + * ensure that the checksum is set correctly. A driver can either offload the + * checksum calculation to the device, or call skb_checksum_help (in the case + * that the device does not support offload for a particular checksum). + * + * NETIF_F_IP_CSUM and NETIF_F_IPV6_CSUM are being deprecated in favor of + * NETIF_F_HW_CSUM. New devices should use NETIF_F_HW_CSUM to indicate + * checksum offload capability. If a device has limited checksum capabilities + * (for instance can only perform NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM as + * described above) a helper function can be called to resolve + * CHECKSUM_PARTIAL. The helper functions are skb_csum_off_chk*. The helper + * function takes a spec argument that describes the protocol layer that is + * supported for checksum offload and can be called for each packet. If a + * packet does not match the specification for offload, skb_checksum_help + * is called to resolve the checksum. * - * The device must show its capabilities in dev->features, set up at device - * setup time, e.g. netdev_features.h: + * CHECKSUM_NONE: * - * NETIF_F_HW_CSUM - It's a clever device, it's able to checksum everything. - * NETIF_F_IP_CSUM - Device is dumb, it's able to checksum only TCP/UDP over - * IPv4. Sigh. Vendors like this way for an unknown reason. - * Though, see comment above about CHECKSUM_UNNECESSARY. 8) - * NETIF_F_IPV6_CSUM - About as dumb as the last one but does IPv6 instead. - * NETIF_F_... - Well, you get the picture. + * The skb was already checksummed by the protocol, or a checksum is not + * required. * * CHECKSUM_UNNECESSARY: * - * Normally, the device will do per protocol specific checksumming. Protocol - * implementations that do not want the NIC to perform the checksum - * calculation should use this flag in their outgoing skbs. - * - * NETIF_F_FCOE_CRC - This indicates that the device can do FCoE FC CRC - * offload. Correspondingly, the FCoE protocol driver - * stack should use CHECKSUM_UNNECESSARY. + * This has the same meaning on as CHECKSUM_NONE for checksum offload on + * output. * - * Any questions? No questions, good. --ANK + * CHECKSUM_COMPLETE: + * Not used in checksum output. If a driver observes a packet with this value + * set in skbuff, if should treat as CHECKSUM_NONE being set. + * + * D. Non-IP checksum (CRC) offloads + * + * NETIF_F_SCTP_CRC - This feature indicates that a device is capable of + * offloading the SCTP CRC in a packet. To perform this offload the stack + * will set ip_summed to CHECKSUM_PARTIAL and set csum_start and csum_offset + * accordingly. Note the there is no indication in the skbuff that the + * CHECKSUM_PARTIAL refers to an SCTP checksum, a driver that supports + * both IP checksum offload and SCTP CRC offload must verify which offload + * is configured for a packet presumably by inspecting packet headers. + * + * NETIF_F_FCOE_CRC - This feature indicates that a device is capable of + * offloading the FCOE CRC in a packet. To perform this offload the stack + * will set ip_summed to CHECKSUM_PARTIAL and set csum_start and csum_offset + * accordingly. Note the there is no indication in the skbuff that the + * CHECKSUM_PARTIAL refers to an FCOE checksum, a driver that supports + * both IP checksum offload and FCOE CRC offload must verify which offload + * is configured for a packet presumably by inspecting packet headers. + * + * E. Checksumming on output with GSO. + * + * In the case of a GSO packet (skb_is_gso(skb) is true), checksum offload + * is implied by the SKB_GSO_* flags in gso_type. Most obviously, if the + * gso_type is SKB_GSO_TCPV4 or SKB_GSO_TCPV6, TCP checksum offload as + * part of the GSO operation is implied. If a checksum is being offloaded + * with GSO then ip_summed is CHECKSUM_PARTIAL, csum_start and csum_offset + * are set to refer to the outermost checksum being offload (two offloaded + * checksums are possible with UDP encapsulation). */ /* Don't change this without changing skb_csum_unnecessary! */ @@ -1939,6 +2019,11 @@ static inline unsigned char *skb_inner_transport_header(const struct sk_buff return skb->head + skb->inner_transport_header; } +static inline int skb_inner_transport_offset(const struct sk_buff *skb) +{ + return skb_inner_transport_header(skb) - skb->data; +} + static inline void skb_reset_inner_transport_header(struct sk_buff *skb) { skb->inner_transport_header = skb->data - skb->head; diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index fddebc617469..4018b48f2b3b 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -15,6 +15,7 @@ struct sock_diag_handler { __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); int (*get_info)(struct sk_buff *skb, struct sock *sk); + int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh); }; int sock_diag_register(const struct sock_diag_handler *h); @@ -68,4 +69,5 @@ bool sock_diag_has_destroy_listeners(const struct sock *sk) } void sock_diag_broadcast_destroy(struct sock *sk); +int sock_diag_destroy(struct sock *sk, int err); #endif diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 1b6b6dcb018d..43c0e771f417 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -114,6 +114,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) * @flags: flags * @policy: attribute validation policy * @doit: standard command callback + * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps * @ops_list: operations list @@ -122,6 +123,7 @@ struct genl_ops { const struct nla_policy *policy; int (*doit)(struct sk_buff *skb, struct genl_info *info); + int (*start)(struct netlink_callback *cb); int (*dumpit)(struct sk_buff *skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); diff --git a/include/net/geneve.h b/include/net/geneve.h index 3106ed6eae0d..e6c23dc765f7 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -62,6 +62,14 @@ struct genevehdr { struct geneve_opt options[]; }; +#if IS_ENABLED(CONFIG_GENEVE) +void geneve_get_rx_port(struct net_device *netdev); +#else +static inline void geneve_get_rx_port(struct net_device *netdev) +{ +} +#endif + #ifdef CONFIG_INET struct net_device *geneve_dev_create_fb(struct net *net, const char *name, u8 name_assign_type, u16 dst_port); diff --git a/include/net/ila.h b/include/net/ila.h new file mode 100644 index 000000000000..9f4f43e94ae4 --- /dev/null +++ b/include/net/ila.h @@ -0,0 +1,18 @@ +/* + * ILA kernel interface + * + * Copyright (c) 2015 Tom Herbert <tom@herbertland.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + */ + +#ifndef _NET_ILA_H +#define _NET_ILA_H + +int ila_xlat_outgoing(struct sk_buff *skb); +int ila_xlat_incoming(struct sk_buff *skb); + +#endif /* _NET_ILA_H */ diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 8ba379f9e467..c501d67172b1 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -89,6 +89,13 @@ struct netns_sctp { int pf_retrans; /* + * Disable Potentially-Failed feature, the feature is enabled by default + * pf_enable - 0 : disable pf + * - >0 : enable pf + */ + int pf_enable; + + /* * Policy for preforming sctp/socket accounting * 0 - do socket level accounting, all assocs share sk_sndbuf * 1 - do sctp accounting, each asoc may use sk_sndbuf bytes diff --git a/include/net/sock.h b/include/net/sock.h index 0ca22b014de1..6e6e8a25d997 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1060,6 +1060,7 @@ struct proto { void (*destroy_cgroup)(struct mem_cgroup *memcg); struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg); #endif + int (*diag_destroy)(struct sock *sk, int err); }; int proto_register(struct proto *prot, int alloc_slab); @@ -1791,6 +1792,15 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags) sk->sk_route_caps &= ~flags; } +static inline bool sk_check_csum_caps(struct sock *sk) +{ + return (sk->sk_route_caps & NETIF_F_HW_CSUM) || + (sk->sk_family == PF_INET && + (sk->sk_route_caps & NETIF_F_IP_CSUM)) || + (sk->sk_family == PF_INET6 && + (sk->sk_route_caps & NETIF_F_IPV6_CSUM)); +} + static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, struct iov_iter *from, char *to, int copy, int offset) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 1d22ce9f352e..6612946167fe 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -50,6 +50,7 @@ enum switchdev_attr_id { }; struct switchdev_attr { + struct net_device *orig_dev; enum switchdev_attr_id id; u32 flags; union { @@ -68,6 +69,7 @@ enum switchdev_obj_id { }; struct switchdev_obj { + struct net_device *orig_dev; enum switchdev_obj_id id; u32 flags; }; diff --git a/include/net/tcp.h b/include/net/tcp.h index f80e74c5ad18..3077735b348d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1170,6 +1170,8 @@ void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); +int tcp_abort(struct sock *sk, int err); + static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) { rx_opt->dsack = 0; diff --git a/include/net/vxlan.h b/include/net/vxlan.h index c1c899c3a51b..b5a1aec1a167 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -232,7 +232,7 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, skb->inner_protocol != htons(ETH_P_TEB) || (skb_inner_mac_header(skb) - skb_transport_header(skb) != sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h index 7ed9e670814e..abde7bbd6f3b 100644 --- a/include/uapi/linux/ila.h +++ b/include/uapi/linux/ila.h @@ -3,13 +3,35 @@ #ifndef _UAPI_LINUX_ILA_H #define _UAPI_LINUX_ILA_H +/* NETLINK_GENERIC related info */ +#define ILA_GENL_NAME "ila" +#define ILA_GENL_VERSION 0x1 + enum { ILA_ATTR_UNSPEC, ILA_ATTR_LOCATOR, /* u64 */ + ILA_ATTR_IDENTIFIER, /* u64 */ + ILA_ATTR_LOCATOR_MATCH, /* u64 */ + ILA_ATTR_IFINDEX, /* s32 */ + ILA_ATTR_DIR, /* u32 */ __ILA_ATTR_MAX, }; #define ILA_ATTR_MAX (__ILA_ATTR_MAX - 1) +enum { + ILA_CMD_UNSPEC, + ILA_CMD_ADD, + ILA_CMD_DEL, + ILA_CMD_GET, + + __ILA_CMD_MAX, +}; + +#define ILA_CMD_MAX (__ILA_CMD_MAX - 1) + +#define ILA_DIR_IN (1 << 0) +#define ILA_DIR_OUT (1 << 1) + #endif /* _UAPI_LINUX_ILA_H */ diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h index 49230d36f9ce..bae2d80034d4 100644 --- a/include/uapi/linux/sock_diag.h +++ b/include/uapi/linux/sock_diag.h @@ -4,6 +4,7 @@ #include <linux/types.h> #define SOCK_DIAG_BY_FAMILY 20 +#define SOCK_DESTROY 21 struct sock_diag_req { __u8 sdiag_family; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index fded86508117..ad5e2fd1012c 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -30,6 +30,7 @@ #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <net/arp.h> +#include <net/switchdev.h> #include "vlan.h" #include "vlanproc.h" @@ -542,9 +543,9 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); - dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG | + dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | - NETIF_F_HIGHDMA | NETIF_F_SCTP_CSUM | + NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC | NETIF_F_ALL_FCOE; dev->features |= real_dev->vlan_features | NETIF_F_LLTX | @@ -774,6 +775,12 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup, #endif .ndo_fix_features = vlan_dev_fix_features, + .ndo_fdb_add = switchdev_port_fdb_add, + .ndo_fdb_del = switchdev_port_fdb_del, + .ndo_fdb_dump = switchdev_port_fdb_dump, + .ndo_bridge_setlink = switchdev_port_bridge_setlink, + .ndo_bridge_getlink = switchdev_port_bridge_getlink, + .ndo_bridge_dellink = switchdev_port_bridge_dellink, .ndo_get_lock_subclass = vlan_dev_get_lock_subclass, .ndo_get_iflink = vlan_dev_get_iflink, }; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index a642bb829d09..82e3e9705017 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -135,6 +135,7 @@ static void fdb_del_external_learn(struct net_bridge_fdb_entry *f) { struct switchdev_obj_port_fdb fdb = { .obj = { + .orig_dev = f->dst->dev, .id = SWITCHDEV_OBJ_ID_PORT_FDB, .flags = SWITCHDEV_F_DEFER, }, diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 5f3f64553179..b3cca126b103 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -40,6 +40,7 @@ void br_log_state(const struct net_bridge_port *p) void br_set_state(struct net_bridge_port *p, unsigned int state) { struct switchdev_attr attr = { + .orig_dev = p->dev, .id = SWITCHDEV_ATTR_ID_PORT_STP_STATE, .flags = SWITCHDEV_F_DEFER, .u.stp_state = state, @@ -570,6 +571,7 @@ int br_set_max_age(struct net_bridge *br, unsigned long val) int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) { struct switchdev_attr attr = { + .orig_dev = br->dev, .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, .u.ageing_time = ageing_time, diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 5396ff08af32..775e00fbeb1e 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -37,6 +37,7 @@ static inline port_id br_make_port_id(__u8 priority, __u16 port_no) void br_init_port(struct net_bridge_port *p) { struct switchdev_attr attr = { + .orig_dev = p->dev, .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER, .u.ageing_time = p->br->ageing_time, diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 1394da63614a..66c4549efbbb 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -73,6 +73,7 @@ static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, u16 vid, u16 flags) { struct switchdev_obj_port_vlan v = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .flags = flags, .vid_begin = vid, @@ -120,6 +121,7 @@ static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br, u16 vid) { struct switchdev_obj_port_vlan v = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .vid_begin = vid, .vid_end = vid, diff --git a/net/core/dev.c b/net/core/dev.c index 8f705fcedb94..914b4a24c654 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -138,6 +138,7 @@ #include <linux/errqueue.h> #include <linux/hrtimer.h> #include <linux/netfilter_ingress.h> +#include <linux/sctp.h> #include "net-sysfs.h" @@ -2471,6 +2472,141 @@ out: } EXPORT_SYMBOL(skb_checksum_help); +/* skb_csum_offload_check - Driver helper function to determine if a device + * with limited checksum offload capabilities is able to offload the checksum + * for a given packet. + * + * Arguments: + * skb - sk_buff for the packet in question + * spec - contains the description of what device can offload + * csum_encapped - returns true if the checksum being offloaded is + * encpasulated. That is it is checksum for the transport header + * in the inner headers. + * checksum_help - when set indicates that helper function should + * call skb_checksum_help if offload checks fail + * + * Returns: + * true: Packet has passed the checksum checks and should be offloadable to + * the device (a driver may still need to check for additional + * restrictions of its device) + * false: Checksum is not offloadable. If checksum_help was set then + * skb_checksum_help was called to resolve checksum for non-GSO + * packets and when IP protocol is not SCTP + */ +bool __skb_csum_offload_chk(struct sk_buff *skb, + const struct skb_csum_offl_spec *spec, + bool *csum_encapped, + bool csum_help) +{ + struct iphdr *iph; + struct ipv6hdr *ipv6; + void *nhdr; + int protocol; + u8 ip_proto; + + if (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD)) { + if (!spec->vlan_okay) + goto need_help; + } + + /* We check whether the checksum refers to a transport layer checksum in + * the outermost header or an encapsulated transport layer checksum that + * corresponds to the inner headers of the skb. If the checksum is for + * something else in the packet we need help. + */ + if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) { + /* Non-encapsulated checksum */ + protocol = eproto_to_ipproto(vlan_get_protocol(skb)); + nhdr = skb_network_header(skb); + *csum_encapped = false; + if (spec->no_not_encapped) + goto need_help; + } else if (skb->encapsulation && spec->encap_okay && + skb_checksum_start_offset(skb) == + skb_inner_transport_offset(skb)) { + /* Encapsulated checksum */ + *csum_encapped = true; + switch (skb->inner_protocol_type) { + case ENCAP_TYPE_ETHER: + protocol = eproto_to_ipproto(skb->inner_protocol); + break; + case ENCAP_TYPE_IPPROTO: + protocol = skb->inner_protocol; + break; + } + nhdr = skb_inner_network_header(skb); + } else { + goto need_help; + } + + switch (protocol) { + case IPPROTO_IP: + if (!spec->ipv4_okay) + goto need_help; + iph = nhdr; + ip_proto = iph->protocol; + if (iph->ihl != 5 && !spec->ip_options_okay) + goto need_help; + break; + case IPPROTO_IPV6: + if (!spec->ipv6_okay) + goto need_help; + if (spec->no_encapped_ipv6 && *csum_encapped) + goto need_help; + ipv6 = nhdr; + nhdr += sizeof(*ipv6); + ip_proto = ipv6->nexthdr; + break; + default: + goto need_help; + } + +ip_proto_again: + switch (ip_proto) { + case IPPROTO_TCP: + if (!spec->tcp_okay || + skb->csum_offset != offsetof(struct tcphdr, check)) + goto need_help; + break; + case IPPROTO_UDP: + if (!spec->udp_okay || + skb->csum_offset != offsetof(struct udphdr, check)) + goto need_help; + break; + case IPPROTO_SCTP: + if (!spec->sctp_okay || + skb->csum_offset != offsetof(struct sctphdr, checksum)) + goto cant_help; + break; + case NEXTHDR_HOP: + case NEXTHDR_ROUTING: + case NEXTHDR_DEST: { + u8 *opthdr = nhdr; + + if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay) + goto need_help; + + ip_proto = opthdr[0]; + nhdr += (opthdr[1] + 1) << 3; + + goto ip_proto_again; + } + default: + goto need_help; + } + + /* Passed the tests for offloading checksum */ + return true; + +need_help: + if (csum_help && !skb_shinfo(skb)->gso_size) + skb_checksum_help(skb); +cant_help: + return false; +} +EXPORT_SYMBOL(__skb_csum_offload_chk); + __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; @@ -2645,7 +2781,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, type)) { - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } @@ -2792,7 +2928,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device else skb_set_transport_header(skb, skb_checksum_start_offset(skb)); - if (!(features & NETIF_F_ALL_CSUM) && + if (!(features & NETIF_F_CSUM_MASK) && skb_checksum_help(skb)) goto out_kfree_skb; } @@ -6467,9 +6603,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, /* UFO needs SG and checksumming */ if (features & NETIF_F_UFO) { /* maybe split UFO into V4 and V6? */ - if (!((features & NETIF_F_GEN_CSUM) || - (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) - == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + if (!(features & NETIF_F_HW_CSUM) && + ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) != + (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) { netdev_dbg(dev, "Dropping NETIF_F_UFO since no checksum offload features.\n"); features &= ~NETIF_F_UFO; @@ -7571,16 +7707,16 @@ static int dev_cpu_callback(struct notifier_block *nfb, netdev_features_t netdev_increment_features(netdev_features_t all, netdev_features_t one, netdev_features_t mask) { - if (mask & NETIF_F_GEN_CSUM) - mask |= NETIF_F_ALL_CSUM; + if (mask & NETIF_F_HW_CSUM) + mask |= NETIF_F_CSUM_MASK; mask |= NETIF_F_VLAN_CHALLENGED; - all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; + all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask; all &= one | ~NETIF_F_ALL_FOR_ALL; /* If one device supports hw checksumming, set for all. */ - if (all & NETIF_F_GEN_CSUM) - all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + if (all & NETIF_F_HW_CSUM) + all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM); return all; } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 29edf74846fc..09948a726347 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -87,7 +87,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", - [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", + [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu", [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter", [NETIF_F_RXHASH_BIT] = "rx-hashing", @@ -235,7 +235,7 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) switch (eth_cmd) { case ETHTOOL_GTXCSUM: case ETHTOOL_STXCSUM: - return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM; + return NETIF_F_CSUM_MASK | NETIF_F_SCTP_CRC; case ETHTOOL_GRXCSUM: case ETHTOOL_SRXCSUM: return NETIF_F_RXCSUM; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f88a62ab019d..bca8c350e7f3 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -471,6 +471,7 @@ static ssize_t phys_switch_id_show(struct device *dev, if (dev_isalive(netdev)) { struct switchdev_attr attr = { + .orig_dev = netdev, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index de8d5cc5eb24..2be144498bcf 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2898,7 +2898,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, if (!(pkt_dev->flags & F_UDPCSUM)) { skb->ip_summed = CHECKSUM_NONE; - } else if (odev->features & NETIF_F_V4_CSUM) { + } else if (odev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; udp4_hwcsum(skb, iph->saddr, iph->daddr); @@ -3032,7 +3032,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, if (!(pkt_dev->flags & F_UDPCSUM)) { skb->ip_summed = CHECKSUM_NONE; - } else if (odev->features & NETIF_F_V6_CSUM) { + } else if (odev->features & (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM)) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 34ba7a08876d..d8b0113d3eec 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1027,6 +1027,7 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; struct switchdev_attr attr = { + .orig_dev = dev, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 0c1d58d43f67..a996ce8c8fb2 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -214,7 +214,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld) } EXPORT_SYMBOL_GPL(sock_diag_unregister); -static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) { int err; struct sock_diag_req *req = nlmsg_data(nlh); @@ -234,8 +234,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) hndl = sock_diag_handlers[req->sdiag_family]; if (hndl == NULL) err = -ENOENT; - else + else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY) err = hndl->dump(skb, nlh); + else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy) + err = hndl->destroy(skb, nlh); + else + err = -EOPNOTSUPP; mutex_unlock(&sock_diag_table_mutex); return err; @@ -261,7 +265,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return ret; case SOCK_DIAG_BY_FAMILY: - return __sock_diag_rcv_msg(skb, nlh); + case SOCK_DESTROY: + return __sock_diag_cmd(skb, nlh); default: return -EINVAL; } @@ -295,6 +300,18 @@ static int sock_diag_bind(struct net *net, int group) return 0; } +int sock_diag_destroy(struct sock *sk, int err) +{ + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (!sk->sk_prot->diag_destroy) + return -EOPNOTSUPP; + + return sk->sk_prot->diag_destroy(sk, err); +} +EXPORT_SYMBOL_GPL(sock_diag_destroy); + static int __net_init diag_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 416dfa004cfb..c22920525e5d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -436,6 +436,19 @@ config INET_UDP_DIAG Support for UDP socket monitoring interface used by the ss tool. If unsure, say Y. +config INET_DIAG_DESTROY + bool "INET: allow privileged process to administratively close sockets" + depends on INET_DIAG + default n + ---help--- + Provides a SOCK_DESTROY operation that allows privileged processes + (e.g., a connection manager or a network administration tool such as + ss) to close sockets opened by other processes. Closing a socket in + this way interrupts any blocking read/write/connect operations on + the socket and causes future socket calls to behave as if the socket + had been disconnected. + If unsure, say N. + menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index ab9f8a66615d..8bb8e7ad8548 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -350,17 +350,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, nlmsg_flags, unlh); } -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct sk_buff *in_skb, - const struct nlmsghdr *nlh, - const struct inet_diag_req_v2 *req) +struct sock *inet_diag_find_one_icsk(struct net *net, + struct inet_hashinfo *hashinfo, + const struct inet_diag_req_v2 *req) { - struct net *net = sock_net(in_skb->sk); - struct sk_buff *rep; struct sock *sk; - int err; - err = -EINVAL; if (req->sdiag_family == AF_INET) sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], @@ -375,15 +370,33 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, req->id.idiag_if); #endif else - goto out_nosk; + return ERR_PTR(-EINVAL); - err = -ENOENT; if (!sk) - goto out_nosk; + return ERR_PTR(-ENOENT); - err = sock_diag_check_cookie(sk, req->id.idiag_cookie); - if (err) - goto out; + if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { + sock_gen_put(sk); + return ERR_PTR(-ENOENT); + } + + return sk; +} +EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk); + +int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, + struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sk_buff *rep; + struct sock *sk; + int err; + + sk = inet_diag_find_one_icsk(net, hashinfo, req); + if (IS_ERR(sk)) + return PTR_ERR(sk); rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL); if (!rep) { @@ -409,12 +422,11 @@ out: if (sk) sock_gen_put(sk); -out_nosk: return err; } EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); -static int inet_diag_get_exact(struct sk_buff *in_skb, +static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, const struct nlmsghdr *nlh, const struct inet_diag_req_v2 *req) { @@ -424,8 +436,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, handler = inet_diag_lock_handler(req->sdiag_protocol); if (IS_ERR(handler)) err = PTR_ERR(handler); - else + else if (cmd == SOCK_DIAG_BY_FAMILY) err = handler->dump_one(in_skb, nlh, req); + else if (cmd == SOCK_DESTROY && handler->destroy) + err = handler->destroy(in_skb, req); + else + err = -EOPNOTSUPP; inet_diag_unlock_handler(handler); return err; @@ -938,7 +954,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, req.idiag_states = rc->idiag_states; req.id = rc->id; - return inet_diag_get_exact(in_skb, nlh, &req); + return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req); } static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -972,7 +988,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) return inet_diag_get_exact_compat(skb, nlh); } -static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct inet_diag_req_v2); struct net *net = sock_net(skb->sk); @@ -980,7 +996,8 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) if (nlmsg_len(h) < hdrlen) return -EINVAL; - if (h->nlmsg_flags & NLM_F_DUMP) { + if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && + h->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(h, hdrlen)) { struct nlattr *attr; @@ -999,7 +1016,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) } } - return inet_diag_get_exact(skb, h, nlmsg_data(h)); + return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h)); } static @@ -1050,14 +1067,16 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) static const struct sock_diag_handler inet_diag_handler = { .family = AF_INET, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, .get_info = inet_diag_handler_get_info, + .destroy = inet_diag_handler_cmd, }; static const struct sock_diag_handler inet6_diag_handler = { .family = AF_INET6, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, .get_info = inet_diag_handler_get_info, + .destroy = inet_diag_handler_cmd, }; int inet_diag_register(const struct inet_diag_handler *h) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e0b94cd843d7..568e2bc0d93d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -911,7 +911,7 @@ static int __ip_append_data(struct sock *sk, */ if (transhdrlen && length + fragheaderlen <= mtu && - rt->dst.dev->features & NETIF_F_V4_CSUM && + rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) && !(flags & MSG_MORE) && !exthdrlen) csummode = CHECKSUM_PARTIAL; diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 5075b7ecd26d..61c7cc22ea68 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -132,7 +132,8 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { + (!skb->dev || skb->dev->features & + (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c82cca18c90f..2c0e340518d2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1018,7 +1018,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, ssize_t res; if (!(sk->sk_route_caps & NETIF_F_SG) || - !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) + !sk_check_csum_caps(sk)) return sock_no_sendpage(sk->sk_socket, page, offset, size, flags); @@ -1175,7 +1175,7 @@ new_segment: /* * Check whether we can use HW checksum. */ - if (sk->sk_route_caps & NETIF_F_ALL_CSUM) + if (sk_check_csum_caps(sk)) skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, skb); @@ -3080,6 +3080,38 @@ void tcp_done(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_done); +int tcp_abort(struct sock *sk, int err) +{ + if (!sk_fullsock(sk)) { + sock_gen_put(sk); + return -EOPNOTSUPP; + } + + /* Don't race with userspace socket closes such as tcp_close. */ + lock_sock(sk); + + /* Don't race with BH socket closes such as inet_csk_listen_stop. */ + local_bh_disable(); + bh_lock_sock(sk); + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_err = err; + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + smp_wmb(); + sk->sk_error_report(sk); + if (tcp_need_reset(sk->sk_state)) + tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_done(sk); + } + + bh_unlock_sock(sk); + local_bh_enable(); + release_sock(sk); + sock_put(sk); + return 0; +} +EXPORT_SYMBOL_GPL(tcp_abort); + extern struct tcp_congestion_ops tcp_reno; static __initdata unsigned long thash_entries; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index b31604086edd..4d610934fb39 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -10,6 +10,8 @@ */ #include <linux/module.h> +#include <linux/net.h> +#include <linux/sock_diag.h> #include <linux/inet_diag.h> #include <linux/tcp.h> @@ -46,12 +48,29 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); } +#ifdef CONFIG_INET_DIAG_DESTROY +static int tcp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req); + + if (IS_ERR(sk)) + return PTR_ERR(sk); + + return sock_diag_destroy(sk, ECONNABORTED); +} +#endif + static const struct inet_diag_handler tcp_diag_handler = { .dump = tcp_diag_dump, .dump_one = tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, .idiag_type = IPPROTO_TCP, .idiag_info_size = sizeof(struct tcp_info), +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = tcp_diag_destroy, +#endif }; static int __init tcp_diag_init(void) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index db003438aaf5..7aa13bd3de29 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2342,6 +2342,7 @@ struct proto tcp_prot = { .destroy_cgroup = tcp_destroy_cgroup, .proto_cgroup = tcp_proto_cgroup, #endif + .diag_destroy = tcp_abort, }; EXPORT_SYMBOL(tcp_prot); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0c7b0e61b917..8841e984f8bf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -772,7 +772,8 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb, else if (skb_is_gso(skb)) uh->check = ~udp_v4_check(len, saddr, daddr, 0); else if (skb_dst(skb) && skb_dst(skb)->dev && - (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { + (skb_dst(skb)->dev->features & + (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index f9386160cbee..130042660181 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -60,8 +60,9 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, /* Try to offload checksum if possible */ offload_csum = !!(need_csum && - (skb->dev->features & - (is_ipv6 ? NETIF_F_V6_CSUM : NETIF_F_V4_CSUM))); + ((skb->dev->features & NETIF_F_HW_CSUM) || + (skb->dev->features & (is_ipv6 ? + NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM)))); /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & features; diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 2c900c7b7eb1..2fbd90bf8d33 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -34,7 +34,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o obj-$(CONFIG_IPV6_MIP6) += mip6.o -obj-$(CONFIG_IPV6_ILA) += ila.o +obj-$(CONFIG_IPV6_ILA) += ila/ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_VTI) += ip6_vti.o diff --git a/net/ipv6/ila/Makefile b/net/ipv6/ila/Makefile new file mode 100644 index 000000000000..4b32e5921e5c --- /dev/null +++ b/net/ipv6/ila/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for ILA module +# + +obj-$(CONFIG_IPV6_ILA) += ila.o + +ila-objs := ila_common.o ila_lwt.o ila_xlat.o diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h new file mode 100644 index 000000000000..28542cb2b387 --- /dev/null +++ b/net/ipv6/ila/ila.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2015 Tom Herbert <tom@herbertland.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + */ + +#ifndef __ILA_H +#define __ILA_H + +#include <linux/errno.h> +#include <linux/ip.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/socket.h> +#include <linux/skbuff.h> +#include <linux/types.h> +#include <net/checksum.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <uapi/linux/ila.h> + +struct ila_params { + __be64 locator; + __be64 locator_match; + __wsum csum_diff; +}; + +static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) +{ + __be32 diff[] = { + ~from[0], ~from[1], to[0], to[1], + }; + + return csum_partial(diff, sizeof(diff), 0); +} + +void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p); + +int ila_lwt_init(void); +void ila_lwt_fini(void); +int ila_xlat_init(void); +void ila_xlat_fini(void); + +#endif /* __ILA_H */ diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c new file mode 100644 index 000000000000..32dc9aab7297 --- /dev/null +++ b/net/ipv6/ila/ila_common.c @@ -0,0 +1,103 @@ +#include <linux/errno.h> +#include <linux/ip.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/socket.h> +#include <linux/types.h> +#include <net/checksum.h> +#include <net/ip.h> +#include <net/ip6_fib.h> +#include <net/lwtunnel.h> +#include <net/protocol.h> +#include <uapi/linux/ila.h> +#include "ila.h" + +static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) +{ + if (*(__be64 *)&ip6h->daddr == p->locator_match) + return p->csum_diff; + else + return compute_csum_diff8((__be32 *)&ip6h->daddr, + (__be32 *)&p->locator); +} + +void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) +{ + __wsum diff; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + size_t nhoff = sizeof(struct ipv6hdr); + + /* First update checksum */ + switch (ip6h->nexthdr) { + case NEXTHDR_TCP: + if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) { + struct tcphdr *th = (struct tcphdr *) + (skb_network_header(skb) + nhoff); + + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&th->check, skb, + diff, true); + } + break; + case NEXTHDR_UDP: + if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr)))) { + struct udphdr *uh = (struct udphdr *) + (skb_network_header(skb) + nhoff); + + if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&uh->check, skb, + diff, true); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + } + break; + case NEXTHDR_ICMP: + if (likely(pskb_may_pull(skb, + nhoff + sizeof(struct icmp6hdr)))) { + struct icmp6hdr *ih = (struct icmp6hdr *) + (skb_network_header(skb) + nhoff); + + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb, + diff, true); + } + break; + } + + /* Now change destination address */ + *(__be64 *)&ip6h->daddr = p->locator; +} + +static int __init ila_init(void) +{ + int ret; + + ret = ila_lwt_init(); + + if (ret) + goto fail_lwt; + + ret = ila_xlat_init(); + if (ret) + goto fail_xlat; + + return 0; +fail_xlat: + ila_lwt_fini(); +fail_lwt: + return ret; +} + +static void __exit ila_fini(void) +{ + ila_xlat_fini(); + ila_lwt_fini(); +} + +module_init(ila_init); +module_exit(ila_fini); +MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>"); +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ila.c b/net/ipv6/ila/ila_lwt.c index 1a6852e1ac69..2ae3c4fd8aab 100644 --- a/net/ipv6/ila.c +++ b/net/ipv6/ila/ila_lwt.c @@ -11,12 +11,7 @@ #include <net/lwtunnel.h> #include <net/protocol.h> #include <uapi/linux/ila.h> - -struct ila_params { - __be64 locator; - __be64 locator_match; - __wsum csum_diff; -}; +#include "ila.h" static inline struct ila_params *ila_params_lwtunnel( struct lwtunnel_state *lwstate) @@ -24,73 +19,6 @@ static inline struct ila_params *ila_params_lwtunnel( return (struct ila_params *)lwstate->data; } -static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) -{ - __be32 diff[] = { - ~from[0], ~from[1], to[0], to[1], - }; - - return csum_partial(diff, sizeof(diff), 0); -} - -static inline __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) -{ - if (*(__be64 *)&ip6h->daddr == p->locator_match) - return p->csum_diff; - else - return compute_csum_diff8((__be32 *)&ip6h->daddr, - (__be32 *)&p->locator); -} - -static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) -{ - __wsum diff; - struct ipv6hdr *ip6h = ipv6_hdr(skb); - size_t nhoff = sizeof(struct ipv6hdr); - - /* First update checksum */ - switch (ip6h->nexthdr) { - case NEXTHDR_TCP: - if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) { - struct tcphdr *th = (struct tcphdr *) - (skb_network_header(skb) + nhoff); - - diff = get_csum_diff(ip6h, p); - inet_proto_csum_replace_by_diff(&th->check, skb, - diff, true); - } - break; - case NEXTHDR_UDP: - if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr)))) { - struct udphdr *uh = (struct udphdr *) - (skb_network_header(skb) + nhoff); - - if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { - diff = get_csum_diff(ip6h, p); - inet_proto_csum_replace_by_diff(&uh->check, skb, - diff, true); - if (!uh->check) - uh->check = CSUM_MANGLED_0; - } - } - break; - case NEXTHDR_ICMP: - if (likely(pskb_may_pull(skb, - nhoff + sizeof(struct icmp6hdr)))) { - struct icmp6hdr *ih = (struct icmp6hdr *) - (skb_network_header(skb) + nhoff); - - diff = get_csum_diff(ip6h, p); - inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb, - diff, true); - } - break; - } - - /* Now change destination address */ - *(__be64 *)&ip6h->daddr = p->locator; -} - static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -213,17 +141,12 @@ static const struct lwtunnel_encap_ops ila_encap_ops = { .cmp_encap = ila_encap_cmp, }; -static int __init ila_init(void) +int ila_lwt_init(void) { return lwtunnel_encap_add_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA); } -static void __exit ila_fini(void) +void ila_lwt_fini(void) { lwtunnel_encap_del_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA); } - -module_init(ila_init); -module_exit(ila_fini); -MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>"); -MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c new file mode 100644 index 000000000000..295ca29a23c3 --- /dev/null +++ b/net/ipv6/ila/ila_xlat.c @@ -0,0 +1,680 @@ +#include <linux/jhash.h> +#include <linux/netfilter.h> +#include <linux/rcupdate.h> +#include <linux/rhashtable.h> +#include <linux/vmalloc.h> +#include <net/genetlink.h> +#include <net/ila.h> +#include <net/netns/generic.h> +#include <uapi/linux/genetlink.h> +#include "ila.h" + +struct ila_xlat_params { + struct ila_params ip; + __be64 identifier; + int ifindex; + unsigned int dir; +}; + +struct ila_map { + struct ila_xlat_params p; + struct rhash_head node; + struct ila_map __rcu *next; + struct rcu_head rcu; +}; + +static unsigned int ila_net_id; + +struct ila_net { + struct rhashtable rhash_table; + spinlock_t *locks; /* Bucket locks for entry manipulation */ + unsigned int locks_mask; + bool hooks_registered; +}; + +#define LOCKS_PER_CPU 10 + +static int alloc_ila_locks(struct ila_net *ilan) +{ + unsigned int i, size; + unsigned int nr_pcpus = num_possible_cpus(); + + nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL); + size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU); + + if (sizeof(spinlock_t) != 0) { +#ifdef CONFIG_NUMA + if (size * sizeof(spinlock_t) > PAGE_SIZE) + ilan->locks = vmalloc(size * sizeof(spinlock_t)); + else +#endif + ilan->locks = kmalloc_array(size, sizeof(spinlock_t), + GFP_KERNEL); + if (!ilan->locks) + return -ENOMEM; + for (i = 0; i < size; i++) + spin_lock_init(&ilan->locks[i]); + } + ilan->locks_mask = size - 1; + + return 0; +} + +static u32 hashrnd __read_mostly; +static __always_inline void __ila_hash_secret_init(void) +{ + net_get_random_once(&hashrnd, sizeof(hashrnd)); +} + +static inline u32 ila_identifier_hash(__be64 identifier) +{ + u32 *v = (u32 *)&identifier; + + return jhash_2words(v[0], v[1], hashrnd); +} + +static inline spinlock_t *ila_get_lock(struct ila_net *ilan, __be64 identifier) +{ + return &ilan->locks[ila_identifier_hash(identifier) & ilan->locks_mask]; +} + +static inline int ila_cmp_wildcards(struct ila_map *ila, __be64 loc, + int ifindex, unsigned int dir) +{ + return (ila->p.ip.locator_match && ila->p.ip.locator_match != loc) || + (ila->p.ifindex && ila->p.ifindex != ifindex) || + !(ila->p.dir & dir); +} + +static inline int ila_cmp_params(struct ila_map *ila, struct ila_xlat_params *p) +{ + return (ila->p.ip.locator_match != p->ip.locator_match) || + (ila->p.ifindex != p->ifindex) || + (ila->p.dir != p->dir); +} + +static int ila_cmpfn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct ila_map *ila = obj; + + return (ila->p.identifier != *(__be64 *)arg->key); +} + +static inline int ila_order(struct ila_map *ila) +{ + int score = 0; + + if (ila->p.ip.locator_match) + score += 1 << 0; + + if (ila->p.ifindex) + score += 1 << 1; + + return score; +} + +static const struct rhashtable_params rht_params = { + .nelem_hint = 1024, + .head_offset = offsetof(struct ila_map, node), + .key_offset = offsetof(struct ila_map, p.identifier), + .key_len = sizeof(u64), /* identifier */ + .max_size = 1048576, + .min_size = 256, + .automatic_shrinking = true, + .obj_cmpfn = ila_cmpfn, +}; + +static struct genl_family ila_nl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = ILA_GENL_NAME, + .version = ILA_GENL_VERSION, + .maxattr = ILA_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, +}; + +static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { + [ILA_ATTR_IDENTIFIER] = { .type = NLA_U64, }, + [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, + [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, + [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, + [ILA_ATTR_DIR] = { .type = NLA_U32, }, +}; + +static int parse_nl_config(struct genl_info *info, + struct ila_xlat_params *p) +{ + memset(p, 0, sizeof(*p)); + + if (info->attrs[ILA_ATTR_IDENTIFIER]) + p->identifier = (__force __be64)nla_get_u64( + info->attrs[ILA_ATTR_IDENTIFIER]); + + if (info->attrs[ILA_ATTR_LOCATOR]) + p->ip.locator = (__force __be64)nla_get_u64( + info->attrs[ILA_ATTR_LOCATOR]); + + if (info->attrs[ILA_ATTR_LOCATOR_MATCH]) + p->ip.locator_match = (__force __be64)nla_get_u64( + info->attrs[ILA_ATTR_LOCATOR_MATCH]); + + if (info->attrs[ILA_ATTR_IFINDEX]) + p->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); + + if (info->attrs[ILA_ATTR_DIR]) + p->dir = nla_get_u32(info->attrs[ILA_ATTR_DIR]); + + return 0; +} + +/* Must be called with rcu readlock */ +static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc, + int ifindex, + unsigned int dir, + struct ila_net *ilan) +{ + struct ila_map *ila; + + ila = rhashtable_lookup_fast(&ilan->rhash_table, &id, rht_params); + while (ila) { + if (!ila_cmp_wildcards(ila, loc, ifindex, dir)) + return ila; + ila = rcu_access_pointer(ila->next); + } + + return NULL; +} + +/* Must be called with rcu readlock */ +static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *p, + struct ila_net *ilan) +{ + struct ila_map *ila; + + ila = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier, + rht_params); + while (ila) { + if (!ila_cmp_params(ila, p)) + return ila; + ila = rcu_access_pointer(ila->next); + } + + return NULL; +} + +static inline void ila_release(struct ila_map *ila) +{ + kfree_rcu(ila, rcu); +} + +static void ila_free_cb(void *ptr, void *arg) +{ + struct ila_map *ila = (struct ila_map *)ptr, *next; + + /* Assume rcu_readlock held */ + while (ila) { + next = rcu_access_pointer(ila->next); + ila_release(ila); + ila = next; + } +} + +static int ila_xlat_addr(struct sk_buff *skb, int dir); + +static unsigned int +ila_nf_input(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + ila_xlat_addr(skb, ILA_DIR_IN); + return NF_ACCEPT; +} + +static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = { + { + .hook = ila_nf_input, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_PRE_ROUTING, + .priority = -1, + }, +}; + +static int ila_add_mapping(struct net *net, struct ila_xlat_params *p) +{ + struct ila_net *ilan = net_generic(net, ila_net_id); + struct ila_map *ila, *head; + spinlock_t *lock = ila_get_lock(ilan, p->identifier); + int err = 0, order; + + if (!ilan->hooks_registered) { + /* We defer registering net hooks in the namespace until the + * first mapping is added. + */ + err = nf_register_net_hooks(net, ila_nf_hook_ops, + ARRAY_SIZE(ila_nf_hook_ops)); + if (err) + return err; + + ilan->hooks_registered = true; + } + + ila = kzalloc(sizeof(*ila), GFP_KERNEL); + if (!ila) + return -ENOMEM; + + ila->p = *p; + + if (p->ip.locator_match) { + /* Precompute checksum difference for translation since we + * know both the old identifier and the new one. + */ + ila->p.ip.csum_diff = compute_csum_diff8( + (__be32 *)&p->ip.locator_match, + (__be32 *)&p->ip.locator); + } + + order = ila_order(ila); + + spin_lock(lock); + + head = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier, + rht_params); + if (!head) { + /* New entry for the rhash_table */ + err = rhashtable_lookup_insert_fast(&ilan->rhash_table, + &ila->node, rht_params); + } else { + struct ila_map *tila = head, *prev = NULL; + + do { + if (!ila_cmp_params(tila, p)) { + err = -EEXIST; + goto out; + } + + if (order > ila_order(tila)) + break; + + prev = tila; + tila = rcu_dereference_protected(tila->next, + lockdep_is_held(lock)); + } while (tila); + + if (prev) { + /* Insert in sub list of head */ + RCU_INIT_POINTER(ila->next, tila); + rcu_assign_pointer(prev->next, ila); + } else { + /* Make this ila new head */ + RCU_INIT_POINTER(ila->next, head); + err = rhashtable_replace_fast(&ilan->rhash_table, + &head->node, + &ila->node, rht_params); + if (err) + goto out; + } + } + +out: + spin_unlock(lock); + + if (err) + kfree(ila); + + return err; +} + +static int ila_del_mapping(struct net *net, struct ila_xlat_params *p) +{ + struct ila_net *ilan = net_generic(net, ila_net_id); + struct ila_map *ila, *head, *prev; + spinlock_t *lock = ila_get_lock(ilan, p->identifier); + int err = -ENOENT; + + spin_lock(lock); + + head = rhashtable_lookup_fast(&ilan->rhash_table, + &p->identifier, rht_params); + ila = head; + + prev = NULL; + + while (ila) { + if (ila_cmp_params(ila, p)) { + prev = ila; + ila = rcu_dereference_protected(ila->next, + lockdep_is_held(lock)); + continue; + } + + err = 0; + + if (prev) { + /* Not head, just delete from list */ + rcu_assign_pointer(prev->next, ila->next); + } else { + /* It is the head. If there is something in the + * sublist we need to make a new head. + */ + head = rcu_dereference_protected(ila->next, + lockdep_is_held(lock)); + if (head) { + /* Put first entry in the sublist into the + * table + */ + err = rhashtable_replace_fast( + &ilan->rhash_table, &ila->node, + &head->node, rht_params); + if (err) + goto out; + } else { + /* Entry no longer used */ + err = rhashtable_remove_fast(&ilan->rhash_table, + &ila->node, + rht_params); + } + } + + ila_release(ila); + + break; + } + +out: + spin_unlock(lock); + + return err; +} + +static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct ila_xlat_params p; + int err; + + err = parse_nl_config(info, &p); + if (err) + return err; + + return ila_add_mapping(net, &p); +} + +static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct ila_xlat_params p; + int err; + + err = parse_nl_config(info, &p); + if (err) + return err; + + ila_del_mapping(net, &p); + + return 0; +} + +static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg) +{ + if (nla_put_u64(msg, ILA_ATTR_IDENTIFIER, + (__force u64)ila->p.identifier) || + nla_put_u64(msg, ILA_ATTR_LOCATOR, + (__force u64)ila->p.ip.locator) || + nla_put_u64(msg, ILA_ATTR_LOCATOR_MATCH, + (__force u64)ila->p.ip.locator_match) || + nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->p.ifindex) || + nla_put_u32(msg, ILA_ATTR_DIR, ila->p.dir)) + return -1; + + return 0; +} + +static int ila_dump_info(struct ila_map *ila, + u32 portid, u32 seq, u32 flags, + struct sk_buff *skb, u8 cmd) +{ + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &ila_nl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + if (ila_fill_info(ila, skb) < 0) + goto nla_put_failure; + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct ila_net *ilan = net_generic(net, ila_net_id); + struct sk_buff *msg; + struct ila_xlat_params p; + struct ila_map *ila; + int ret; + + ret = parse_nl_config(info, &p); + if (ret) + return ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + rcu_read_lock(); + + ila = ila_lookup_by_params(&p, ilan); + if (ila) { + ret = ila_dump_info(ila, + info->snd_portid, + info->snd_seq, 0, msg, + info->genlhdr->cmd); + } + + rcu_read_unlock(); + + if (ret < 0) + goto out_free; + + return genlmsg_reply(msg, info); + +out_free: + nlmsg_free(msg); + return ret; +} + +struct ila_dump_iter { + struct rhashtable_iter rhiter; +}; + +static int ila_nl_dump_start(struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct ila_net *ilan = net_generic(net, ila_net_id); + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + + return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter); +} + +static int ila_nl_dump_done(struct netlink_callback *cb) +{ + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + + rhashtable_walk_exit(&iter->rhiter); + + return 0; +} + +static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + struct rhashtable_iter *rhiter = &iter->rhiter; + struct ila_map *ila; + int ret; + + ret = rhashtable_walk_start(rhiter); + if (ret && ret != -EAGAIN) + goto done; + + for (;;) { + ila = rhashtable_walk_next(rhiter); + + if (IS_ERR(ila)) { + if (PTR_ERR(ila) == -EAGAIN) + continue; + ret = PTR_ERR(ila); + goto done; + } else if (!ila) { + break; + } + + while (ila) { + ret = ila_dump_info(ila, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + skb, ILA_CMD_GET); + if (ret) + goto done; + + ila = rcu_access_pointer(ila->next); + } + } + + ret = skb->len; + +done: + rhashtable_walk_stop(rhiter); + return ret; +} + +static const struct genl_ops ila_nl_ops[] = { + { + .cmd = ILA_CMD_ADD, + .doit = ila_nl_cmd_add_mapping, + .policy = ila_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = ILA_CMD_DEL, + .doit = ila_nl_cmd_del_mapping, + .policy = ila_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = ILA_CMD_GET, + .doit = ila_nl_cmd_get_mapping, + .start = ila_nl_dump_start, + .dumpit = ila_nl_dump, + .done = ila_nl_dump_done, + .policy = ila_nl_policy, + }, +}; + +#define ILA_HASH_TABLE_SIZE 1024 + +static __net_init int ila_init_net(struct net *net) +{ + int err; + struct ila_net *ilan = net_generic(net, ila_net_id); + + err = alloc_ila_locks(ilan); + if (err) + return err; + + rhashtable_init(&ilan->rhash_table, &rht_params); + + return 0; +} + +static __net_exit void ila_exit_net(struct net *net) +{ + struct ila_net *ilan = net_generic(net, ila_net_id); + + rhashtable_free_and_destroy(&ilan->rhash_table, ila_free_cb, NULL); + + kvfree(ilan->locks); + + if (ilan->hooks_registered) + nf_unregister_net_hooks(net, ila_nf_hook_ops, + ARRAY_SIZE(ila_nf_hook_ops)); +} + +static struct pernet_operations ila_net_ops = { + .init = ila_init_net, + .exit = ila_exit_net, + .id = &ila_net_id, + .size = sizeof(struct ila_net), +}; + +static int ila_xlat_addr(struct sk_buff *skb, int dir) +{ + struct ila_map *ila; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct net *net = dev_net(skb->dev); + struct ila_net *ilan = net_generic(net, ila_net_id); + __be64 identifier, locator_match; + size_t nhoff; + + /* Assumes skb contains a valid IPv6 header that is pulled */ + + identifier = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[8]; + locator_match = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[0]; + nhoff = sizeof(struct ipv6hdr); + + rcu_read_lock(); + + ila = ila_lookup_wildcards(identifier, locator_match, + skb->dev->ifindex, dir, ilan); + if (ila) + update_ipv6_locator(skb, &ila->p.ip); + + rcu_read_unlock(); + + return 0; +} + +int ila_xlat_incoming(struct sk_buff *skb) +{ + return ila_xlat_addr(skb, ILA_DIR_IN); +} +EXPORT_SYMBOL(ila_xlat_incoming); + +int ila_xlat_outgoing(struct sk_buff *skb) +{ + return ila_xlat_addr(skb, ILA_DIR_OUT); +} +EXPORT_SYMBOL(ila_xlat_outgoing); + +int ila_xlat_init(void) +{ + int ret; + + ret = register_pernet_device(&ila_net_ops); + if (ret) + goto exit; + + ret = genl_register_family_with_ops(&ila_nl_family, + ila_nl_ops); + if (ret < 0) + goto unregister; + + return 0; + +unregister: + unregister_pernet_device(&ila_net_ops); +exit: + return ret; +} + +void ila_xlat_fini(void) +{ + genl_unregister_family(&ila_nl_family); + unregister_pernet_device(&ila_net_ops); +} diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e6a7bd15b9b7..2f748452b4aa 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1322,7 +1322,7 @@ emsgsize: headersize == sizeof(struct ipv6hdr) && length < mtu - headersize && !(flags & MSG_MORE) && - rt->dst.dev->features & NETIF_F_V6_CSUM) + rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) csummode = CHECKSUM_PARTIAL; if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 238e70c3f7b7..6ce309928841 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -136,7 +136,8 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt6i_flags & RTF_LOCAL) && - (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) { + (!skb->dev || skb->dev->features & + (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c16e3fbf6854..5382c2662fa2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1890,6 +1890,7 @@ struct proto tcpv6_prot = { .proto_cgroup = tcp_proto_cgroup, #endif .clear_sk = tcp_v6_clear_sk, + .diag_destroy = tcp_abort, }; static const struct inet6_protocol tcpv6_protocol = { diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 010ddeec135f..d952d67f904d 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -169,7 +169,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, /* Only update csum if we really have to */ if (sctph->dest != cp->dport || payload_csum || (skb->ip_summed == CHECKSUM_PARTIAL && - !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) { + !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) { sctph->dest = cp->dport; sctp_nat_csum(skb, sctph, sctphoff); } else if (skb->ip_summed != CHECKSUM_PARTIAL) { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 59651af8cc27..81dc1bb6e016 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2915,6 +2915,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb = &nlk->cb; memset(cb, 0, sizeof(*cb)); + cb->start = control->start; cb->dump = control->dump; cb->done = control->done; cb->nlh = nlh; @@ -2927,6 +2928,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, mutex_unlock(nlk->cb_mutex); + if (cb->start) + cb->start(cb); + ret = netlink_dump(sk); sock_put(sk); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index bc0e504f33a6..8e63662c6fb0 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -513,6 +513,20 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); +static int genl_lock_start(struct netlink_callback *cb) +{ + /* our ops are always const - netlink API doesn't propagate that */ + const struct genl_ops *ops = cb->data; + int rc = 0; + + if (ops->start) { + genl_lock(); + rc = ops->start(cb); + genl_unlock(); + } + return rc; +} + static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { /* our ops are always const - netlink API doesn't propagate that */ @@ -577,6 +591,7 @@ static int genl_family_rcv_msg(struct genl_family *family, .module = family->module, /* we have const, but the netlink API doesn't */ .data = (void *)ops, + .start = genl_lock_start, .dump = genl_lock_dumpit, .done = genl_lock_done, }; @@ -588,6 +603,7 @@ static int genl_family_rcv_msg(struct genl_family *family, } else { struct netlink_dump_control c = { .module = family->module, + .start = ops->start, .dump = ops->dumpit, .done = ops->done, }; diff --git a/net/sctp/output.c b/net/sctp/output.c index abe7c2db2412..9d610eddd19e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -534,7 +534,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. */ if (!sctp_checksum_disable) { - if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || + if (!(dst->dev->features & NETIF_F_SCTP_CRC) || (dst_xfrm(dst) != NULL) || packet->ipfragok) { sh->checksum = sctp_compute_cksum(nskb, 0); } else { diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 3d9ea9a48289..010aced44b6b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1223,6 +1223,9 @@ static int __net_init sctp_defaults_init(struct net *net) /* Max.Burst - 4 */ net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST; + /* Enable pf state by default */ + net->sctp.pf_enable = 1; + /* Association.Max.Retrans - 10 attempts * Path.Max.Retrans - 5 attempts (per destination address) * Max.Init.Retransmits - 8 attempts @@ -1419,7 +1422,7 @@ static __init int sctp_init(void) if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0) continue; sctp_assoc_hashtable = (struct sctp_hashbucket *) - __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order); + __get_free_pages(GFP_KERNEL | __GFP_NOWARN, order); } while (!sctp_assoc_hashtable && --order > 0); if (!sctp_assoc_hashtable) { pr_err("Failed association hash alloc\n"); @@ -1452,7 +1455,7 @@ static __init int sctp_init(void) if ((sctp_port_hashsize > (64 * 1024)) && order > 0) continue; sctp_port_hashtable = (struct sctp_bind_hashbucket *) - __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order); + __get_free_pages(GFP_KERNEL | __GFP_NOWARN, order); } while (!sctp_port_hashtable && --order > 0); if (!sctp_port_hashtable) { pr_err("Failed bind hash alloc\n"); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 6098d4c42fa9..05cd16400e0b 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -477,6 +477,8 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, struct sctp_transport *transport, int is_hb) { + struct net *net = sock_net(asoc->base.sk); + /* The check for association's overall error counter exceeding the * threshold is done in the state function. */ @@ -503,7 +505,8 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, * is SCTP_ACTIVE, then mark this transport as Partially Failed, * see SCTP Quick Failover Draft, section 5.1 */ - if ((transport->state == SCTP_ACTIVE) && + if (net->sctp.pf_enable && + (transport->state == SCTP_ACTIVE) && (asoc->pf_retrans < transport->pathmaxrxt) && (transport->error_count > asoc->pf_retrans)) { diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 26d50c565f54..ccbfc93fb8fe 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -308,6 +308,13 @@ static struct ctl_table sctp_net_table[] = { .extra1 = &max_autoclose_min, .extra2 = &max_autoclose_max, }, + { + .procname = "pf_enable", + .data = &init_net.sctp.pf_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { /* sentinel */ } }; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index f34e535e93bd..df790d3385a2 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -723,6 +723,7 @@ static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev, u32 filter_mask) { struct switchdev_vlan_dump dump = { + .vlan.obj.orig_dev = dev, .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .skb = skb, .filter_mask = filter_mask, @@ -757,6 +758,7 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, int nlflags) { struct switchdev_attr attr = { + .orig_dev = dev, .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, }; u16 mode = BRIDGE_MODE_UNDEF; @@ -778,6 +780,7 @@ static int switchdev_port_br_setflag(struct net_device *dev, unsigned long brport_flag) { struct switchdev_attr attr = { + .orig_dev = dev, .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, }; u8 flag = nla_get_u8(nlattr); @@ -853,6 +856,7 @@ static int switchdev_port_br_afspec(struct net_device *dev, struct nlattr *attr; struct bridge_vlan_info *vinfo; struct switchdev_obj_port_vlan vlan = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, }; int rem; @@ -975,6 +979,7 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], u16 vid, u16 nlm_flags) { struct switchdev_obj_port_fdb fdb = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, .vid = vid, }; @@ -1000,6 +1005,7 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], u16 vid) { struct switchdev_obj_port_fdb fdb = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, .vid = vid, }; @@ -1077,6 +1083,7 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *filter_dev, int idx) { struct switchdev_fdb_dump dump = { + .fdb.obj.orig_dev = dev, .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, .dev = dev, .skb = skb, @@ -1135,6 +1142,7 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) if (!dev) return NULL; + attr.orig_dev = dev; if (switchdev_port_attr_get(dev, &attr)) return NULL; @@ -1194,6 +1202,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, if (!dev) return 0; + ipv4_fib.obj.orig_dev = dev; err = switchdev_port_obj_add(dev, &ipv4_fib.obj); if (!err) fi->fib_flags |= RTNH_F_OFFLOAD; @@ -1238,6 +1247,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, if (!dev) return 0; + ipv4_fib.obj.orig_dev = dev; err = switchdev_port_obj_del(dev, &ipv4_fib.obj); if (!err) fi->fib_flags &= ~RTNH_F_OFFLOAD; @@ -1270,10 +1280,12 @@ static bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { struct switchdev_attr a_attr = { + .orig_dev = a, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; struct switchdev_attr b_attr = { + .orig_dev = b, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; |