summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/arch/s390/driver-model.rst2
-rw-r--r--Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml44
-rw-r--r--Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-phy.yaml6
-rw-r--r--Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml3
-rw-r--r--Documentation/netlink/genetlink-c.yaml7
-rw-r--r--Documentation/netlink/genetlink-legacy.yaml10
-rw-r--r--Documentation/netlink/genetlink.yaml7
-rw-r--r--Documentation/netlink/specs/conntrack.yaml643
-rw-r--r--Documentation/netlink/specs/netdev.yaml30
-rw-r--r--Documentation/netlink/specs/nl80211.yaml2000
-rw-r--r--Documentation/netlink/specs/rt_addr.yaml23
-rw-r--r--Documentation/netlink/specs/rt_rule.yaml10
-rw-r--r--Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst2
-rw-r--r--Documentation/networking/devlink/ice.rst11
-rw-r--r--Documentation/networking/devlink/mlx5.rst4
-rw-r--r--Documentation/networking/devlink/sfc.rst16
-rw-r--r--Documentation/networking/ip-sysctl.rst13
-rw-r--r--Documentation/networking/j1939.rst675
-rw-r--r--Documentation/networking/napi.rst33
-rw-r--r--Documentation/networking/net_cachelines/inet_connection_sock.rst1
-rw-r--r--Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst1
-rw-r--r--Documentation/networking/netconsole.rst45
-rw-r--r--MAINTAINERS2
-rw-r--r--arch/s390/include/asm/irq.h1
-rw-r--r--arch/s390/kernel/irq.c1
-rw-r--r--drivers/infiniband/hw/irdma/hw.c2
-rw-r--r--drivers/infiniband/hw/irdma/main.c46
-rw-r--r--drivers/infiniband/hw/irdma/main.h3
-rw-r--r--drivers/net/bonding/bond_main.c3
-rw-r--r--drivers/net/can/c_can/c_can_platform.c51
-rw-r--r--drivers/net/can/flexcan/flexcan-core.c35
-rw-r--r--drivers/net/can/flexcan/flexcan.h5
-rw-r--r--drivers/net/can/rockchip/rockchip_canfd-core.c5
-rw-r--r--drivers/net/can/usb/gs_usb.c5
-rw-r--r--drivers/net/dsa/b53/b53_common.c14
-rw-r--r--drivers/net/dsa/b53/b53_mdio.c1
-rw-r--r--drivers/net/dsa/b53/b53_priv.h2
-rw-r--r--drivers/net/dsa/b53/b53_serdes.c1
-rw-r--r--drivers/net/dsa/mt7530.c69
-rw-r--r--drivers/net/dsa/mv88e6xxx/pcs-6185.c1
-rw-r--r--drivers/net/dsa/mv88e6xxx/pcs-6352.c1
-rw-r--r--drivers/net/dsa/mv88e6xxx/pcs-639x.c4
-rw-r--r--drivers/net/dsa/qca/qca8k-8xxx.c1
-rw-r--r--drivers/net/dsa/rzn1_a5psw.c8
-rw-r--r--drivers/net/dsa/sja1105/sja1105_mdio.c6
-rw-r--r--drivers/net/ethernet/actions/owl-emac.c7
-rw-r--r--drivers/net/ethernet/apm/xgene-v2/mdio.c18
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c14
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c554
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h8
-rw-r--r--drivers/net/ethernet/cadence/macb.h130
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c192
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_device.c16
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_device.h7
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h7
-rw-r--r--drivers/net/ethernet/cisco/enic/Makefile2
-rw-r--r--drivers/net/ethernet/cisco/enic/enic.h4
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_ethtool.c39
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_main.c274
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_rq.c242
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_rq.h10
-rw-r--r--drivers/net/ethernet/cisco/enic/vnic_rq.h2
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c52
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_dtsec.c1
-rw-r--r--drivers/net/ethernet/freescale/gianfar.c14
-rw-r--r--drivers/net/ethernet/freescale/ucc_geth.c2
-rw-r--r--drivers/net/ethernet/freescale/ucc_geth.h2
-rw-r--r--drivers/net/ethernet/ibm/emac/core.c7
-rw-r--r--drivers/net/ethernet/intel/Kconfig3
-rw-r--r--drivers/net/ethernet/intel/e1000e/mac.c15
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.c4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.h10
-rw-r--r--drivers/net/ethernet/intel/iavf/Makefile2
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf.h35
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c228
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_ptp.c485
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_ptp.h47
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_trace.h6
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_txrx.c433
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_txrx.h24
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_type.h239
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_types.h34
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_virtchnl.c203
-rw-r--r--drivers/net/ethernet/intel/ice/devlink/devlink.c102
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h26
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.c13
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c208
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.h7
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ddp.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c21
-rw-r--r--drivers/net/ethernet/intel/ice/ice_gnss.c29
-rw-r--r--drivers/net/ethernet/intel/ice/ice_gnss.h4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_hw_autogen.h12
-rw-r--r--drivers/net/ethernet/intel/ice/ice_idc.c64
-rw-r--r--drivers/net/ethernet/intel/ice/ice_irq.c275
-rw-r--r--drivers/net/ethernet/intel/ice/ice_irq.h13
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c42
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c33
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.c509
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.h17
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp_hw.c413
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp_hw.h28
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sriov.c154
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_lib.h3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl.c80
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl.h6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c7
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.h8
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c51
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.c16
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.h19
-rw-r--r--drivers/net/ethernet/intel/igc/igc_xdp.c19
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c1
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/Makefile2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c122
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h17
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c6
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c32
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c188
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h9
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c12
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c225
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.h24
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c2
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_main.c1
-rw-r--r--drivers/net/ethernet/mediatek/airoha_eth.c10
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c74
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.h11
-rw-r--r--drivers/net/ethernet/mediatek/mtk_star_emac.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/alloc.c28
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c119
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/port.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.c64
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c119
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rss.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rss.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/trap.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c40
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/events.c36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/hwmon.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/hwmon.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c582
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h39
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci_hw.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c48
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h1
-rw-r--r--drivers/net/ethernet/meta/fbnic/Makefile3
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic.h6
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_csr.h6
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c174
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c713
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_netdev.c44
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_netdev.h3
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_phylink.c1
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_rpc.c356
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_rpc.h35
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_txrx.c212
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_txrx.h17
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.c1
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main.c1
-rw-r--r--drivers/net/ethernet/microsoft/mana/gdma_main.c50
-rw-r--r--drivers/net/ethernet/microsoft/mana/hw_channel.c6
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c63
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_hwmon.c40
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_sriov.c8
-rw-r--r--drivers/net/ethernet/realtek/Kconfig3
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c40
-rw-r--r--drivers/net/ethernet/renesas/rswitch.c7
-rw-r--r--drivers/net/ethernet/sfc/Kconfig5
-rw-r--r--drivers/net/ethernet/sfc/Makefile2
-rw-r--r--drivers/net/ethernet/sfc/ef10.c7
-rw-r--r--drivers/net/ethernet/sfc/efx_common.c1
-rw-r--r--drivers/net/ethernet/sfc/efx_devlink.c13
-rw-r--r--drivers/net/ethernet/sfc/efx_reflash.c514
-rw-r--r--drivers/net/ethernet/sfc/efx_reflash.h20
-rw-r--r--drivers/net/ethernet/sfc/fw_formats.h114
-rw-r--r--drivers/net/ethernet/sfc/mcdi.c115
-rw-r--r--drivers/net/ethernet/sfc/mcdi.h22
-rw-r--r--drivers/net/ethernet/sfc/net_driver.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h14
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c135
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac1000.h13
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c33
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4.h12
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c96
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h9
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c49
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/hwif.h21
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c163
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c2
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c211
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.h8
-rw-r--r--drivers/net/ethernet/xilinx/Kconfig1
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet.h29
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c315
-rw-r--r--drivers/net/hamradio/baycom_par.c4
-rw-r--r--drivers/net/hamradio/baycom_ser_fdx.c2
-rw-r--r--drivers/net/hamradio/baycom_ser_hdx.c4
-rw-r--r--drivers/net/hyperv/hyperv_net.h2
-rw-r--r--drivers/net/hyperv/netvsc_drv.c15
-rw-r--r--drivers/net/hyperv/rndis_filter.c13
-rw-r--r--drivers/net/netconsole.c261
-rw-r--r--drivers/net/netdevsim/netdev.c10
-rw-r--r--drivers/net/pcs/pcs-lynx.c1
-rw-r--r--drivers/net/pcs/pcs-mtk-lynxi.c1
-rw-r--r--drivers/net/pcs/pcs-rzn1-miic.c22
-rw-r--r--drivers/net/pcs/pcs-xpcs.c90
-rw-r--r--drivers/net/pcs/pcs-xpcs.h26
-rw-r--r--drivers/net/phy/aquantia/aquantia_hwmon.c32
-rw-r--r--drivers/net/phy/broadcom.c2
-rw-r--r--drivers/net/phy/dp83822.c38
-rw-r--r--drivers/net/phy/dp83td510.c187
-rw-r--r--drivers/net/phy/dp83tg720.c78
-rw-r--r--drivers/net/phy/marvell-88q2xxx.c272
-rw-r--r--drivers/net/phy/marvell.c24
-rw-r--r--drivers/net/phy/marvell10g.c24
-rw-r--r--drivers/net/phy/mediatek/mtk-ge-soc.c296
-rw-r--r--drivers/net/phy/mediatek/mtk-ge.c78
-rw-r--r--drivers/net/phy/mediatek/mtk-phy-lib.c77
-rw-r--r--drivers/net/phy/mediatek/mtk.h15
-rw-r--r--drivers/net/phy/phy-c45.c48
-rw-r--r--drivers/net/phy/phy-core.c22
-rw-r--r--drivers/net/phy/phy.c31
-rw-r--r--drivers/net/phy/phy_device.c116
-rw-r--r--drivers/net/phy/phylink.c58
-rw-r--r--drivers/net/phy/realtek/Kconfig8
-rw-r--r--drivers/net/phy/realtek/realtek_main.c119
-rw-r--r--drivers/net/tap.c166
-rw-r--r--drivers/net/tun.c193
-rw-r--r--drivers/net/tun_vnet.h185
-rw-r--r--drivers/net/usb/asix_devices.c17
-rw-r--r--drivers/net/usb/cdc_mbim.c2
-rw-r--r--drivers/net/usb/qmi_wwan.c3
-rw-r--r--drivers/net/usb/r8152.c1
-rw-r--r--drivers/net/vrf.c6
-rw-r--r--drivers/net/vxlan/vxlan_core.c58
-rw-r--r--drivers/net/wwan/t7xx/t7xx_pci.c1
-rw-r--r--drivers/of/base.c27
-rw-r--r--drivers/s390/net/Kconfig11
-rw-r--r--drivers/s390/net/Makefile1
-rw-r--r--drivers/s390/net/lcs.c2385
-rw-r--r--drivers/s390/net/lcs.h342
-rw-r--r--fs/eventpoll.c8
-rw-r--r--include/linux/avf/virtchnl.h135
-rw-r--r--include/linux/mlx4/device.h3
-rw-r--r--include/linux/mlx5/driver.h33
-rw-r--r--include/linux/mlx5/port.h3
-rw-r--r--include/linux/net/intel/iidc.h2
-rw-r--r--include/linux/netdevice.h1
-rw-r--r--include/linux/of.h9
-rw-r--r--include/linux/pcs/pcs-xpcs.h3
-rw-r--r--include/linux/phy.h73
-rw-r--r--include/linux/phylink.h36
-rw-r--r--include/linux/rtnetlink.h1
-rw-r--r--include/linux/sctp.h1
-rw-r--r--include/linux/stmmac.h4
-rw-r--r--include/linux/unroll.h44
-rw-r--r--include/linux/usb/r8152.h1
-rw-r--r--include/net/busy_poll.h9
-rw-r--r--include/net/fib_rules.h27
-rw-r--r--include/net/inet_connection_sock.h4
-rw-r--r--include/net/ip.h24
-rw-r--r--include/net/ipv6.h11
-rw-r--r--include/net/libeth/rx.h47
-rw-r--r--include/net/netdev_queues.h5
-rw-r--r--include/net/netdev_rx_queue.h1
-rw-r--r--include/net/netlink.h15
-rw-r--r--include/net/netmem.h21
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--include/net/page_pool/memory_provider.h45
-rw-r--r--include/net/page_pool/types.h4
-rw-r--r--include/net/sock.h23
-rw-r--r--include/net/tcp.h24
-rw-r--r--include/net/xdp_sock_drv.h43
-rw-r--r--include/net/xsk_buff_pool.h8
-rw-r--r--include/trace/events/tcp.h6
-rw-r--r--include/uapi/linux/can.h3
-rw-r--r--include/uapi/linux/ethtool.h18
-rw-r--r--include/uapi/linux/fib_rules.h2
-rw-r--r--include/uapi/linux/netdev.h13
-rw-r--r--include/uapi/linux/tcp.h1
-rw-r--r--io_uring/napi.c4
-rw-r--r--lib/Kconfig.debug20
-rw-r--r--lib/Makefile2
-rw-r--r--lib/blackhole_dev_kunit.c (renamed from lib/test_blackhole_dev.c)47
-rw-r--r--net/bridge/br_mdb.c2
-rw-r--r--net/can/raw.c2
-rw-r--r--net/core/dev.c39
-rw-r--r--net/core/dev.h12
-rw-r--r--net/core/devmem.c93
-rw-r--r--net/core/devmem.h49
-rw-r--r--net/core/fib_rules.c220
-rw-r--r--net/core/neighbour.c9
-rw-r--r--net/core/net-sysfs.c392
-rw-r--r--net/core/netdev-genl.c36
-rw-r--r--net/core/netdev_rx_queue.c106
-rw-r--r--net/core/page_pool.c66
-rw-r--r--net/core/page_pool_user.c9
-rw-r--r--net/core/rtnetlink.c5
-rw-r--r--net/core/secure_seq.c2
-rw-r--r--net/core/sock.c5
-rw-r--r--net/dccp/ipv4.c3
-rw-r--r--net/dccp/ipv6.c9
-rw-r--r--net/dsa/user.c25
-rw-r--r--net/ethtool/common.c42
-rw-r--r--net/ethtool/ioctl.c16
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/arp.c12
-rw-r--r--net/ipv4/devinet.c77
-rw-r--r--net/ipv4/fib_rules.c12
-rw-r--r--net/ipv4/icmp.c6
-rw-r--r--net/ipv4/igmp.c14
-rw-r--r--net/ipv4/igmp_internal.h17
-rw-r--r--net/ipv4/inet_connection_sock.c78
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/inetpeer.c8
-rw-r--r--net/ipv4/ip_gre.c16
-rw-r--r--net/ipv4/ping.c6
-rw-r--r--net/ipv4/raw.c6
-rw-r--r--net/ipv4/syncookies.c8
-rw-r--r--net/ipv4/sysctl_net_ipv4.c10
-rw-r--r--net/ipv4/tcp.c71
-rw-r--r--net/ipv4/tcp_fastopen.c6
-rw-r--r--net/ipv4/tcp_input.c42
-rw-r--r--net/ipv4/tcp_ipv4.c57
-rw-r--r--net/ipv4/tcp_minisocks.c14
-rw-r--r--net/ipv4/tcp_output.c30
-rw-r--r--net/ipv4/tcp_timer.c56
-rw-r--r--net/ipv4/udp.c69
-rw-r--r--net/ipv6/fib6_rules.c12
-rw-r--r--net/ipv6/ndisc.c8
-rw-r--r--net/ipv6/ping.c3
-rw-r--r--net/ipv6/raw.c15
-rw-r--r--net/ipv6/tcp_ipv6.c8
-rw-r--r--net/ipv6/udp.c10
-rw-r--r--net/l2tp/l2tp_ip6.c8
-rw-r--r--net/mptcp/fastopen.c27
-rw-r--r--net/mptcp/pm.c86
-rw-r--r--net/mptcp/pm_netlink.c129
-rw-r--r--net/mptcp/pm_userspace.c203
-rw-r--r--net/mptcp/protocol.c317
-rw-r--r--net/mptcp/protocol.h36
-rw-r--r--net/mptcp/subflow.c36
-rw-r--r--net/netlink/af_netlink.c1
-rw-r--r--net/nfc/hci/llc.c11
-rw-r--r--net/nfc/hci/llc.h1
-rw-r--r--net/packet/af_packet.c9
-rw-r--r--net/sched/em_meta.c2
-rw-r--r--net/xdp/xsk.c2
-rw-r--r--net/xdp/xsk_buff_pool.c46
-rw-r--r--tools/include/uapi/linux/netdev.h13
-rw-r--r--tools/net/ynl/Makefile.deps5
-rw-r--r--tools/net/ynl/pyynl/lib/ynl.py46
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py36
-rw-r--r--tools/testing/selftests/drivers/net/.gitignore2
-rw-r--r--tools/testing/selftests/drivers/net/Makefile5
-rw-r--r--tools/testing/selftests/drivers/net/config1
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile1
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/csum.py50
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devmem.py6
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c1
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_ctx.py46
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/tso.py241
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py131
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh24
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_fragmented_msg.sh122
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_sysdata.sh167
-rwxr-xr-xtools/testing/selftests/drivers/net/ping.py12
-rwxr-xr-xtools/testing/selftests/drivers/net/queues.py42
-rw-r--r--tools/testing/selftests/drivers/net/xdp_helper.c98
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile5
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh4
-rwxr-xr-xtools/testing/selftests/net/fdb_flush.sh2
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh2
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh36
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh2
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh10
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh2
-rw-r--r--tools/testing/selftests/net/lib.sh19
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py2
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py4
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh2
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py18
-rw-r--r--tools/testing/selftests/net/psock_tpacket.c2
-rw-r--r--tools/testing/selftests/net/reuseaddr_ports_exhausted.c2
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.py30
-rw-r--r--tools/testing/selftests/net/so_rcv_listener.c168
-rwxr-xr-xtools/testing/selftests/net/test_blackhole_dev.sh11
-rwxr-xr-xtools/testing/selftests/net/test_so_rcv.sh73
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_fdb_changelink.sh111
-rw-r--r--tools/testing/selftests/net/ynl.mk3
449 files changed, 17380 insertions, 8544 deletions
diff --git a/Documentation/arch/s390/driver-model.rst b/Documentation/arch/s390/driver-model.rst
index ad4bc2dbea43..ad18f129fb0b 100644
--- a/Documentation/arch/s390/driver-model.rst
+++ b/Documentation/arch/s390/driver-model.rst
@@ -244,7 +244,7 @@ information about the interrupt from the irb parameter.
--------------------
The ccwgroup mechanism is designed to handle devices consisting of multiple ccw
-devices, like lcs or ctc.
+devices, like qeth or ctc.
The ccw driver provides a 'group' attribute. Piping bus ids of ccw devices to
this attributes creates a ccwgroup device consisting of these ccw devices (if
diff --git a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml
index 97dd1a7c5ed2..73252fe56fe6 100644
--- a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml
+++ b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml
@@ -10,9 +10,6 @@ title:
maintainers:
- Marc Kleine-Budde <mkl@pengutronix.de>
-allOf:
- - $ref: can-controller.yaml#
-
properties:
compatible:
oneOf:
@@ -28,6 +25,7 @@ properties:
- fsl,vf610-flexcan
- fsl,ls1021ar2-flexcan
- fsl,lx2160ar1-flexcan
+ - nxp,s32g2-flexcan
- items:
- enum:
- fsl,imx53-flexcan
@@ -43,12 +41,21 @@ properties:
- enum:
- fsl,ls1028ar1-flexcan
- const: fsl,lx2160ar1-flexcan
+ - items:
+ - enum:
+ - nxp,s32g3-flexcan
+ - const: nxp,s32g2-flexcan
reg:
maxItems: 1
interrupts:
- maxItems: 1
+ minItems: 1
+ maxItems: 4
+
+ interrupt-names:
+ minItems: 1
+ maxItems: 4
clocks:
maxItems: 2
@@ -136,6 +143,35 @@ required:
- reg
- interrupts
+allOf:
+ - $ref: can-controller.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: nxp,s32g2-flexcan
+ then:
+ properties:
+ interrupts:
+ items:
+ - description: Message Buffer interrupt for mailboxes 0-7 and Enhanced RX FIFO
+ - description: Device state change
+ - description: Bus Error detection
+ - description: Message Buffer interrupt for mailboxes 8-127
+ interrupt-names:
+ items:
+ - const: mb-0
+ - const: state
+ - const: berr
+ - const: mb-1
+ required:
+ - interrupt-names
+ else:
+ properties:
+ interrupts:
+ maxItems: 1
+ interrupt-names: false
+
additionalProperties: false
examples:
diff --git a/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml b/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml
index 2a98b26630cb..c155c9c6db39 100644
--- a/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml
+++ b/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml
@@ -40,7 +40,7 @@ properties:
microchip,rx-int-gpios:
description:
- GPIO phandle of GPIO connected to to INT1 pin of the MCP251XFD, which
+ GPIO phandle of GPIO connected to INT1 pin of the MCP251XFD, which
signals a pending RX interrupt.
maxItems: 1
diff --git a/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml b/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
index 4c78c546343f..d6c957a33b48 100644
--- a/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
@@ -16,6 +16,7 @@ properties:
compatible:
oneOf:
- const: brcm,bcm5325
+ - const: brcm,bcm53101
- const: brcm,bcm53115
- const: brcm,bcm53125
- const: brcm,bcm53128
@@ -77,6 +78,7 @@ allOf:
contains:
enum:
- brcm,bcm5325
+ - brcm,bcm53101
- brcm,bcm53115
- brcm,bcm53125
- brcm,bcm53128
diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
index 2c71454ae8e3..824bbe4333b7 100644
--- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
@@ -232,6 +232,12 @@ properties:
PHY's that have configurable TX internal delays. If this property is
present then the PHY applies the TX delay.
+ tx-amplitude-100base-tx-percent:
+ description:
+ Transmit amplitude gain applied for 100BASE-TX. 100% matches 2V
+ peak-to-peak specified in ANSI X3.263. When omitted, the PHYs default
+ will be left as is.
+
leds:
type: object
diff --git a/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml b/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml
index 9bcbacb6640d..55d6a8379025 100644
--- a/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml
+++ b/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml
@@ -44,6 +44,9 @@ properties:
phy-mode:
enum:
- rgmii
+ - rgmii-id
+ - rgmii-rxid
+ - rgmii-txid
- rmii
phy-handle: true
diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml
index 9660ffb1ed6a..96fa1f1522ed 100644
--- a/Documentation/netlink/genetlink-c.yaml
+++ b/Documentation/netlink/genetlink-c.yaml
@@ -14,9 +14,10 @@ $defs:
pattern: ^[0-9A-Za-z_-]+( - 1)?$
minimum: 0
len-or-limit:
- # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc.
+ # literal int, const name, or limit based on fixed-width type
+ # e.g. u8-min, u16-max, etc.
type: [ string, integer ]
- pattern: ^[su](8|16|32|64)-(min|max)$
+ pattern: ^[0-9A-Za-z_-]+$
minimum: 0
# Schema for specs
@@ -160,7 +161,7 @@ properties:
type: string
type: &attr-type
enum: [ unused, pad, flag, binary,
- uint, sint, u8, u16, u32, u64, s32, s64,
+ uint, sint, u8, u16, u32, u64, s8, s16, s32, s64,
string, nest, indexed-array, nest-type-value ]
doc:
description: Documentation of the attribute.
diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml
index 16380e12cabe..a8c5b521937d 100644
--- a/Documentation/netlink/genetlink-legacy.yaml
+++ b/Documentation/netlink/genetlink-legacy.yaml
@@ -14,9 +14,10 @@ $defs:
pattern: ^[0-9A-Za-z_-]+( - 1)?$
minimum: 0
len-or-limit:
- # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc.
+ # literal int, const name, or limit based on fixed-width type
+ # e.g. u8-min, u16-max, etc.
type: [ string, integer ]
- pattern: ^[su](8|16|32|64)-(min|max)$
+ pattern: ^[0-9A-Za-z_-]+$
minimum: 0
# Schema for specs
@@ -151,6 +152,9 @@ properties:
the right formatting mechanism when displaying values of this
type.
enum: [ hex, mac, fddi, ipv4, ipv6, uuid ]
+ struct:
+ description: Name of the nested struct type.
+ type: string
# End genetlink-legacy
attribute-sets:
@@ -203,7 +207,7 @@ properties:
type: &attr-type
description: The netlink attribute type
enum: [ unused, pad, flag, binary, bitfield32,
- uint, sint, u8, u16, u32, u64, s32, s64,
+ uint, sint, u8, u16, u32, u64, s8, s16, s32, s64,
string, nest, indexed-array, nest-type-value ]
doc:
description: Documentation of the attribute.
diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml
index b036227b46f1..40efbbad76ab 100644
--- a/Documentation/netlink/genetlink.yaml
+++ b/Documentation/netlink/genetlink.yaml
@@ -14,9 +14,10 @@ $defs:
pattern: ^[0-9A-Za-z_-]+( - 1)?$
minimum: 0
len-or-limit:
- # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc.
+ # literal int, const name, or limit based on fixed-width type
+ # e.g. u8-min, u16-max, etc.
type: [ string, integer ]
- pattern: ^[su](8|16|32|64)-(min|max)$
+ pattern: ^[0-9A-Za-z_-]+$
minimum: 0
# Schema for specs
@@ -123,7 +124,7 @@ properties:
type: string
type: &attr-type
enum: [ unused, pad, flag, binary,
- uint, sint, u8, u16, u32, u64, s32, s64,
+ uint, sint, u8, u16, u32, u64, s8, s16, s32, s64,
string, nest, indexed-array, nest-type-value ]
doc:
description: Documentation of the attribute.
diff --git a/Documentation/netlink/specs/conntrack.yaml b/Documentation/netlink/specs/conntrack.yaml
new file mode 100644
index 000000000000..840dc4504216
--- /dev/null
+++ b/Documentation/netlink/specs/conntrack.yaml
@@ -0,0 +1,643 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: conntrack
+protocol: netlink-raw
+protonum: 12
+
+doc:
+ Netfilter connection tracking subsystem over nfnetlink
+
+definitions:
+ -
+ name: nfgenmsg
+ type: struct
+ members:
+ -
+ name: nfgen-family
+ type: u8
+ -
+ name: version
+ type: u8
+ -
+ name: res-id
+ byte-order: big-endian
+ type: u16
+ -
+ name: nf-ct-tcp-flags-mask
+ type: struct
+ members:
+ -
+ name: flags
+ type: u8
+ enum: nf-ct-tcp-flags
+ enum-as-flags: true
+ -
+ name: mask
+ type: u8
+ enum: nf-ct-tcp-flags
+ enum-as-flags: true
+ -
+ name: nf-ct-tcp-flags
+ type: flags
+ entries:
+ - window-scale
+ - sack-perm
+ - close-init
+ - be-liberal
+ - unacked
+ - maxack
+ - challenge-ack
+ - simultaneous-open
+ -
+ name: nf-ct-tcp-state
+ type: enum
+ entries:
+ - none
+ - syn-sent
+ - syn-recv
+ - established
+ - fin-wait
+ - close-wait
+ - last-ack
+ - time-wait
+ - close
+ - syn-sent2
+ - max
+ - ignore
+ - retrans
+ - unack
+ - timeout-max
+ -
+ name: nf-ct-sctp-state
+ type: enum
+ entries:
+ - none
+ - cloned
+ - cookie-wait
+ - cookie-echoed
+ - established
+ - shutdown-sent
+ - shutdown-received
+ - shutdown-ack-sent
+ - shutdown-heartbeat-sent
+ -
+ name: nf-ct-status
+ type: flags
+ entries:
+ - expected
+ - seen-reply
+ - assured
+ - confirmed
+ - src-nat
+ - dst-nat
+ - seq-adj
+ - src-nat-done
+ - dst-nat-done
+ - dying
+ - fixed-timeout
+ - template
+ - nat-clash
+ - helper
+ - offload
+ - hw-offload
+
+attribute-sets:
+ -
+ name: counter-attrs
+ attributes:
+ -
+ name: packets
+ type: u64
+ byte-order: big-endian
+ -
+ name: bytes
+ type: u64
+ byte-order: big-endian
+ -
+ name: packets-old
+ type: u32
+ -
+ name: bytes-old
+ type: u32
+ -
+ name: pad
+ type: pad
+ -
+ name: tuple-proto-attrs
+ attributes:
+ -
+ name: proto-num
+ type: u8
+ doc: l4 protocol number
+ -
+ name: proto-src-port
+ type: u16
+ byte-order: big-endian
+ doc: l4 source port
+ -
+ name: proto-dst-port
+ type: u16
+ byte-order: big-endian
+ doc: l4 source port
+ -
+ name: proto-icmp-id
+ type: u16
+ byte-order: big-endian
+ doc: l4 icmp id
+ -
+ name: proto-icmp-type
+ type: u8
+ -
+ name: proto-icmp-code
+ type: u8
+ -
+ name: proto-icmpv6-id
+ type: u16
+ byte-order: big-endian
+ doc: l4 icmp id
+ -
+ name: proto-icmpv6-type
+ type: u8
+ -
+ name: proto-icmpv6-code
+ type: u8
+ -
+ name: tuple-ip-attrs
+ attributes:
+ -
+ name: ip-v4-src
+ type: u32
+ byte-order: big-endian
+ display-hint: ipv4
+ doc: ipv4 source address
+ -
+ name: ip-v4-dst
+ type: u32
+ byte-order: big-endian
+ display-hint: ipv4
+ doc: ipv4 destination address
+ -
+ name: ip-v6-src
+ type: binary
+ checks:
+ min-len: 16
+ byte-order: big-endian
+ display-hint: ipv6
+ doc: ipv6 source address
+ -
+ name: ip-v6-dst
+ type: binary
+ checks:
+ min-len: 16
+ byte-order: big-endian
+ display-hint: ipv6
+ doc: ipv6 destination address
+ -
+ name: tuple-attrs
+ attributes:
+ -
+ name: tuple-ip
+ type: nest
+ nested-attributes: tuple-ip-attrs
+ doc: conntrack l3 information
+ -
+ name: tuple-proto
+ type: nest
+ nested-attributes: tuple-proto-attrs
+ doc: conntrack l4 information
+ -
+ name: tuple-zone
+ type: u16
+ byte-order: big-endian
+ doc: conntrack zone id
+ -
+ name: protoinfo-tcp-attrs
+ attributes:
+ -
+ name: tcp-state
+ type: u8
+ enum: nf-ct-tcp-state
+ doc: tcp connection state
+ -
+ name: tcp-wscale-original
+ type: u8
+ doc: window scaling factor in original direction
+ -
+ name: tcp-wscale-reply
+ type: u8
+ doc: window scaling factor in reply direction
+ -
+ name: tcp-flags-original
+ type: binary
+ struct: nf-ct-tcp-flags-mask
+ -
+ name: tcp-flags-reply
+ type: binary
+ struct: nf-ct-tcp-flags-mask
+ -
+ name: protoinfo-dccp-attrs
+ attributes:
+ -
+ name: dccp-state
+ type: u8
+ doc: dccp connection state
+ -
+ name: dccp-role
+ type: u8
+ -
+ name: dccp-handshake-seq
+ type: u64
+ byte-order: big-endian
+ -
+ name: dccp-pad
+ type: pad
+ -
+ name: protoinfo-sctp-attrs
+ attributes:
+ -
+ name: sctp-state
+ type: u8
+ doc: sctp connection state
+ enum: nf-ct-sctp-state
+ -
+ name: vtag-original
+ type: u32
+ byte-order: big-endian
+ -
+ name: vtag-reply
+ type: u32
+ byte-order: big-endian
+ -
+ name: protoinfo-attrs
+ attributes:
+ -
+ name: protoinfo-tcp
+ type: nest
+ nested-attributes: protoinfo-tcp-attrs
+ doc: conntrack tcp state information
+ -
+ name: protoinfo-dccp
+ type: nest
+ nested-attributes: protoinfo-dccp-attrs
+ doc: conntrack dccp state information
+ -
+ name: protoinfo-sctp
+ type: nest
+ nested-attributes: protoinfo-sctp-attrs
+ doc: conntrack sctp state information
+ -
+ name: help-attrs
+ attributes:
+ -
+ name: help-name
+ type: string
+ doc: helper name
+ -
+ name: nat-proto-attrs
+ attributes:
+ -
+ name: nat-port-min
+ type: u16
+ byte-order: big-endian
+ -
+ name: nat-port-max
+ type: u16
+ byte-order: big-endian
+ -
+ name: nat-attrs
+ attributes:
+ -
+ name: nat-v4-minip
+ type: u32
+ byte-order: big-endian
+ -
+ name: nat-v4-maxip
+ type: u32
+ byte-order: big-endian
+ -
+ name: nat-v6-minip
+ type: binary
+ -
+ name: nat-v6-maxip
+ type: binary
+ -
+ name: nat-proto
+ type: nest
+ nested-attributes: nat-proto-attrs
+ -
+ name: seqadj-attrs
+ attributes:
+ -
+ name: correction-pos
+ type: u32
+ byte-order: big-endian
+ -
+ name: offset-before
+ type: u32
+ byte-order: big-endian
+ -
+ name: offset-after
+ type: u32
+ byte-order: big-endian
+ -
+ name: secctx-attrs
+ attributes:
+ -
+ name: secctx-name
+ type: string
+ -
+ name: synproxy-attrs
+ attributes:
+ -
+ name: isn
+ type: u32
+ byte-order: big-endian
+ -
+ name: its
+ type: u32
+ byte-order: big-endian
+ -
+ name: tsoff
+ type: u32
+ byte-order: big-endian
+ -
+ name: conntrack-attrs
+ attributes:
+ -
+ name: tuple-orig
+ type: nest
+ nested-attributes: tuple-attrs
+ doc: conntrack l3+l4 protocol information, original direction
+ -
+ name: tuple-reply
+ type: nest
+ nested-attributes: tuple-attrs
+ doc: conntrack l3+l4 protocol information, reply direction
+ -
+ name: status
+ type: u32
+ byte-order: big-endian
+ enum: nf-ct-status
+ enum-as-flags: true
+ doc: conntrack flag bits
+ -
+ name: protoinfo
+ type: nest
+ nested-attributes: protoinfo-attrs
+ -
+ name: help
+ type: nest
+ nested-attributes: help-attrs
+ -
+ name: nat-src
+ type: nest
+ nested-attributes: nat-attrs
+ -
+ name: timeout
+ type: u32
+ byte-order: big-endian
+ -
+ name: mark
+ type: u32
+ byte-order: big-endian
+ -
+ name: counters-orig
+ type: nest
+ nested-attributes: counter-attrs
+ -
+ name: counters-reply
+ type: nest
+ nested-attributes: counter-attrs
+ -
+ name: use
+ type: u32
+ byte-order: big-endian
+ -
+ name: id
+ type: u32
+ byte-order: big-endian
+ -
+ name: nat-dst
+ type: nest
+ nested-attributes: nat-attrs
+ -
+ name: tuple-master
+ type: nest
+ nested-attributes: tuple-attrs
+ -
+ name: seq-adj-orig
+ type: nest
+ nested-attributes: seqadj-attrs
+ -
+ name: seq-adj-reply
+ type: nest
+ nested-attributes: seqadj-attrs
+ -
+ name: secmark
+ type: binary
+ doc: obsolete
+ -
+ name: zone
+ type: u16
+ byte-order: big-endian
+ doc: conntrack zone id
+ -
+ name: secctx
+ type: nest
+ nested-attributes: secctx-attrs
+ -
+ name: timestamp
+ type: u64
+ byte-order: big-endian
+ -
+ name: mark-mask
+ type: u32
+ byte-order: big-endian
+ -
+ name: labels
+ type: binary
+ -
+ name: labels mask
+ type: binary
+ -
+ name: synproxy
+ type: nest
+ nested-attributes: synproxy-attrs
+ -
+ name: filter
+ type: nest
+ nested-attributes: tuple-attrs
+ -
+ name: status-mask
+ type: u32
+ byte-order: big-endian
+ enum: nf-ct-status
+ enum-as-flags: true
+ doc: conntrack flag bits to change
+ -
+ name: timestamp-event
+ type: u64
+ byte-order: big-endian
+ -
+ name: conntrack-stats-attrs
+ attributes:
+ -
+ name: searched
+ type: u32
+ byte-order: big-endian
+ doc: obsolete
+ -
+ name: found
+ type: u32
+ byte-order: big-endian
+ -
+ name: new
+ type: u32
+ byte-order: big-endian
+ doc: obsolete
+ -
+ name: invalid
+ type: u32
+ byte-order: big-endian
+ doc: obsolete
+ -
+ name: ignore
+ type: u32
+ byte-order: big-endian
+ doc: obsolete
+ -
+ name: delete
+ type: u32
+ byte-order: big-endian
+ doc: obsolete
+ -
+ name: delete-list
+ type: u32
+ byte-order: big-endian
+ doc: obsolete
+ -
+ name: insert
+ type: u32
+ byte-order: big-endian
+ -
+ name: insert-failed
+ type: u32
+ byte-order: big-endian
+ -
+ name: drop
+ type: u32
+ byte-order: big-endian
+ -
+ name: early-drop
+ type: u32
+ byte-order: big-endian
+ -
+ name: error
+ type: u32
+ byte-order: big-endian
+ -
+ name: search-restart
+ type: u32
+ byte-order: big-endian
+ -
+ name: clash-resolve
+ type: u32
+ byte-order: big-endian
+ -
+ name: chain-toolong
+ type: u32
+ byte-order: big-endian
+
+operations:
+ enum-model: directional
+ list:
+ -
+ name: get
+ doc: get / dump entries
+ attribute-set: conntrack-attrs
+ fixed-header: nfgenmsg
+ do:
+ request:
+ value: 0x101
+ attributes:
+ - tuple-orig
+ - tuple-reply
+ - zone
+ reply:
+ value: 0x100
+ attributes:
+ - tuple-orig
+ - tuple-reply
+ - status
+ - protoinfo
+ - help
+ - nat-src
+ - nat-dst
+ - timeout
+ - mark
+ - counter-orig
+ - counter-reply
+ - use
+ - id
+ - nat-dst
+ - tuple-master
+ - seq-adj-orig
+ - seq-adj-reply
+ - zone
+ - secctx
+ - labels
+ - synproxy
+ dump:
+ request:
+ value: 0x101
+ attributes:
+ - nfgen-family
+ - mark
+ - filter
+ - status
+ - zone
+ reply:
+ value: 0x100
+ attributes:
+ - tuple-orig
+ - tuple-reply
+ - status
+ - protoinfo
+ - help
+ - nat-src
+ - nat-dst
+ - timeout
+ - mark
+ - counter-orig
+ - counter-reply
+ - use
+ - id
+ - nat-dst
+ - tuple-master
+ - seq-adj-orig
+ - seq-adj-reply
+ - zone
+ - secctx
+ - labels
+ - synproxy
+ -
+ name: get-stats
+ doc: dump pcpu conntrack stats
+ attribute-set: conntrack-stats-attrs
+ fixed-header: nfgenmsg
+ dump:
+ request:
+ value: 0x104
+ reply:
+ value: 0x104
+ attributes:
+ - searched
+ - found
+ - insert
+ - insert-failed
+ - drop
+ - early-drop
+ - error
+ - search-restart
+ - clash-resolve
+ - chain-toolong
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index cbb544bd6c84..766b82005d18 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -115,6 +115,9 @@ attribute-sets:
type: u64
enum: xsk-flags
-
+ name: io-uring-provider-info
+ attributes: []
+ -
name: page-pool
attributes:
-
@@ -171,6 +174,11 @@ attribute-sets:
name: dmabuf
doc: ID of the dmabuf this page-pool is attached to.
type: u32
+ -
+ name: io-uring
+ doc: io-uring memory provider information.
+ type: nest
+ nested-attributes: io-uring-provider-info
-
name: page-pool-info
subset-of: page-pool
@@ -269,6 +277,9 @@ attribute-sets:
processing, if event polling finds events
type: uint
-
+ name: xsk-info
+ attributes: []
+ -
name: queue
attributes:
-
@@ -286,6 +297,9 @@ attribute-sets:
-
name: type
doc: Queue type as rx, tx. Each queue type defines a separate ID space.
+ XDP TX queues allocated in the kernel are not linked to NAPIs and
+ thus not listed. AF_XDP queues will have more information set in
+ the xsk attribute.
type: u32
enum: queue-type
-
@@ -296,7 +310,16 @@ attribute-sets:
name: dmabuf
doc: ID of the dmabuf attached to this queue, if any.
type: u32
-
+ -
+ name: io-uring
+ doc: io_uring memory provider information.
+ type: nest
+ nested-attributes: io-uring-provider-info
+ -
+ name: xsk
+ doc: XSK information for this queue, if any.
+ type: nest
+ nested-attributes: xsk-info
-
name: qstats
doc: |
@@ -444,6 +467,8 @@ attribute-sets:
name: tx-needs-csum
doc: |
Number of packets that required the device to calculate the checksum.
+ This counter includes the number of GSO wire packets for which device
+ calculated the L4 checksum.
type: uint
-
name: tx-hw-gso-packets
@@ -572,6 +597,7 @@ operations:
- inflight-mem
- detach-time
- dmabuf
+ - io-uring
dump:
reply: *pp-reply
config-cond: page-pool
@@ -637,6 +663,8 @@ operations:
- napi-id
- ifindex
- dmabuf
+ - io-uring
+ - xsk
dump:
request:
attributes:
diff --git a/Documentation/netlink/specs/nl80211.yaml b/Documentation/netlink/specs/nl80211.yaml
new file mode 100644
index 000000000000..1ec49c3562cd
--- /dev/null
+++ b/Documentation/netlink/specs/nl80211.yaml
@@ -0,0 +1,2000 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: nl80211
+protocol: genetlink-legacy
+
+doc:
+ Netlink API for 802.11 wireless devices
+
+definitions:
+ -
+ name: commands
+ type: enum
+ entries:
+ - unspec
+ - get-wiphy
+ - set-wiphy
+ - new-wiphy
+ - del-wiphy
+ - get-interface
+ - set-interface
+ - new-interface
+ - del-interface
+ - get-key
+ - set-key
+ - new-key
+ - del-key
+ - get-beacon
+ - set-beacon
+ - new-beacon
+ - del-beacon
+ - get-station
+ - set-station
+ - new-station
+ - del-station
+ - get-mpath
+ - set-mpath
+ - new-mpath
+ - del-mpath
+ - set-bss
+ - set-reg
+ - req-set-reg
+ - get-mesh-config
+ - set-mesh-config
+ - set-mgmt-extra-ie
+ - get-reg
+ - get-scan
+ - trigger-scan
+ - new-scan-results
+ - scan-aborted
+ - reg-change
+ - authenticate
+ - associate
+ - deauthenticate
+ - disassociate
+ - michael-mic-failure
+ - reg-beacon-hint
+ - join-ibss
+ - leave-ibss
+ - testmode
+ - connect
+ - roam
+ - disconnect
+ - set-wiphy-netns
+ - get-survey
+ - new-survey-results
+ - set-pmksa
+ - del-pmksa
+ - flush-pmksa
+ - remain-on-channel
+ - cancel-remain-on-channel
+ - set-tx-bitrate-mask
+ - register-action
+ - action
+ - action-tx-status
+ - set-power-save
+ - get-power-save
+ - set-cqm
+ - notify-cqm
+ - set-channel
+ - set-wds-peer
+ - frame-wait-cancel
+ - join-mesh
+ - leave-mesh
+ - unprot-deauthenticate
+ - unprot-disassociate
+ - new-peer-candidate
+ - get-wowlan
+ - set-wowlan
+ - start-sched-scan
+ - stop-sched-scan
+ - sched-scan-results
+ - sched-scan-stopped
+ - set-rekey-offload
+ - pmksa-candidate
+ - tdls-oper
+ - tdls-mgmt
+ - unexpected-frame
+ - probe-client
+ - register-beacons
+ - unexpected-4-addr-frame
+ - set-noack-map
+ - ch-switch-notify
+ - start-p2p-device
+ - stop-p2p-device
+ - conn-failed
+ - set-mcast-rate
+ - set-mac-acl
+ - radar-detect
+ - get-protocol-features
+ - update-ft-ies
+ - ft-event
+ - crit-protocol-start
+ - crit-protocol-stop
+ - get-coalesce
+ - set-coalesce
+ - channel-switch
+ - vendor
+ - set-qos-map
+ - add-tx-ts
+ - del-tx-ts
+ - get-mpp
+ - join-ocb
+ - leave-ocb
+ - ch-switch-started-notify
+ - tdls-channel-switch
+ - tdls-cancel-channel-switch
+ - wiphy-reg-change
+ - abort-scan
+ - start-nan
+ - stop-nan
+ - add-nan-function
+ - del-nan-function
+ - change-nan-config
+ - nan-match
+ - set-multicast-to-unicast
+ - update-connect-params
+ - set-pmk
+ - del-pmk
+ - port-authorized
+ - reload-regdb
+ - external-auth
+ - sta-opmode-changed
+ - control-port-frame
+ - get-ftm-responder-stats
+ - peer-measurement-start
+ - peer-measurement-result
+ - peer-measurement-complete
+ - notify-radar
+ - update-owe-info
+ - probe-mesh-link
+ - set-tid-config
+ - unprot-beacon
+ - control-port-frame-tx-status
+ - set-sar-specs
+ - obss-color-collision
+ - color-change-request
+ - color-change-started
+ - color-change-aborted
+ - color-change-completed
+ - set-fils-aad
+ - assoc-comeback
+ - add-link
+ - remove-link
+ - add-link-sta
+ - modify-link-sta
+ - remove-link-sta
+ - set-hw-timestamp
+ - links-removed
+ - set-tid-to-link-mapping
+ -
+ name: feature-flags
+ type: flags
+ entries:
+ - sk-tx-status
+ - ht-ibss
+ - inactivity-timer
+ - cell-base-reg-hints
+ - p2p-device-needs-channel
+ - sae
+ - low-priority-scan
+ - scan-flush
+ - ap-scan
+ - vif-txpower
+ - need-obss-scan
+ - p2p-go-ctwin
+ - p2p-go-oppps
+ - reserved
+ - advertise-chan-limits
+ - full-ap-client-state
+ - userspace-mpm
+ - active-monitor
+ - ap-mode-chan-width-change
+ - ds-param-set-ie-in-probes
+ - wfa-tpc-ie-in-probes
+ - quiet
+ - tx-power-insertion
+ - ackto-estimation
+ - static-smps
+ - dynamic-smps
+ - supports-wmm-admission
+ - mac-on-create
+ - tdls-channel-switch
+ - scan-random-mac-addr
+ - sched-scan-random-mac-addr
+ - no-random-mac-addr
+ -
+ name: ieee80211-mcs-info
+ type: struct
+ members:
+ -
+ name: rx-mask
+ type: binary
+ len: 10
+ -
+ name: rx-highest
+ type: u16
+ byte-order: little-endian
+ -
+ name: tx-params
+ type: u8
+ -
+ name: reserved
+ type: binary
+ len: 3
+ -
+ name: ieee80211-vht-mcs-info
+ type: struct
+ members:
+ -
+ name: rx-mcs-map
+ type: u16
+ byte-order: little-endian
+ -
+ name: rx-highest
+ type: u16
+ byte-order: little-endian
+ -
+ name: tx-mcs-map
+ type: u16
+ byte-order: little-endian
+ -
+ name: tx-highest
+ type: u16
+ byte-order: little-endian
+ -
+ name: ieee80211-ht-cap
+ type: struct
+ members:
+ -
+ name: cap-info
+ type: u16
+ byte-order: little-endian
+ -
+ name: ampdu-params-info
+ type: u8
+ -
+ name: mcs
+ type: binary
+ struct: ieee80211-mcs-info
+ -
+ name: extended-ht-cap-info
+ type: u16
+ byte-order: little-endian
+ -
+ name: tx-bf-cap-info
+ type: u32
+ byte-order: little-endian
+ -
+ name: antenna-selection-info
+ type: u8
+ -
+ name: channel-type
+ type: enum
+ entries:
+ - no-ht
+ - ht20
+ - ht40minus
+ - ht40plus
+ -
+ name: sta-flag-update
+ type: struct
+ members:
+ -
+ name: mask
+ type: u32
+ -
+ name: set
+ type: u32
+ -
+ name: protocol-features
+ type: flags
+ entries:
+ - split-wiphy-dump
+
+attribute-sets:
+ -
+ name: nl80211-attrs
+ name-prefix: nl80211-attr-
+ enum-name: nl80211-attrs
+ attr-max-name: num-nl80211-attr
+ attributes:
+ -
+ name: wiphy
+ type: u32
+ -
+ name: wiphy-name
+ type: string
+ -
+ name: ifindex
+ type: u32
+ -
+ name: ifname
+ type: string
+ -
+ name: iftype
+ type: u32
+ -
+ name: mac
+ type: binary
+ display-hint: mac
+ -
+ name: key-data
+ type: binary
+ -
+ name: key-idx
+ type: u8
+ -
+ name: key-cipher
+ type: u32
+ -
+ name: key-seq
+ type: binary
+ -
+ name: key-default
+ type: flag
+ -
+ name: beacon-interval
+ type: u32
+ -
+ name: dtim-period
+ type: u32
+ -
+ name: beacon-head
+ type: binary
+ -
+ name: beacon-tail
+ type: binary
+ -
+ name: sta-aid
+ type: u16
+ -
+ name: sta-flags
+ type: binary # TODO: nest
+ -
+ name: sta-listen-interval
+ type: u16
+ -
+ name: sta-supported-rates
+ type: binary
+ -
+ name: sta-vlan
+ type: u32
+ -
+ name: sta-info
+ type: binary # TODO: nest
+ -
+ name: wiphy-bands
+ type: nest
+ nested-attributes: wiphy-bands
+ -
+ name: mntr-flags
+ type: binary # TODO: nest
+ -
+ name: mesh-id
+ type: binary
+ -
+ name: sta-plink-action
+ type: u8
+ -
+ name: mpath-next-hop
+ type: binary
+ display-hint: mac
+ -
+ name: mpath-info
+ type: binary # TODO: nest
+ -
+ name: bss-cts-prot
+ type: u8
+ -
+ name: bss-short-preamble
+ type: u8
+ -
+ name: bss-short-slot-time
+ type: u8
+ -
+ name: ht-capability
+ type: binary
+ -
+ name: supported-iftypes
+ type: nest
+ nested-attributes: supported-iftypes
+ -
+ name: reg-alpha2
+ type: binary
+ -
+ name: reg-rules
+ type: binary # TODO: nest
+ -
+ name: mesh-config
+ type: binary # TODO: nest
+ -
+ name: bss-basic-rates
+ type: binary
+ -
+ name: wiphy-txq-params
+ type: binary # TODO: nest
+ -
+ name: wiphy-freq
+ type: u32
+ -
+ name: wiphy-channel-type
+ type: u32
+ enum: channel-type
+ -
+ name: key-default-mgmt
+ type: flag
+ -
+ name: mgmt-subtype
+ type: u8
+ -
+ name: ie
+ type: binary
+ -
+ name: max-num-scan-ssids
+ type: u8
+ -
+ name: scan-frequencies
+ type: binary # TODO: nest
+ -
+ name: scan-ssids
+ type: binary # TODO: nest
+ -
+ name: generation
+ type: u32
+ -
+ name: bss
+ type: binary # TODO: nest
+ -
+ name: reg-initiator
+ type: u8
+ -
+ name: reg-type
+ type: u8
+ -
+ name: supported-commands
+ type: indexed-array
+ sub-type: u32
+ enum: commands
+ -
+ name: frame
+ type: binary
+ -
+ name: ssid
+ type: binary
+ -
+ name: auth-type
+ type: u32
+ -
+ name: reason-code
+ type: u16
+ -
+ name: key-type
+ type: u32
+ -
+ name: max-scan-ie-len
+ type: u16
+ -
+ name: cipher-suites
+ type: binary
+ sub-type: u32
+ display-hint: hex
+ -
+ name: freq-before
+ type: binary # TODO: nest
+ -
+ name: freq-after
+ type: binary # TODO: nest
+ -
+ name: freq-fixed
+ type: flag
+ -
+ name: wiphy-retry-short
+ type: u8
+ -
+ name: wiphy-retry-long
+ type: u8
+ -
+ name: wiphy-frag-threshold
+ type: u32
+ -
+ name: wiphy-rts-threshold
+ type: u32
+ -
+ name: timed-out
+ type: flag
+ -
+ name: use-mfp
+ type: u32
+ -
+ name: sta-flags2
+ type: binary
+ struct: sta-flag-update
+ -
+ name: control-port
+ type: flag
+ -
+ name: testdata
+ type: binary
+ -
+ name: privacy
+ type: flag
+ -
+ name: disconnected-by-ap
+ type: flag
+ -
+ name: status-code
+ type: u16
+ -
+ name: cipher-suites-pairwise
+ type: binary
+ -
+ name: cipher-suite-group
+ type: u32
+ -
+ name: wpa-versions
+ type: u32
+ -
+ name: akm-suites
+ type: binary
+ -
+ name: req-ie
+ type: binary
+ -
+ name: resp-ie
+ type: binary
+ -
+ name: prev-bssid
+ type: binary
+ -
+ name: key
+ type: binary # TODO: nest
+ -
+ name: keys
+ type: binary # TODO: nest
+ -
+ name: pid
+ type: u32
+ -
+ name: 4addr
+ type: u8
+ -
+ name: survey-info
+ type: binary # TODO: nest
+ -
+ name: pmkid
+ type: binary
+ -
+ name: max-num-pmkids
+ type: u8
+ -
+ name: duration
+ type: u32
+ -
+ name: cookie
+ type: u64
+ -
+ name: wiphy-coverage-class
+ type: u8
+ -
+ name: tx-rates
+ type: binary # TODO: nest
+ -
+ name: frame-match
+ type: binary
+ -
+ name: ack
+ type: flag
+ -
+ name: ps-state
+ type: u32
+ -
+ name: cqm
+ type: binary # TODO: nest
+ -
+ name: local-state-change
+ type: flag
+ -
+ name: ap-isolate
+ type: u8
+ -
+ name: wiphy-tx-power-setting
+ type: u32
+ -
+ name: wiphy-tx-power-level
+ type: u32
+ -
+ name: tx-frame-types
+ type: nest
+ nested-attributes: iftype-attrs
+ -
+ name: rx-frame-types
+ type: nest
+ nested-attributes: iftype-attrs
+ -
+ name: frame-type
+ type: u16
+ -
+ name: control-port-ethertype
+ type: flag
+ -
+ name: control-port-no-encrypt
+ type: flag
+ -
+ name: support-ibss-rsn
+ type: flag
+ -
+ name: wiphy-antenna-tx
+ type: u32
+ -
+ name: wiphy-antenna-rx
+ type: u32
+ -
+ name: mcast-rate
+ type: u32
+ -
+ name: offchannel-tx-ok
+ type: flag
+ -
+ name: bss-ht-opmode
+ type: u16
+ -
+ name: key-default-types
+ type: binary # TODO: nest
+ -
+ name: max-remain-on-channel-duration
+ type: u32
+ -
+ name: mesh-setup
+ type: binary # TODO: nest
+ -
+ name: wiphy-antenna-avail-tx
+ type: u32
+ -
+ name: wiphy-antenna-avail-rx
+ type: u32
+ -
+ name: support-mesh-auth
+ type: flag
+ -
+ name: sta-plink-state
+ type: u8
+ -
+ name: wowlan-triggers
+ type: binary # TODO: nest
+ -
+ name: wowlan-triggers-supported
+ type: nest
+ nested-attributes: wowlan-triggers-attrs
+ -
+ name: sched-scan-interval
+ type: u32
+ -
+ name: interface-combinations
+ type: indexed-array
+ sub-type: nest
+ nested-attributes: if-combination-attributes
+ -
+ name: software-iftypes
+ type: nest
+ nested-attributes: supported-iftypes
+ -
+ name: rekey-data
+ type: binary # TODO: nest
+ -
+ name: max-num-sched-scan-ssids
+ type: u8
+ -
+ name: max-sched-scan-ie-len
+ type: u16
+ -
+ name: scan-supp-rates
+ type: binary # TODO: nest
+ -
+ name: hidden-ssid
+ type: u32
+ -
+ name: ie-probe-resp
+ type: binary
+ -
+ name: ie-assoc-resp
+ type: binary
+ -
+ name: sta-wme
+ type: binary # TODO: nest
+ -
+ name: support-ap-uapsd
+ type: flag
+ -
+ name: roam-support
+ type: flag
+ -
+ name: sched-scan-match
+ type: binary # TODO: nest
+ -
+ name: max-match-sets
+ type: u8
+ -
+ name: pmksa-candidate
+ type: binary # TODO: nest
+ -
+ name: tx-no-cck-rate
+ type: flag
+ -
+ name: tdls-action
+ type: u8
+ -
+ name: tdls-dialog-token
+ type: u8
+ -
+ name: tdls-operation
+ type: u8
+ -
+ name: tdls-support
+ type: flag
+ -
+ name: tdls-external-setup
+ type: flag
+ -
+ name: device-ap-sme
+ type: u32
+ -
+ name: dont-wait-for-ack
+ type: flag
+ -
+ name: feature-flags
+ type: u32
+ enum: feature-flags
+ enum-as-flags: True
+ -
+ name: probe-resp-offload
+ type: u32
+ -
+ name: probe-resp
+ type: binary
+ -
+ name: dfs-region
+ type: u8
+ -
+ name: disable-ht
+ type: flag
+ -
+ name: ht-capability-mask
+ type: binary
+ struct: ieee80211-ht-cap
+ -
+ name: noack-map
+ type: u16
+ -
+ name: inactivity-timeout
+ type: u16
+ -
+ name: rx-signal-dbm
+ type: u32
+ -
+ name: bg-scan-period
+ type: u16
+ -
+ name: wdev
+ type: u64
+ -
+ name: user-reg-hint-type
+ type: u32
+ -
+ name: conn-failed-reason
+ type: u32
+ -
+ name: auth-data
+ type: binary
+ -
+ name: vht-capability
+ type: binary
+ -
+ name: scan-flags
+ type: u32
+ -
+ name: channel-width
+ type: u32
+ -
+ name: center-freq1
+ type: u32
+ -
+ name: center-freq2
+ type: u32
+ -
+ name: p2p-ctwindow
+ type: u8
+ -
+ name: p2p-oppps
+ type: u8
+ -
+ name: local-mesh-power-mode
+ type: u32
+ -
+ name: acl-policy
+ type: u32
+ -
+ name: mac-addrs
+ type: binary # TODO: nest
+ -
+ name: mac-acl-max
+ type: u32
+ -
+ name: radar-event
+ type: u32
+ -
+ name: ext-capa
+ type: binary
+ -
+ name: ext-capa-mask
+ type: binary
+ -
+ name: sta-capability
+ type: u16
+ -
+ name: sta-ext-capability
+ type: binary
+ -
+ name: protocol-features
+ type: u32
+ enum: protocol-features
+ -
+ name: split-wiphy-dump
+ type: flag
+ -
+ name: disable-vht
+ type: flag
+ -
+ name: vht-capability-mask
+ type: binary
+ -
+ name: mdid
+ type: u16
+ -
+ name: ie-ric
+ type: binary
+ -
+ name: crit-prot-id
+ type: u16
+ -
+ name: max-crit-prot-duration
+ type: u16
+ -
+ name: peer-aid
+ type: u16
+ -
+ name: coalesce-rule
+ type: binary # TODO: nest
+ -
+ name: ch-switch-count
+ type: u32
+ -
+ name: ch-switch-block-tx
+ type: flag
+ -
+ name: csa-ies
+ type: binary # TODO: nest
+ -
+ name: cntdwn-offs-beacon
+ type: binary
+ -
+ name: cntdwn-offs-presp
+ type: binary
+ -
+ name: rxmgmt-flags
+ type: binary
+ -
+ name: sta-supported-channels
+ type: binary
+ -
+ name: sta-supported-oper-classes
+ type: binary
+ -
+ name: handle-dfs
+ type: flag
+ -
+ name: support-5-mhz
+ type: flag
+ -
+ name: support-10-mhz
+ type: flag
+ -
+ name: opmode-notif
+ type: u8
+ -
+ name: vendor-id
+ type: u32
+ -
+ name: vendor-subcmd
+ type: u32
+ -
+ name: vendor-data
+ type: binary
+ -
+ name: vendor-events
+ type: binary
+ -
+ name: qos-map
+ type: binary
+ -
+ name: mac-hint
+ type: binary
+ display-hint: mac
+ -
+ name: wiphy-freq-hint
+ type: u32
+ -
+ name: max-ap-assoc-sta
+ type: u32
+ -
+ name: tdls-peer-capability
+ type: u32
+ -
+ name: socket-owner
+ type: flag
+ -
+ name: csa-c-offsets-tx
+ type: binary
+ -
+ name: max-csa-counters
+ type: u8
+ -
+ name: tdls-initiator
+ type: flag
+ -
+ name: use-rrm
+ type: flag
+ -
+ name: wiphy-dyn-ack
+ type: flag
+ -
+ name: tsid
+ type: u8
+ -
+ name: user-prio
+ type: u8
+ -
+ name: admitted-time
+ type: u16
+ -
+ name: smps-mode
+ type: u8
+ -
+ name: oper-class
+ type: u8
+ -
+ name: mac-mask
+ type: binary
+ display-hint: mac
+ -
+ name: wiphy-self-managed-reg
+ type: flag
+ -
+ name: ext-features
+ type: binary
+ -
+ name: survey-radio-stats
+ type: binary
+ -
+ name: netns-fd
+ type: u32
+ -
+ name: sched-scan-delay
+ type: u32
+ -
+ name: reg-indoor
+ type: flag
+ -
+ name: max-num-sched-scan-plans
+ type: u32
+ -
+ name: max-scan-plan-interval
+ type: u32
+ -
+ name: max-scan-plan-iterations
+ type: u32
+ -
+ name: sched-scan-plans
+ type: binary # TODO: nest
+ -
+ name: pbss
+ type: flag
+ -
+ name: bss-select
+ type: binary # TODO: nest
+ -
+ name: sta-support-p2p-ps
+ type: u8
+ -
+ name: pad
+ type: binary
+ -
+ name: iftype-ext-capa
+ type: binary # TODO: nest
+ -
+ name: mu-mimo-group-data
+ type: binary
+ -
+ name: mu-mimo-follow-mac-addr
+ type: binary
+ display-hint: mac
+ -
+ name: scan-start-time-tsf
+ type: u64
+ -
+ name: scan-start-time-tsf-bssid
+ type: binary
+ -
+ name: measurement-duration
+ type: u16
+ -
+ name: measurement-duration-mandatory
+ type: flag
+ -
+ name: mesh-peer-aid
+ type: u16
+ -
+ name: nan-master-pref
+ type: u8
+ -
+ name: bands
+ type: u32
+ -
+ name: nan-func
+ type: binary # TODO: nest
+ -
+ name: nan-match
+ type: binary # TODO: nest
+ -
+ name: fils-kek
+ type: binary
+ -
+ name: fils-nonces
+ type: binary
+ -
+ name: multicast-to-unicast-enabled
+ type: flag
+ -
+ name: bssid
+ type: binary
+ display-hint: mac
+ -
+ name: sched-scan-relative-rssi
+ type: s8
+ -
+ name: sched-scan-rssi-adjust
+ type: binary
+ -
+ name: timeout-reason
+ type: u32
+ -
+ name: fils-erp-username
+ type: binary
+ -
+ name: fils-erp-realm
+ type: binary
+ -
+ name: fils-erp-next-seq-num
+ type: u16
+ -
+ name: fils-erp-rrk
+ type: binary
+ -
+ name: fils-cache-id
+ type: binary
+ -
+ name: pmk
+ type: binary
+ -
+ name: sched-scan-multi
+ type: flag
+ -
+ name: sched-scan-max-reqs
+ type: u32
+ -
+ name: want-1x-4way-hs
+ type: flag
+ -
+ name: pmkr0-name
+ type: binary
+ -
+ name: port-authorized
+ type: binary
+ -
+ name: external-auth-action
+ type: u32
+ -
+ name: external-auth-support
+ type: flag
+ -
+ name: nss
+ type: u8
+ -
+ name: ack-signal
+ type: s32
+ -
+ name: control-port-over-nl80211
+ type: flag
+ -
+ name: txq-stats
+ type: nest
+ nested-attributes: txq-stats-attrs
+ -
+ name: txq-limit
+ type: u32
+ -
+ name: txq-memory-limit
+ type: u32
+ -
+ name: txq-quantum
+ type: u32
+ -
+ name: he-capability
+ type: binary
+ -
+ name: ftm-responder
+ type: binary # TODO: nest
+ -
+ name: ftm-responder-stats
+ type: binary # TODO: nest
+ -
+ name: timeout
+ type: u32
+ -
+ name: peer-measurements
+ type: binary # TODO: nest
+ -
+ name: airtime-weight
+ type: u16
+ -
+ name: sta-tx-power-setting
+ type: u8
+ -
+ name: sta-tx-power
+ type: s16
+ -
+ name: sae-password
+ type: binary
+ -
+ name: twt-responder
+ type: flag
+ -
+ name: he-obss-pd
+ type: binary # TODO: nest
+ -
+ name: wiphy-edmg-channels
+ type: u8
+ -
+ name: wiphy-edmg-bw-config
+ type: u8
+ -
+ name: vlan-id
+ type: u16
+ -
+ name: he-bss-color
+ type: binary # TODO: nest
+ -
+ name: iftype-akm-suites
+ type: binary # TODO: nest
+ -
+ name: tid-config
+ type: binary # TODO: nest
+ -
+ name: control-port-no-preauth
+ type: flag
+ -
+ name: pmk-lifetime
+ type: u32
+ -
+ name: pmk-reauth-threshold
+ type: u8
+ -
+ name: receive-multicast
+ type: flag
+ -
+ name: wiphy-freq-offset
+ type: u32
+ -
+ name: center-freq1-offset
+ type: u32
+ -
+ name: scan-freq-khz
+ type: binary # TODO: nest
+ -
+ name: he-6ghz-capability
+ type: binary
+ -
+ name: fils-discovery
+ type: binary # TOOD: nest
+ -
+ name: unsol-bcast-probe-resp
+ type: binary # TOOD: nest
+ -
+ name: s1g-capability
+ type: binary
+ -
+ name: s1g-capability-mask
+ type: binary
+ -
+ name: sae-pwe
+ type: u8
+ -
+ name: reconnect-requested
+ type: binary
+ -
+ name: sar-spec
+ type: nest
+ nested-attributes: sar-attributes
+ -
+ name: disable-he
+ type: flag
+ -
+ name: obss-color-bitmap
+ type: u64
+ -
+ name: color-change-count
+ type: u8
+ -
+ name: color-change-color
+ type: u8
+ -
+ name: color-change-elems
+ type: binary # TODO: nest
+ -
+ name: mbssid-config
+ type: binary # TODO: nest
+ -
+ name: mbssid-elems
+ type: binary # TODO: nest
+ -
+ name: radar-background
+ type: flag
+ -
+ name: ap-settings-flags
+ type: u32
+ -
+ name: eht-capability
+ type: binary
+ -
+ name: disable-eht
+ type: flag
+ -
+ name: mlo-links
+ type: binary # TODO: nest
+ -
+ name: mlo-link-id
+ type: u8
+ -
+ name: mld-addr
+ type: binary
+ display-hint: mac
+ -
+ name: mlo-support
+ type: flag
+ -
+ name: max-num-akm-suites
+ type: binary
+ -
+ name: eml-capability
+ type: u16
+ -
+ name: mld-capa-and-ops
+ type: u16
+ -
+ name: tx-hw-timestamp
+ type: u64
+ -
+ name: rx-hw-timestamp
+ type: u64
+ -
+ name: td-bitmap
+ type: binary
+ -
+ name: punct-bitmap
+ type: u32
+ -
+ name: max-hw-timestamp-peers
+ type: u16
+ -
+ name: hw-timestamp-enabled
+ type: flag
+ -
+ name: ema-rnr-elems
+ type: binary # TODO: nest
+ -
+ name: mlo-link-disabled
+ type: flag
+ -
+ name: bss-dump-include-use-data
+ type: flag
+ -
+ name: mlo-ttlm-dlink
+ type: u16
+ -
+ name: mlo-ttlm-ulink
+ type: u16
+ -
+ name: assoc-spp-amsdu
+ type: flag
+ -
+ name: wiphy-radios
+ type: binary # TODO: nest
+ -
+ name: wiphy-interface-combinations
+ type: binary # TODO: nest
+ -
+ name: vif-radio-mask
+ type: u32
+ -
+ name: frame-type-attrs
+ subset-of: nl80211-attrs
+ attributes:
+ -
+ name: frame-type
+ -
+ name: wiphy-bands
+ name-prefix: nl80211-band-
+ attr-max-name: num-nl80211-bands
+ attributes:
+ -
+ name: 2ghz
+ doc: 2.4 GHz ISM band
+ value: 0
+ type: nest
+ nested-attributes: band-attrs
+ -
+ name: 5ghz
+ doc: around 5 GHz band (4.9 - 5.7 GHz)
+ type: nest
+ nested-attributes: band-attrs
+ -
+ name: 60ghz
+ doc: around 60 GHz band (58.32 - 69.12 GHz)
+ type: nest
+ nested-attributes: band-attrs
+ -
+ name: 6ghz
+ type: nest
+ nested-attributes: band-attrs
+ -
+ name: s1ghz
+ type: nest
+ nested-attributes: band-attrs
+ -
+ name: lc
+ type: nest
+ nested-attributes: band-attrs
+ -
+ name: band-attrs
+ enum-name: nl80211-band-attr
+ name-prefix: nl80211-band-attr-
+ attributes:
+ -
+ name: freqs
+ type: indexed-array
+ sub-type: nest
+ nested-attributes: frequency-attrs
+ -
+ name: rates
+ type: indexed-array
+ sub-type: nest
+ nested-attributes: bitrate-attrs
+ -
+ name: ht-mcs-set
+ type: binary
+ struct: ieee80211-mcs-info
+ -
+ name: ht-capa
+ type: u16
+ -
+ name: ht-ampdu-factor
+ type: u8
+ -
+ name: ht-ampdu-density
+ type: u8
+ -
+ name: vht-mcs-set
+ type: binary
+ struct: ieee80211-vht-mcs-info
+ -
+ name: vht-capa
+ type: u32
+ -
+ name: iftype-data
+ type: indexed-array
+ sub-type: nest
+ nested-attributes: iftype-data-attrs
+ -
+ name: edmg-channels
+ type: binary
+ -
+ name: edmg-bw-config
+ type: binary
+ -
+ name: s1g-mcs-nss-set
+ type: binary
+ -
+ name: s1g-capa
+ type: binary
+ -
+ name: bitrate-attrs
+ name-prefix: nl80211-bitrate-attr-
+ attributes:
+ -
+ name: rate
+ type: u32
+ -
+ name: 2ghz-shortpreamble
+ type: flag
+ -
+ name: frequency-attrs
+ name-prefix: nl80211-frequency-attr-
+ attributes:
+ -
+ name: freq
+ type: u32
+ -
+ name: disabled
+ type: flag
+ -
+ name: no-ir
+ type: flag
+ -
+ name: no-ibss
+ name-prefix: __nl80211-frequency-attr-
+ type: flag
+ -
+ name: radar
+ type: flag
+ -
+ name: max-tx-power
+ type: u32
+ -
+ name: dfs-state
+ type: u32
+ -
+ name: dfs-time
+ type: binary
+ -
+ name: no-ht40-minus
+ type: binary
+ -
+ name: no-ht40-plus
+ type: binary
+ -
+ name: no-80mhz
+ type: binary
+ -
+ name: no-160mhz
+ type: binary
+ -
+ name: dfs-cac-time
+ type: binary
+ -
+ name: indoor-only
+ type: binary
+ -
+ name: ir-concurrent
+ type: binary
+ -
+ name: no-20mhz
+ type: binary
+ -
+ name: no-10mhz
+ type: binary
+ -
+ name: wmm
+ type: indexed-array
+ sub-type: nest
+ nested-attributes: wmm-attrs
+ -
+ name: no-he
+ type: binary
+ -
+ name: offset
+ type: u32
+ -
+ name: 1mhz
+ type: binary
+ -
+ name: 2mhz
+ type: binary
+ -
+ name: 4mhz
+ type: binary
+ -
+ name: 8mhz
+ type: binary
+ -
+ name: 16mhz
+ type: binary
+ -
+ name: no-320mhz
+ type: binary
+ -
+ name: no-eht
+ type: binary
+ -
+ name: psd
+ type: binary
+ -
+ name: dfs-concurrent
+ type: binary
+ -
+ name: no-6ghz-vlp-client
+ type: binary
+ -
+ name: no-6ghz-afc-client
+ type: binary
+ -
+ name: can-monitor
+ type: binary
+ -
+ name: allow-6ghz-vlp-ap
+ type: binary
+ -
+ name: if-combination-attributes
+ enum-name: nl80211-if-combination-attrs
+ name-prefix: nl80211-iface-comb-
+ attr-max-name: max-nl80211-iface-comb
+ attributes:
+ -
+ name: limits
+ type: indexed-array
+ sub-type: nest
+ nested-attributes: iface-limit-attributes
+ -
+ name: maxnum
+ type: u32
+ -
+ name: sta-ap-bi-match
+ type: flag
+ -
+ name: num-channels
+ type: u32
+ -
+ name: radar-detect-widths
+ type: u32
+ -
+ name: radar-detect-regions
+ type: u32
+ -
+ name: bi-min-gcd
+ type: u32
+ -
+ name: iface-limit-attributes
+ enum-name: nl80211-iface-limit-attrs
+ name-prefix: nl80211-iface-limit-
+ attr-max-name: max-nl80211-iface-limit
+ attributes:
+ -
+ name: max
+ type: u32
+ -
+ name: types
+ type: nest
+ nested-attributes: supported-iftypes
+ -
+ name: iftype-data-attrs
+ name-prefix: nl80211-band-iftype-attr-
+ attributes:
+ -
+ name: iftypes
+ type: binary
+ -
+ name: he-cap-mac
+ type: binary
+ -
+ name: he-cap-phy
+ type: binary
+ -
+ name: he-cap-mcs-set
+ type: binary
+ -
+ name: he-cap-ppe
+ type: binary
+ -
+ name: he-6ghz-capa
+ type: binary
+ -
+ name: vendor-elems
+ type: binary
+ -
+ name: eht-cap-mac
+ type: binary
+ -
+ name: eht-cap-phy
+ type: binary
+ -
+ name: eht-cap-mcs-set
+ type: binary
+ -
+ name: eht-cap-ppe
+ type: binary
+ -
+ name: iftype-attrs
+ enum-name: nl80211-iftype
+ name-prefix: nl80211-iftype-
+ attributes:
+ -
+ name: unspecified
+ type: nest
+ value: 0
+ nested-attributes: frame-type-attrs
+ -
+ name: adhoc
+ type: nest
+ nested-attributes: frame-type-attrs
+ -
+ name: station
+ type: nest
+ nested-attributes: frame-type-attrs
+ -
+ name: ap
+ type: nest
+ nested-attributes: frame-type-attrs
+ -
+ name: ap-vlan
+ type: nest
+ nested-attributes: frame-type-attrs
+ -
+ name: wds
+ type: nest
+ nested-attributes: frame-type-attrs
+ -
+ name: monitor
+ type: nest
+ nested-attributes: frame-type-attrs
+ -
+ name: mesh-point
+ type: nest
+ nested-attributes: frame-type-attrs
+ -
+ name: p2p-client
+ type: nest
+ nested-attributes: frame-type-attrs
+ -
+ name: p2p-go
+ type: nest
+ nested-attributes: frame-type-attrs
+ -
+ name: p2p-device
+ type: nest
+ nested-attributes: frame-type-attrs
+ -
+ name: ocb
+ type: nest
+ nested-attributes: frame-type-attrs
+ -
+ name: nan
+ type: nest
+ nested-attributes: frame-type-attrs
+ -
+ name: sar-attributes
+ enum-name: nl80211-sar-attrs
+ name-prefix: nl80211-sar-attr-
+ attributes:
+ -
+ name: type
+ type: u32
+ -
+ name: specs
+ type: indexed-array
+ sub-type: nest
+ nested-attributes: sar-specs
+ -
+ name: sar-specs
+ enum-name: nl80211-sar-specs-attrs
+ name-prefix: nl80211-sar-attr-specs-
+ attributes:
+ -
+ name: power
+ type: s32
+ -
+ name: range-index
+ type: u32
+ -
+ name: start-freq
+ type: u32
+ -
+ name: end-freq
+ type: u32
+ -
+ name: supported-iftypes
+ enum-name: nl80211-iftype
+ name-prefix: nl80211-iftype-
+ attributes:
+ -
+ name: adhoc
+ type: flag
+ -
+ name: station
+ type: flag
+ -
+ name: ap
+ type: flag
+ -
+ name: ap-vlan
+ type: flag
+ -
+ name: wds
+ type: flag
+ -
+ name: monitor
+ type: flag
+ -
+ name: mesh-point
+ type: flag
+ -
+ name: p2p-client
+ type: flag
+ -
+ name: p2p-go
+ type: flag
+ -
+ name: p2p-device
+ type: flag
+ -
+ name: ocb
+ type: flag
+ -
+ name: nan
+ type: flag
+ -
+ name: txq-stats-attrs
+ name-prefix: nl80211-txq-stats-
+ attributes:
+ -
+ name: backlog-bytes
+ type: u32
+ -
+ name: backlog-packets
+ type: u32
+ -
+ name: flows
+ type: u32
+ -
+ name: drops
+ type: u32
+ -
+ name: ecn-marks
+ type: u32
+ -
+ name: overlimit
+ type: u32
+ -
+ name: overmemory
+ type: u32
+ -
+ name: collisions
+ type: u32
+ -
+ name: tx-bytes
+ type: u32
+ -
+ name: tx-packets
+ type: u32
+ -
+ name: max-flows
+ type: u32
+ -
+ name: wmm-attrs
+ enum-name: nl80211-wmm-rule
+ name-prefix: nl80211-wmmr-
+ attributes:
+ -
+ name: cw-min
+ type: u16
+ -
+ name: cw-max
+ type: u16
+ -
+ name: aifsn
+ type: u8
+ -
+ name: txop
+ type: u16
+ -
+ name: wowlan-triggers-attrs
+ enum-name: nl80211-wowlan-triggers
+ name-prefix: nl80211-wowlan-trig-
+ attr-max-name: max-nl80211-wowlan-trig
+ attributes:
+ -
+ name: any
+ type: flag
+ -
+ name: disconnect
+ type: flag
+ -
+ name: magic-pkt
+ type: flag
+ -
+ name: pkt-pattern
+ type: flag
+ -
+ name: gtk-rekey-supported
+ type: flag
+ -
+ name: gtk-rekey-failure
+ type: flag
+ -
+ name: eap-ident-request
+ type: flag
+ -
+ name: 4way-handshake
+ type: flag
+ -
+ name: rfkill-release
+ type: flag
+ -
+ name: wakeup-pkt-80211
+ type: flag
+ -
+ name: wakeup-pkt-80211-len
+ type: flag
+ -
+ name: wakeup-pkt-8023
+ type: flag
+ -
+ name: wakeup-pkt-8023-len
+ type: flag
+ -
+ name: tcp-connection
+ type: flag
+ -
+ name: wakeup-tcp-match
+ type: flag
+ -
+ name: wakeup-tcp-connlost
+ type: flag
+ -
+ name: wakeup-tcp-nomoretokens
+ type: flag
+ -
+ name: net-detect
+ type: flag
+ -
+ name: net-detect-results
+ type: flag
+ -
+ name: unprotected-deauth-disassoc
+ type: flag
+
+operations:
+ enum-model: directional
+ list:
+ -
+ name: get-wiphy
+ doc: |
+ Get information about a wiphy or dump a list of all wiphys. Requests to dump get-wiphy
+ should unconditionally include the split-wiphy-dump flag in the request.
+ attribute-set: nl80211-attrs
+ do:
+ request:
+ value: 1
+ attributes:
+ - wiphy
+ - wdev
+ - ifindex
+ reply:
+ value: 3
+ attributes: &wiphy-reply-attrs
+ - bands
+ - cipher-suites
+ - control-port-ethertype
+ - ext-capa
+ - ext-capa-mask
+ - ext-features
+ - feature-flags
+ - generation
+ - ht-capability-mask
+ - interface-combinations
+ - mac
+ - max-csa-counters
+ - max-match-sets
+ - max-num-akm-suites
+ - max-num-pmkids
+ - max-num-scan-ssids
+ - max-num-sched-scan-plans
+ - max-num-sched-scan-ssids
+ - max-remain-on-channel-duration
+ - max-scan-ie-len
+ - max-scan-plan-interval
+ - max-scan-plan-iterations
+ - max-sched-scan-ie-len
+ - offchannel-tx-ok
+ - rx-frame-types
+ - sar-spec
+ - sched-scan-max-reqs
+ - software-iftypes
+ - support-ap-uapsd
+ - supported-commands
+ - supported-iftypes
+ - tdls-external-setup
+ - tdls-support
+ - tx-frame-types
+ - txq-limit
+ - txq-memory-limit
+ - txq-quantum
+ - txq-stats
+ - vht-capability-mask
+ - wiphy
+ - wiphy-antenna-avail-rx
+ - wiphy-antenna-avail-tx
+ - wiphy-antenna-rx
+ - wiphy-antenna-tx
+ - wiphy-bands
+ - wiphy-coverage-class
+ - wiphy-frag-threshold
+ - wiphy-name
+ - wiphy-retry-long
+ - wiphy-retry-short
+ - wiphy-rts-threshold
+ - wowlan-triggers-supported
+ dump:
+ request:
+ attributes:
+ - wiphy
+ - wdev
+ - ifindex
+ - split-wiphy-dump
+ reply:
+ attributes: *wiphy-reply-attrs
+ -
+ name: get-interface
+ doc: Get information about an interface or dump a list of all interfaces
+ attribute-set: nl80211-attrs
+ do:
+ request:
+ value: 5
+ attributes:
+ - ifname
+ reply:
+ value: 7
+ attributes: &interface-reply-attrs
+ - ifname
+ - iftype
+ - ifindex
+ - wiphy
+ - wdev
+ - mac
+ - generation
+ - txq-stats
+ - 4addr
+ dump:
+ request:
+ attributes:
+ - ifname
+ reply:
+ attributes: *interface-reply-attrs
+ -
+ name: get-protocol-features
+ doc: Get information about supported protocol features
+ attribute-set: nl80211-attrs
+ do:
+ request:
+ value: 95
+ attributes:
+ - protocol-features
+ reply:
+ value: 95
+ attributes:
+ - protocol-features
+
+mcast-groups:
+ list:
+ -
+ name: config
+ -
+ name: scan
+ -
+ name: regulatory
+ -
+ name: mlme
+ -
+ name: vendor
+ -
+ name: nan
+ -
+ name: testmode
diff --git a/Documentation/netlink/specs/rt_addr.yaml b/Documentation/netlink/specs/rt_addr.yaml
index cbee1cedb177..5dd5469044c7 100644
--- a/Documentation/netlink/specs/rt_addr.yaml
+++ b/Documentation/netlink/specs/rt_addr.yaml
@@ -168,6 +168,29 @@ operations:
reply:
value: 20
attributes: *ifaddr-all
+ -
+ name: getmaddrs
+ doc: Get / dump IPv4/IPv6 multicast addresses.
+ attribute-set: addr-attrs
+ fixed-header: ifaddrmsg
+ do:
+ request:
+ value: 58
+ attributes:
+ - ifa-family
+ - ifa-index
+ reply:
+ value: 58
+ attributes: &mcaddr-attrs
+ - ifa-multicast
+ - ifa-cacheinfo
+ dump:
+ request:
+ value: 58
+ - ifa-family
+ reply:
+ value: 58
+ attributes: *mcaddr-attrs
mcast-groups:
list:
diff --git a/Documentation/netlink/specs/rt_rule.yaml b/Documentation/netlink/specs/rt_rule.yaml
index a9debac3058a..b30c924087fa 100644
--- a/Documentation/netlink/specs/rt_rule.yaml
+++ b/Documentation/netlink/specs/rt_rule.yaml
@@ -182,6 +182,14 @@ attribute-sets:
type: u32
byte-order: big-endian
display-hint: hex
+ -
+ name: sport-mask
+ type: u16
+ display-hint: hex
+ -
+ name: dport-mask
+ type: u16
+ display-hint: hex
operations:
enum-model: directional
@@ -215,6 +223,8 @@ operations:
- dscp
- flowlabel
- flowlabel-mask
+ - sport-mask
+ - dport-mask
-
name: newrule-ntf
doc: Notify a rule creation
diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst
index 8bf411b857d4..5f3885e56f58 100644
--- a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst
+++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst
@@ -70,7 +70,7 @@ the DPSW object that it will probe:
Besides the configuration of the actual DPSW object, the dpaa2-switch driver
will need the following DPAA2 objects:
- * 1 DPMCP - A Management Command Portal object is needed for any interraction
+ * 1 DPMCP - A Management Command Portal object is needed for any interaction
with the MC firmware.
* 1 DPBP - A Buffer Pool is used for seeding buffers intended for the Rx path
diff --git a/Documentation/networking/devlink/ice.rst b/Documentation/networking/devlink/ice.rst
index e3972d03cea0..792e9f8c846a 100644
--- a/Documentation/networking/devlink/ice.rst
+++ b/Documentation/networking/devlink/ice.rst
@@ -69,6 +69,17 @@ Parameters
To verify that value has been set:
$ devlink dev param show pci/0000:16:00.0 name tx_scheduling_layers
+ * - ``msix_vec_per_pf_max``
+ - driverinit
+ - Set the max MSI-X that can be used by the PF, rest can be utilized for
+ SRIOV. The range is from min value set in msix_vec_per_pf_min to
+ 2k/number of ports.
+ * - ``msix_vec_per_pf_min``
+ - driverinit
+ - Set the min MSI-X that will be used by the PF. This value inform how many
+ MSI-X will be allocated statically. The range is from 2 to value set
+ in msix_vec_per_pf_max.
+
.. list-table:: Driver specific parameters implemented
:widths: 5 5 90
diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 41618538fc70..7febe0aecd53 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -280,6 +280,10 @@ Description of the vnic counters:
number of packets handled by the VNIC experiencing unexpected steering
failure (at any point in steering flow owned by the VNIC, including the FDB
for the eswitch owner).
+- icm_consumption
+ amount of Interconnect Host Memory (ICM) consumed by the vnic in
+ granularity of 4KB. ICM is host memory allocated by SW upon HCA request
+ and is used for storing data structures that control HCA operation.
User commands examples:
diff --git a/Documentation/networking/devlink/sfc.rst b/Documentation/networking/devlink/sfc.rst
index db64a1bd9733..0398d59ea184 100644
--- a/Documentation/networking/devlink/sfc.rst
+++ b/Documentation/networking/devlink/sfc.rst
@@ -5,7 +5,7 @@ sfc devlink support
===================
This document describes the devlink features implemented by the ``sfc``
-device driver for the ef100 device.
+device driver for the ef10 and ef100 devices.
Info versions
=============
@@ -18,6 +18,10 @@ The ``sfc`` driver reports the following versions
* - Name
- Type
- Description
+ * - ``fw.bundle_id``
+ - stored
+ - Version of the firmware "bundle" image that was last used to update
+ multiple components.
* - ``fw.mgmt.suc``
- running
- For boards where the management function is split between multiple
@@ -55,3 +59,13 @@ The ``sfc`` driver reports the following versions
* - ``fw.uefi``
- running
- UEFI driver version (No UNDI support).
+
+Flash Update
+============
+
+The ``sfc`` driver implements support for flash update using the
+``devlink-flash`` interface. It supports updating the device flash using a
+combined flash image ("bundle") that contains multiple components (on ef10,
+typically ``fw.mgmt``, ``fw.app``, ``fw.exprom`` and ``fw.uefi``).
+
+The driver does not support any overwrite mask flags.
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 363b4950d542..054561f8dcae 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -705,6 +705,8 @@ tcp_retries2 - INTEGER
seconds and is a lower bound for the effective timeout.
TCP will effectively time out at the first RTO which exceeds the
hypothetical timeout.
+ If tcp_rto_max_ms is decreased, it is recommended to also
+ change tcp_retries2.
RFC 1122 recommends at least 100 seconds for the timeout,
which corresponds to a value of at least 8.
@@ -1237,6 +1239,17 @@ tcp_rto_min_us - INTEGER
Default: 200000
+tcp_rto_max_ms - INTEGER
+ Maximal TCP retransmission timeout (in ms).
+ Note that TCP_RTO_MAX_MS socket option has higher precedence.
+
+ When changing tcp_rto_max_ms, it is important to understand
+ that tcp_retries2 might need a change.
+
+ Possible Values: 1000 - 120,000
+
+ Default: 120,000
+
UDP variables
=============
diff --git a/Documentation/networking/j1939.rst b/Documentation/networking/j1939.rst
index 544bad175aae..45f02efe3df5 100644
--- a/Documentation/networking/j1939.rst
+++ b/Documentation/networking/j1939.rst
@@ -66,6 +66,90 @@ the library exclusively, or by the in-kernel system exclusively.
J1939 concepts
==============
+Data Sent to the J1939 Stack
+----------------------------
+
+The data buffers sent to the J1939 stack from user space are not CAN frames
+themselves. Instead, they are payloads that the J1939 stack converts into
+proper CAN frames based on the size of the buffer and the type of transfer. The
+size of the buffer influences how the stack processes the data and determines
+the internal code path used for the transfer.
+
+**Handling of Different Buffer Sizes:**
+
+- **Buffers with a size of 8 bytes or less:**
+
+ - These are handled as simple sessions internally within the stack.
+
+ - The stack converts the buffer directly into a single CAN frame without
+ fragmentation.
+
+ - This type of transfer does not require an actual client (receiver) on the
+ receiving side.
+
+- **Buffers up to 1785 bytes:**
+
+ - These are automatically handled as J1939 Transport Protocol (TP) transfers.
+
+ - Internally, the stack splits the buffer into multiple 8-byte CAN frames.
+
+ - TP transfers can be unicast or broadcast.
+
+ - **Broadcast TP:** Does not require a receiver on the other side and can be
+ used in broadcast scenarios.
+
+ - **Unicast TP:** Requires an active receiver (client) on the other side to
+ acknowledge the transfer.
+
+- **Buffers from 1786 bytes up to 111 MiB:**
+
+ - These are handled as ISO 11783 Extended Transport Protocol (ETP) transfers.
+
+ - ETP transfers are used for larger payloads and are split into multiple CAN
+ frames internally.
+
+ - **ETP transfers (unicast):** Require a receiver on the other side to
+ process the incoming data and acknowledge each step of the transfer.
+
+ - ETP transfers cannot be broadcast like TP transfers, and always require a
+ receiver for operation.
+
+**Non-Blocking Operation with `MSG_DONTWAIT`:**
+
+The J1939 stack supports non-blocking operation when used in combination with
+the `MSG_DONTWAIT` flag. In this mode, the stack attempts to take as much data
+as the available memory for the socket allows. It returns the amount of data
+that was successfully taken, and it is the responsibility of user space to
+monitor this value and handle partial transfers.
+
+- If the stack cannot take the entire buffer, it returns the number of bytes
+ successfully taken, and user space should handle the remainder.
+
+- **Error handling:** When using `MSG_DONTWAIT`, the user must rely on the
+ error queue to detect transfer errors. See the **SO_J1939_ERRQUEUE** section
+ for details on how to subscribe to error notifications. Without the error
+ queue, there is no other way for user space to be notified of transfer errors
+ during non-blocking operations.
+
+**Behavior and Requirements:**
+
+- **Simple transfers (<= 8 bytes):** Do not require a receiver on the other
+ side, making them easy to send without needing address claiming or
+ coordination with a destination.
+
+- **Unicast TP/ETP:** Requires a receiver on the other side to complete the
+ transfer. The receiver must acknowledge the transfer for the session to
+ proceed successfully.
+
+- **Broadcast TP:** Allows sending data without a receiver, but only works for
+ TP transfers. ETP cannot be broadcast and always needs a receiving client.
+
+These different behaviors depend heavily on the size of the buffer provided to
+the stack, and the appropriate transport mechanism (TP or ETP) is selected
+based on the payload size. The stack automatically manages the fragmentation
+and reassembly of large payloads and ensures that the correct CAN frames are
+generated and transmitted for each session.
+
PGN
---
@@ -338,6 +422,459 @@ with ``cmsg_level == SOL_J1939 && cmsg_type == SCM_J1939_DEST_ADDR``,
}
}
+setsockopt(2)
+^^^^^^^^^^^^^
+
+The ``setsockopt(2)`` function is used to configure various socket-level
+options for J1939 communication. The following options are supported:
+
+``SO_J1939_FILTER``
+~~~~~~~~~~~~~~~~~~~
+
+The ``SO_J1939_FILTER`` option is essential when the default behavior of
+``bind(2)`` and ``connect(2)`` is insufficient for specific use cases. By
+default, ``bind(2)`` and ``connect(2)`` allow a socket to be associated with a
+single unicast or broadcast address. However, there are scenarios where finer
+control over the incoming messages is required, such as filtering by Parameter
+Group Number (PGN) rather than by addresses.
+
+For example, in a system where multiple types of J1939 messages are being
+transmitted, a process might only be interested in a subset of those messages,
+such as specific PGNs, and not want to receive all messages destined for its
+address or broadcast to the bus.
+
+By applying the ``SO_J1939_FILTER`` option, you can filter messages based on:
+
+- **Source Address (SA)**: Filter messages coming from specific source
+ addresses.
+
+- **Source Name**: Filter messages coming from ECUs with specific NAME
+ identifiers.
+
+- **Parameter Group Number (PGN)**: Focus on receiving messages with specific
+ PGNs, filtering out irrelevant ones.
+
+This filtering mechanism is particularly useful when:
+
+- You want to receive a subset of messages based on their PGNs, even if the
+ address is the same.
+
+- You need to handle both broadcast and unicast messages but only care about
+ certain message types or parameters.
+
+- The ``bind(2)`` and ``connect(2)`` functions only allow binding to a single
+ address, which might not be sufficient if the process needs to handle multiple
+ PGNs but does not want to open multiple sockets.
+
+To remove existing filters, you can pass ``optval == NULL`` or ``optlen == 0``
+to ``setsockopt(2)``. This will clear all currently set filters. If you want to
+**update** the set of filters, you must pass the updated filter set to
+``setsockopt(2)``, as the new filter set will **replace** the old one entirely.
+This behavior ensures that any previous filter configuration is discarded and
+only the new set is applied.
+
+Example of removing all filters:
+
+.. code-block:: c
+
+ setsockopt(sock, SOL_CAN_J1939, SO_J1939_FILTER, NULL, 0);
+
+**Maximum number of filters:** The maximum amount of filters that can be
+applied using ``SO_J1939_FILTER`` is defined by ``J1939_FILTER_MAX``, which is
+set to 512. This means you can configure up to 512 individual filters to match
+your specific filtering needs.
+
+Practical use case: **Monitoring Address Claiming**
+
+One practical use case is monitoring the J1939 address claiming process by
+filtering for specific PGNs related to address claiming. This allows a process
+to monitor and handle address claims without processing unrelated messages.
+
+Example:
+
+.. code-block:: c
+
+ struct j1939_filter filt[] = {
+ {
+ .pgn = J1939_PGN_ADDRESS_CLAIMED,
+ .pgn_mask = J1939_PGN_PDU1_MAX,
+ }, {
+ .pgn = J1939_PGN_REQUEST,
+ .pgn_mask = J1939_PGN_PDU1_MAX,
+ }, {
+ .pgn = J1939_PGN_ADDRESS_COMMANDED,
+ .pgn_mask = J1939_PGN_MAX,
+ },
+ };
+ setsockopt(sock, SOL_CAN_J1939, SO_J1939_FILTER, &filt, sizeof(filt));
+
+In this example, the socket will only receive messages with the PGNs related to
+address claiming: ``J1939_PGN_ADDRESS_CLAIMED``, ``J1939_PGN_REQUEST``, and
+``J1939_PGN_ADDRESS_COMMANDED``. This is particularly useful in scenarios where
+you want to monitor and process address claims without being overwhelmed by
+other traffic on the J1939 network.
+
+``SO_J1939_PROMISC``
+~~~~~~~~~~~~~~~~~~~~
+
+The ``SO_J1939_PROMISC`` option enables socket-level promiscuous mode. When
+this option is enabled, the socket will receive all J1939 traffic, regardless
+of any filters set by ``bind()`` or ``connect()``. This is analogous to
+enabling promiscuous mode for an Ethernet interface, where all traffic on the
+network segment is captured.
+
+However, **`SO_J1939_FILTER` has a higher priority** compared to
+``SO_J1939_PROMISC``. This means that even in promiscuous mode, you can reduce
+the number of packets received by applying specific filters with
+`SO_J1939_FILTER`. The filters will limit which packets are passed to the
+socket, allowing for more refined traffic selection while promiscuous mode is
+active.
+
+The acceptable value size for this option is ``sizeof(int)``, and the value is
+only differentiated between `0` and non-zero. A value of `0` disables
+promiscuous mode, while any non-zero value enables it.
+
+This combination can be useful for debugging or monitoring specific types of
+traffic while still capturing a broad set of messages.
+
+Example:
+
+.. code-block:: c
+
+ int value = 1;
+ setsockopt(sock, SOL_CAN_J1939, SO_J1939_PROMISC, &value, sizeof(value));
+
+In this example, setting ``value`` to any non-zero value (e.g., `1`) enables
+promiscuous mode, allowing the socket to receive all J1939 traffic on the
+network.
+
+``SO_BROADCAST``
+~~~~~~~~~~~~~~~~
+
+The ``SO_BROADCAST`` option enables the sending and receiving of broadcast
+messages. By default, broadcast messages are disabled for J1939 sockets. When
+this option is enabled, the socket will be allowed to send and receive
+broadcast packets on the J1939 network.
+
+Due to the nature of the CAN bus as a shared medium, all messages transmitted
+on the bus are visible to all participants. In the context of J1939,
+broadcasting refers to using a specific destination address field, where the
+destination address is set to a value that indicates the message is intended
+for all participants (usually a global address such as 0xFF). Enabling the
+broadcast option allows the socket to send and receive such broadcast messages.
+
+The acceptable value size for this option is ``sizeof(int)``, and the value is
+only differentiated between `0` and non-zero. A value of `0` disables the
+ability to send and receive broadcast messages, while any non-zero value
+enables it.
+
+Example:
+
+.. code-block:: c
+
+ int value = 1;
+ setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &value, sizeof(value));
+
+In this example, setting ``value`` to any non-zero value (e.g., `1`) enables
+the socket to send and receive broadcast messages.
+
+``SO_J1939_SEND_PRIO``
+~~~~~~~~~~~~~~~~~~~~~~
+
+The ``SO_J1939_SEND_PRIO`` option sets the priority of outgoing J1939 messages
+for the socket. In J1939, messages can have different priorities, and lower
+numerical values indicate higher priority. This option allows the user to
+control the priority of messages sent from the socket by adjusting the priority
+bits in the CAN identifier.
+
+The acceptable value **size** for this option is ``sizeof(int)``, and the value
+is expected to be in the range of 0 to 7, where `0` is the highest priority,
+and `7` is the lowest. By default, the priority is set to `6` if this option is
+not explicitly configured.
+
+Note that the priority values `0` and `1` can only be set if the process has
+the `CAP_NET_ADMIN` capability. These are reserved for high-priority traffic
+and require administrative privileges.
+
+Example:
+
+.. code-block:: c
+
+ int prio = 3; // Priority value between 0 (highest) and 7 (lowest)
+ setsockopt(sock, SOL_CAN_J1939, SO_J1939_SEND_PRIO, &prio, sizeof(prio));
+
+In this example, the priority is set to `3`, meaning the outgoing messages will
+be sent with a moderate priority level.
+
+``SO_J1939_ERRQUEUE``
+~~~~~~~~~~~~~~~~~~~~~
+
+The ``SO_J1939_ERRQUEUE`` option enables the socket to receive error messages
+from the error queue, providing diagnostic information about transmission
+failures, protocol violations, or other issues that occur during J1939
+communication. Once this option is set, user space is required to handle
+``MSG_ERRQUEUE`` messages.
+
+Setting ``SO_J1939_ERRQUEUE`` to ``0`` will purge any currently present error
+messages in the error queue. When enabled, error messages can be retrieved
+using the ``recvmsg(2)`` system call.
+
+When subscribing to the error queue, the following error events can be
+accessed:
+
+- **``J1939_EE_INFO_TX_ABORT``**: Transmission abort errors.
+- **``J1939_EE_INFO_RX_RTS``**: Reception of RTS (Request to Send) control
+ frames.
+- **``J1939_EE_INFO_RX_DPO``**: Reception of data packets with Data Page Offset
+ (DPO).
+- **``J1939_EE_INFO_RX_ABORT``**: Reception abort errors.
+
+The error queue can be used to correlate errors with specific message transfer
+sessions using the session ID (``tskey``). The session ID is assigned via the
+``SOF_TIMESTAMPING_OPT_ID`` flag, which is set by enabling the
+``SO_TIMESTAMPING`` option.
+
+If ``SO_J1939_ERRQUEUE`` is activated, the user is required to pull messages
+from the error queue, meaning that using plain ``recv(2)`` is not sufficient
+anymore. The user must use ``recvmsg(2)`` with appropriate flags to handle
+error messages. Failure to do so can result in the socket becoming blocked with
+unprocessed error messages in the queue.
+
+It is **recommended** that ``SO_J1939_ERRQUEUE`` be used in combination with
+``SO_TIMESTAMPING`` in most cases. This enables proper error handling along
+with session tracking and timestamping, providing a more detailed analysis of
+message transfers and errors.
+
+The acceptable value **size** for this option is ``sizeof(int)``, and the value
+is only differentiated between ``0`` and non-zero. A value of ``0`` disables
+error queue reception and purges any existing error messages, while any
+non-zero value enables it.
+
+Example:
+
+.. code-block:: c
+
+ int enable = 1; // Enable error queue reception
+ setsockopt(sock, SOL_CAN_J1939, SO_J1939_ERRQUEUE, &enable, sizeof(enable));
+
+ // Enable timestamping with session tracking via tskey
+ int timestamping = SOF_TIMESTAMPING_OPT_ID | SOF_TIMESTAMPING_TX_ACK |
+ SOF_TIMESTAMPING_TX_SCHED |
+ SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_OPT_CMSG;
+ setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &timestamping,
+ sizeof(timestamping));
+
+When enabled, error messages can be retrieved using ``recvmsg(2)``. By
+combining ``SO_J1939_ERRQUEUE`` with ``SO_TIMESTAMPING`` (with
+``SOF_TIMESTAMPING_OPT_ID`` and ``SOF_TIMESTAMPING_OPT_CMSG`` enabled), the
+user can track message transfers, retrieve precise timestamps, and correlate
+errors with specific sessions.
+
+For more information on enabling timestamps and session tracking, refer to the
+`SO_TIMESTAMPING` section.
+
+``SO_TIMESTAMPING``
+~~~~~~~~~~~~~~~~~~~
+
+The ``SO_TIMESTAMPING`` option allows the socket to receive timestamps for
+various events related to message transmissions and receptions in J1939. This
+option is often used in combination with ``SO_J1939_ERRQUEUE`` to provide
+detailed diagnostic information, session tracking, and precise timing data for
+message transfers.
+
+In J1939, all payloads provided by user space, regardless of size, are
+processed by the kernel as **sessions**. This includes both single-frame
+messages (up to 8 bytes) and multi-frame protocols such as the Transport
+Protocol (TP) and Extended Transport Protocol (ETP). Even for small,
+single-frame messages, the kernel creates a session to manage the transmission
+and reception. The concept of sessions allows the kernel to manage various
+aspects of the protocol, such as reassembling multi-frame messages and tracking
+the status of transmissions.
+
+When receiving extended error messages from the error queue, the error
+information is delivered through a `struct sock_extended_err`, accessible via
+the control message (``cmsg``) retrieved using the ``recvmsg(2)`` system call.
+
+There are two typical origins for the extended error messages in J1939:
+
+1. ``serr->ee_origin == SO_EE_ORIGIN_TIMESTAMPING``:
+
+ In this case, the `serr->ee_info` field will contain one of the following
+ timestamp types:
+
+ - ``SCM_TSTAMP_SCHED``: This timestamp is valid for Extended Transport
+ Protocol (ETP) transfers and simple transfers (8 bytes or less). It
+ indicates when a message or set of frames has been scheduled for
+ transmission.
+
+ - For simple transfers (8 bytes or less), it marks the point when the
+ message is queued and ready to be sent onto the CAN bus.
+
+ - For ETP transfers, it is sent after receiving a CTS (Clear to Send)
+ frame on the sender side, indicating that a new set of frames has been
+ scheduled for transmission.
+
+ - The Transport Protocol (TP) case is currently not implemented for this
+ timestamp.
+
+ - On the receiver side, the counterpart to this event for ETP is
+ represented by the ``J1939_EE_INFO_RX_DPO`` message, which indicates the
+ reception of a Data Page Offset (DPO) control frame.
+
+ - ``SCM_TSTAMP_ACK``: This timestamp indicates the acknowledgment of the
+ message or session.
+
+ - For simple transfers (8 bytes or less), it marks when the message has
+ been sent and an echo confirmation has been received from the CAN
+ controller, indicating that the frame was transmitted onto the bus.
+
+ - For multi-frame transfers (TP or ETP), it signifies that the entire
+ session has been acknowledged, typically after receiving the End of
+ Message Acknowledgment (EOMA) packet.
+
+2. ``serr->ee_origin == SO_EE_ORIGIN_LOCAL``:
+
+ In this case, the `serr->ee_info` field will contain one of the following
+ J1939 stack-specific message types:
+
+ - ``J1939_EE_INFO_TX_ABORT``: This message indicates that the transmission
+ of a message or session was aborted. The cause of the abort can come from
+ various sources:
+
+ - **CAN stack failure**: The J1939 stack was unable to pass the frame to
+ the CAN framework for transmission.
+
+ - **Echo failure**: The J1939 stack did not receive an echo confirmation
+ from the CAN controller, meaning the frame may not have been successfully
+ transmitted to the CAN bus.
+
+ - **Protocol-level issues**: For multi-frame transfers (TP/ETP), this
+ could include protocol-related errors, such as an abort signaled by the
+ receiver or a timeout at the protocol level, which causes the session to
+ terminate prematurely.
+
+ - The corresponding error code is stored in ``serr->ee_data``
+ (``session->err`` on kernel side), providing additional details about
+ the specific reason for the abort.
+
+ - ``J1939_EE_INFO_RX_RTS``: This message indicates that the J1939 stack has
+ received a Request to Send (RTS) control frame, signaling the start of a
+ multi-frame transfer using the Transport Protocol (TP) or Extended
+ Transport Protocol (ETP).
+
+ - It informs the receiver that the sender is ready to transmit a
+ multi-frame message and includes details about the total message size
+ and the number of frames to be sent.
+
+ - Statistics such as ``J1939_NLA_TOTAL_SIZE``, ``J1939_NLA_PGN``,
+ ``J1939_NLA_SRC_NAME``, and ``J1939_NLA_DEST_NAME`` are provided along
+ with the ``J1939_EE_INFO_RX_RTS`` message, giving detailed information
+ about the incoming transfer.
+
+ - ``J1939_EE_INFO_RX_DPO``: This message indicates that the J1939 stack has
+ received a Data Page Offset (DPO) control frame, which is part of the
+ Extended Transport Protocol (ETP).
+
+ - The DPO frame signals the continuation of an ETP multi-frame message by
+ indicating the offset position in the data being transferred. It helps
+ the receiver manage large data sets by identifying which portion of the
+ message is being received.
+
+ - It is typically paired with a corresponding ``SCM_TSTAMP_SCHED`` event
+ on the sender side, which indicates when the next set of frames is
+ scheduled for transmission.
+
+ - This event includes statistics such as ``J1939_NLA_BYTES_ACKED``, which
+ tracks the number of bytes acknowledged up to that point in the session.
+
+ - ``J1939_EE_INFO_RX_ABORT``: This message indicates that the reception of a
+ multi-frame message (Transport Protocol or Extended Transport Protocol) has
+ been aborted.
+
+ - The abort can be triggered by protocol-level errors such as timeouts, an
+ unexpected frame, or a specific abort request from the sender.
+
+ - This message signals that the receiver cannot continue processing the
+ transfer, and the session is terminated.
+
+ - The corresponding error code is stored in ``serr->ee_data``
+ (``session->err`` on kernel side ), providing further details about the
+ reason for the abort, such as protocol violations or timeouts.
+
+ - After receiving this message, the receiver discards the partially received
+ frames, and the multi-frame session is considered incomplete.
+
+In both cases, if ``SOF_TIMESTAMPING_OPT_ID`` is enabled, ``serr->ee_data``
+will be set to the session’s unique identifier (``session->tskey``). This
+allows user space to track message transfers by their session identifier across
+multiple frames or stages.
+
+In all other cases, ``serr->ee_errno`` will be set to ``ENOMSG``, except for
+the ``J1939_EE_INFO_TX_ABORT`` and ``J1939_EE_INFO_RX_ABORT`` cases, where the
+kernel sets ``serr->ee_data`` to the error stored in ``session->err``. All
+protocol-specific errors are converted to standard kernel error values and
+stored in ``session->err``. These error values are unified across system calls
+and ``serr->ee_errno``. Some of the known error values are described in the
+`Error Codes in the J1939 Stack` section.
+
+When the `J1939_EE_INFO_RX_RTS` message is provided, it will include the
+following statistics for multi-frame messages (TP and ETP):
+
+ - ``J1939_NLA_TOTAL_SIZE``: Total size of the message in the session.
+ - ``J1939_NLA_PGN``: Parameter Group Number (PGN) identifying the message type.
+ - ``J1939_NLA_SRC_NAME``: 64-bit name of the source ECU.
+ - ``J1939_NLA_DEST_NAME``: 64-bit name of the destination ECU.
+ - ``J1939_NLA_SRC_ADDR``: 8-bit source address of the sending ECU.
+ - ``J1939_NLA_DEST_ADDR``: 8-bit destination address of the receiving ECU.
+
+- For other messages (including single-frame messages), only the following
+ statistic is included:
+
+ - ``J1939_NLA_BYTES_ACKED``: Number of bytes successfully acknowledged in the
+ session.
+
+The key flags for ``SO_TIMESTAMPING`` include:
+
+- ``SOF_TIMESTAMPING_OPT_ID``: Enables the use of a unique session identifier
+ (``tskey``) for each transfer. This identifier helps track message transfers
+ and errors as distinct sessions in user space. When this option is enabled,
+ ``serr->ee_data`` will be set to ``session->tskey``.
+
+- ``SOF_TIMESTAMPING_OPT_CMSG``: Sends timestamp information through control
+ messages (``struct scm_timestamping``), allowing the application to retrieve
+ timestamps alongside the data.
+
+- ``SOF_TIMESTAMPING_TX_SCHED``: Provides the timestamp for when a message is
+ scheduled for transmission (``SCM_TSTAMP_SCHED``).
+
+- ``SOF_TIMESTAMPING_TX_ACK``: Provides the timestamp for when a message
+ transmission is fully acknowledged (``SCM_TSTAMP_ACK``).
+
+- ``SOF_TIMESTAMPING_RX_SOFTWARE``: Provides timestamps for reception-related
+ events (e.g., ``J1939_EE_INFO_RX_RTS``, ``J1939_EE_INFO_RX_DPO``,
+ ``J1939_EE_INFO_RX_ABORT``).
+
+These flags enable detailed monitoring of message lifecycles, including
+transmission scheduling, acknowledgments, reception timestamps, and gathering
+detailed statistics about the communication session, especially for multi-frame
+payloads like TP and ETP.
+
+Example:
+
+.. code-block:: c
+
+ // Enable timestamping with various options, including session tracking and
+ // statistics
+ int sock_opt = SOF_TIMESTAMPING_OPT_CMSG |
+ SOF_TIMESTAMPING_TX_ACK |
+ SOF_TIMESTAMPING_TX_SCHED |
+ SOF_TIMESTAMPING_OPT_ID |
+ SOF_TIMESTAMPING_RX_SOFTWARE;
+
+ setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &sock_opt, sizeof(sock_opt));
+
+
+
Dynamic Addressing
------------------
@@ -458,3 +995,141 @@ Send:
};
sendto(sock, dat, sizeof(dat), 0, (const struct sockaddr *)&saddr, sizeof(saddr));
+
+
+Error Codes in the J1939 Stack
+------------------------------
+
+This section lists all potential kernel error codes that can be exposed to user
+space when interacting with the J1939 stack. It includes both standard error
+codes and those derived from protocol-specific abort codes.
+
+- ``EAGAIN``: Operation would block; retry may succeed. One common reason is
+ that an active TP or ETP session exists, and an attempt was made to start a
+ new overlapping TP or ETP session between the same peers.
+
+- ``ENETDOWN``: Network is down. This occurs when the CAN interface is switched
+ to the "down" state.
+
+- ``ENOBUFS``: No buffer space available. This error occurs when the CAN
+ interface's transmit (TX) queue is full, and no more messages can be queued.
+
+- ``EOVERFLOW``: Value too large for defined data type. In J1939, this can
+ happen if the requested data lies outside of the queued buffer. For example,
+ if a CTS (Clear to Send) requests an offset not available in the kernel buffer
+ because user space did not provide enough data.
+
+- ``EBUSY``: Device or resource is busy. For example, this occurs if an
+ identical session is already active and the stack is unable to recover from
+ the condition.
+
+- ``EACCES``: Permission denied. This error can occur, for example, when
+ attempting to send broadcast messages, but the socket is not configured with
+ ``SO_BROADCAST``.
+
+- ``EADDRNOTAVAIL``: Address not available. This error occurs in cases such as:
+
+ - When attempting to use ``getsockname(2)`` to retrieve the peer's address,
+ but the socket is not connected.
+
+ - When trying to send data to or from a NAME, but address claiming for the
+ NAME was not performed or detected by the stack.
+
+- ``EBADFD``: File descriptor in bad state. This error can occur if:
+
+ - Attempting to send data to an unbound socket.
+
+ - The socket is bound but has no source name, and the source address is
+ ``J1939_NO_ADDR``.
+
+ - The ``can_ifindex`` is incorrect.
+
+- ``EFAULT``: Bad address. Occurs mostly when the stack can't copy from or to a
+ sockptr, when there is insufficient data from user space, or when the buffer
+ provided by user space is not large enough for the requested data.
+
+- ``EINTR``: A signal occurred before any data was transmitted; see ``signal(7)``.
+
+- ``EINVAL``: Invalid argument passed. For example:
+
+ - ``msg->msg_namelen`` is less than ``J1939_MIN_NAMELEN``.
+
+ - ``addr->can_family`` is not equal to ``AF_CAN``.
+
+ - An incorrect PGN was provided.
+
+- ``ENODEV``: No such device. This happens when the CAN network device cannot
+ be found for the provided ``can_ifindex`` or if ``can_ifindex`` is 0.
+
+- ``ENOMEM``: Out of memory. Typically related to issues with memory allocation
+ in the stack.
+
+- ``ENOPROTOOPT``: Protocol not available. This can occur when using
+ ``getsockopt(2)`` or ``setsockopt(2)`` if the requested socket option is not
+ available.
+
+- ``EDESTADDRREQ``: Destination address required. This error occurs:
+
+ - In the case of ``connect(2)``, if the ``struct sockaddr *uaddr`` is ``NULL``.
+
+ - In the case of ``send*(2)``, if there is an attempt to send an ETP message
+ to a broadcast address.
+
+- ``EDOM``: Argument out of domain. This error may happen if attempting to send
+ a TP or ETP message to a PGN that is reserved for control PGNs for TP or ETP
+ operations.
+
+- ``EIO``: I/O error. This can occur if the amount of data provided to the
+ socket for a TP or ETP session does not match the announced amount of data for
+ the session.
+
+- ``ENOENT``: No such file or directory. This can happen when the stack
+ attempts to transfer CTS or EOMA but cannot find a matching receiving socket
+ anymore.
+
+- ``ENOIOCTLCMD``: No ioctls are available for the socket layer.
+
+- ``EPERM``: Operation not permitted. For example, this can occur if a
+ requested action requires ``CAP_NET_ADMIN`` privileges.
+
+- ``ENETUNREACH``: Network unreachable. Most likely, this occurs when frames
+ cannot be transmitted to the CAN bus.
+
+- ``ETIME``: Timer expired. This can happen if a timeout occurs while
+ attempting to send a simple message, for example, when an echo message from
+ the controller is not received.
+
+- ``EPROTO``: Protocol error.
+
+ - Used for various protocol-level errors in J1939, including:
+
+ - Duplicate sequence number.
+
+ - Unexpected EDPO or ECTS packet.
+
+ - Invalid PGN or offset in EDPO/ECTS.
+
+ - Number of EDPO packets exceeded CTS allowance.
+
+ - Any other protocol-level error.
+
+- ``EMSGSIZE``: Message too long.
+
+- ``ENOMSG``: No message available.
+
+- ``EALREADY``: The ECU is already engaged in one or more connection-managed
+ sessions and cannot support another.
+
+- ``EHOSTUNREACH``: A timeout occurred, and the session was aborted.
+
+- ``EBADMSG``: CTS (Clear to Send) messages were received during an active data
+ transfer, causing an abort.
+
+- ``ENOTRECOVERABLE``: The maximum retransmission request limit was reached,
+ and the session cannot recover.
+
+- ``ENOTCONN``: An unexpected data transfer packet was received.
+
+- ``EILSEQ``: A bad sequence number was received, and the software could not
+ recover.
+
diff --git a/Documentation/networking/napi.rst b/Documentation/networking/napi.rst
index f970a2be271a..d0e3953cae6a 100644
--- a/Documentation/networking/napi.rst
+++ b/Documentation/networking/napi.rst
@@ -171,12 +171,43 @@ a channel as an IRQ/NAPI which services queues of a given type. For example,
a configuration of 1 ``rx``, 1 ``tx`` and 1 ``combined`` channel is expected
to utilize 3 interrupts, 2 Rx and 2 Tx queues.
+Persistent NAPI config
+----------------------
+
+Drivers often allocate and free NAPI instances dynamically. This leads to loss
+of NAPI-related user configuration each time NAPI instances are reallocated.
+The netif_napi_add_config() API prevents this loss of configuration by
+associating each NAPI instance with a persistent NAPI configuration based on
+a driver defined index value, like a queue number.
+
+Using this API allows for persistent NAPI IDs (among other settings), which can
+be beneficial to userspace programs using ``SO_INCOMING_NAPI_ID``. See the
+sections below for other NAPI configuration settings.
+
+Drivers should try to use netif_napi_add_config() whenever possible.
+
User API
========
User interactions with NAPI depend on NAPI instance ID. The instance IDs
are only visible to the user thru the ``SO_INCOMING_NAPI_ID`` socket option.
-It's not currently possible to query IDs used by a given device.
+
+Users can query NAPI IDs for a device or device queue using netlink. This can
+be done programmatically in a user application or by using a script included in
+the kernel source tree: ``tools/net/ynl/pyynl/cli.py``.
+
+For example, using the script to dump all of the queues for a device (which
+will reveal each queue's NAPI ID):
+
+.. code-block:: bash
+
+ $ kernel-source/tools/net/ynl/pyynl/cli.py \
+ --spec Documentation/netlink/specs/netdev.yaml \
+ --dump queue-get \
+ --json='{"ifindex": 2}'
+
+See ``Documentation/netlink/specs/netdev.yaml`` for more details on
+available operations and attributes.
Software IRQ coalescing
-----------------------
diff --git a/Documentation/networking/net_cachelines/inet_connection_sock.rst b/Documentation/networking/net_cachelines/inet_connection_sock.rst
index 4a15627fc93b..b2401aa7c450 100644
--- a/Documentation/networking/net_cachelines/inet_connection_sock.rst
+++ b/Documentation/networking/net_cachelines/inet_connection_sock.rst
@@ -17,6 +17,7 @@ struct timer_list icsk_retransmit_timer read_mostly
struct timer_list icsk_delack_timer read_mostly inet_csk_reset_xmit_timer,tcp_connect
u32 icsk_rto read_write tcp_cwnd_validate,tcp_schedule_loss_probe,tcp_connect_init,tcp_connect,tcp_write_xmit,tcp_push_one
u32 icsk_rto_min
+u32 icsk_rto_max read_mostly tcp_reset_xmit_timer
u32 icsk_delack_max
u32 icsk_pmtu_cookie read_write tcp_sync_mss,tcp_current_mss,tcp_send_syn_data,tcp_connect_init,tcp_connect
struct tcp_congestion_ops icsk_ca_ops read_write tcp_cwnd_validate,tcp_tso_segs,tcp_ca_dst_init,tcp_connect_init,tcp_connect,tcp_write_xmit
diff --git a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst
index de0263302f16..6e7b20afd2d4 100644
--- a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst
+++ b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst
@@ -86,6 +86,7 @@ u8 sysctl_tcp_sack
u8 sysctl_tcp_window_scaling tcp_syn_options,tcp_parse_options
u8 sysctl_tcp_timestamps
u8 sysctl_tcp_early_retrans read_mostly tcp_schedule_loss_probe(tcp_write_xmit)
+u32 sysctl_tcp_rto_max_ms
u8 sysctl_tcp_recovery tcp_fastretrans_alert
u8 sysctl_tcp_thin_linear_timeouts tcp_retrans_timer(on_thin_streams)
u8 sysctl_tcp_slow_start_after_idle unlikely(tcp_cwnd_validate-network-not-starved)
diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst
index 94c4680fdf3e..84803c59968a 100644
--- a/Documentation/networking/netconsole.rst
+++ b/Documentation/networking/netconsole.rst
@@ -17,6 +17,8 @@ Release prepend support by Breno Leitao <leitao@debian.org>, Jul 7 2023
Userdata append support by Matthew Wood <thepacketgeek@gmail.com>, Jan 22 2024
+Sysdata append support by Breno Leitao <leitao@debian.org>, Jan 15 2025
+
Please send bug reports to Matt Mackall <mpm@selenic.com>
Satyam Sharma <satyam.sharma@gmail.com>, and Cong Wang <xiyou.wangcong@gmail.com>
@@ -238,6 +240,49 @@ Delete `userdata` entries with `rmdir`::
It is recommended to not write user data values with newlines.
+CPU number auto population in userdata
+--------------------------------------
+
+Inside the netconsole configfs hierarchy, there is a file called
+`cpu_nr` under the `userdata` directory. This file is used to enable or disable
+the automatic CPU number population feature. This feature automatically
+populates the CPU number that is sending the message.
+
+To enable the CPU number auto-population::
+
+ echo 1 > /sys/kernel/config/netconsole/target1/userdata/cpu_nr
+
+When this option is enabled, the netconsole messages will include an additional
+line in the userdata field with the format `cpu=<cpu_number>`. This allows the
+receiver of the netconsole messages to easily differentiate and demultiplex
+messages originating from different CPUs, which is particularly useful when
+dealing with parallel log output.
+
+Example::
+
+ echo "This is a message" > /dev/kmsg
+ 12,607,22085407756,-;This is a message
+ cpu=42
+
+In this example, the message was sent by CPU 42.
+
+.. note::
+
+ If the user has set a conflicting `cpu` key in the userdata dictionary,
+ both keys will be reported, with the kernel-populated entry appearing after
+ the user one. For example::
+
+ # User-defined CPU entry
+ mkdir -p /sys/kernel/config/netconsole/target1/userdata/cpu
+ echo "1" > /sys/kernel/config/netconsole/target1/userdata/cpu/value
+
+ Output might look like::
+
+ 12,607,22085407756,-;This is a message
+ cpu=1
+ cpu=42 # kernel-populated value
+
+
Extended console:
=================
diff --git a/MAINTAINERS b/MAINTAINERS
index 3864d473f52f..b0cd54818bb3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -24159,7 +24159,7 @@ W: http://vtun.sourceforge.net/tun
F: Documentation/networking/tuntap.rst
F: arch/um/os-Linux/drivers/
F: drivers/net/tap.c
-F: drivers/net/tun.c
+F: drivers/net/tun*
TURBOCHANNEL SUBSYSTEM
M: "Maciej W. Rozycki" <macro@orcam.me.uk>
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index d9e705f4a697..bde6a496df5f 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -54,7 +54,6 @@ enum interruption_class {
IRQIO_C70,
IRQIO_TAP,
IRQIO_VMR,
- IRQIO_LCS,
IRQIO_CTC,
IRQIO_ADM,
IRQIO_CSC,
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index ef7be599e1f7..7ca157ffab30 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -84,7 +84,6 @@ static const struct irq_class irqclass_sub_desc[] = {
{.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"},
{.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"},
{.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"},
- {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"},
{.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"},
{.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"},
{.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"},
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index ad50b77282f8..69ce1862eabe 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -498,8 +498,6 @@ static int irdma_save_msix_info(struct irdma_pci_f *rf)
iw_qvlist->num_vectors = rf->msix_count;
if (rf->msix_count <= num_online_cpus())
rf->msix_shared = true;
- else if (rf->msix_count > num_online_cpus() + 1)
- rf->msix_count = num_online_cpus() + 1;
pmsix = rf->msix_entries;
for (i = 0, ceq_idx = 0; i < rf->msix_count; i++, iw_qvinfo++) {
diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c
index 3f13200ff71b..1ee8969595d3 100644
--- a/drivers/infiniband/hw/irdma/main.c
+++ b/drivers/infiniband/hw/irdma/main.c
@@ -206,6 +206,43 @@ static void irdma_lan_unregister_qset(struct irdma_sc_vsi *vsi,
ibdev_dbg(&iwdev->ibdev, "WS: LAN free_res for rdma qset failed.\n");
}
+static int irdma_init_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf)
+{
+ int i;
+
+ rf->msix_count = num_online_cpus() + IRDMA_NUM_AEQ_MSIX;
+ rf->msix_entries = kcalloc(rf->msix_count, sizeof(*rf->msix_entries),
+ GFP_KERNEL);
+ if (!rf->msix_entries)
+ return -ENOMEM;
+
+ for (i = 0; i < rf->msix_count; i++)
+ if (ice_alloc_rdma_qvector(pf, &rf->msix_entries[i]))
+ break;
+
+ if (i < IRDMA_MIN_MSIX) {
+ for (; i > 0; i--)
+ ice_free_rdma_qvector(pf, &rf->msix_entries[i]);
+
+ kfree(rf->msix_entries);
+ return -ENOMEM;
+ }
+
+ rf->msix_count = i;
+
+ return 0;
+}
+
+static void irdma_deinit_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf)
+{
+ int i;
+
+ for (i = 0; i < rf->msix_count; i++)
+ ice_free_rdma_qvector(pf, &rf->msix_entries[i]);
+
+ kfree(rf->msix_entries);
+}
+
static void irdma_remove(struct auxiliary_device *aux_dev)
{
struct iidc_auxiliary_dev *iidc_adev = container_of(aux_dev,
@@ -216,6 +253,7 @@ static void irdma_remove(struct auxiliary_device *aux_dev)
irdma_ib_unregister_device(iwdev);
ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, false);
+ irdma_deinit_interrupts(iwdev->rf, pf);
pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn));
}
@@ -230,9 +268,7 @@ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf
rf->gen_ops.unregister_qset = irdma_lan_unregister_qset;
rf->hw.hw_addr = pf->hw.hw_addr;
rf->pcidev = pf->pdev;
- rf->msix_count = pf->num_rdma_msix;
rf->pf_id = pf->hw.pf_id;
- rf->msix_entries = &pf->msix_entries[pf->rdma_base_vector];
rf->default_vsi.vsi_idx = vsi->vsi_num;
rf->protocol_used = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ?
IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY;
@@ -281,6 +317,10 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_
irdma_fill_device_info(iwdev, pf, vsi);
rf = iwdev->rf;
+ err = irdma_init_interrupts(rf, pf);
+ if (err)
+ goto err_init_interrupts;
+
err = irdma_ctrl_init_hw(rf);
if (err)
goto err_ctrl_init;
@@ -311,6 +351,8 @@ err_ibreg:
err_rt_init:
irdma_ctrl_deinit_hw(rf);
err_ctrl_init:
+ irdma_deinit_interrupts(rf, pf);
+err_init_interrupts:
kfree(iwdev->rf);
ib_dealloc_device(&iwdev->ibdev);
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
index 9f0ed6e84471..ef9a9b79d711 100644
--- a/drivers/infiniband/hw/irdma/main.h
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -117,6 +117,9 @@ extern struct auxiliary_driver i40iw_auxiliary_drv;
#define IRDMA_IRQ_NAME_STR_LEN (64)
+#define IRDMA_NUM_AEQ_MSIX 1
+#define IRDMA_MIN_MSIX 2
+
enum init_completion_state {
INVALID_STATE = 0,
INITIAL_STATE,
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e45bba240cbc..f6d0628a36d9 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -432,9 +432,6 @@ static struct net_device *bond_ipsec_dev(struct xfrm_state *xs)
struct bonding *bond;
struct slave *slave;
- if (!bond_dev)
- return NULL;
-
bond = netdev_priv(bond_dev);
if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)
return NULL;
diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c
index 399844809bbe..19c86b94a40e 100644
--- a/drivers/net/can/c_can/c_can_platform.c
+++ b/drivers/net/can/c_can/c_can_platform.c
@@ -269,30 +269,22 @@ static int c_can_plat_probe(struct platform_device *pdev)
/* get the appropriate clk */
clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(clk)) {
- ret = PTR_ERR(clk);
- goto exit;
- }
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
/* get the platform data */
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- ret = irq;
- goto exit;
- }
+ if (irq < 0)
+ return irq;
addr = devm_platform_get_and_ioremap_resource(pdev, 0, &mem);
- if (IS_ERR(addr)) {
- ret = PTR_ERR(addr);
- goto exit;
- }
+ if (IS_ERR(addr))
+ return PTR_ERR(addr);
/* allocate the c_can device */
dev = alloc_c_can_dev(drvdata->msg_obj_num);
- if (!dev) {
- ret = -ENOMEM;
- goto exit;
- }
+ if (!dev)
+ return -ENOMEM;
priv = netdev_priv(dev);
switch (drvdata->id) {
@@ -324,33 +316,22 @@ static int c_can_plat_probe(struct platform_device *pdev)
/* Check if we need custom RAMINIT via syscon. Mostly for TI
* platforms. Only supported with DT boot.
*/
- if (np && of_property_read_bool(np, "syscon-raminit")) {
+ if (np && of_property_present(np, "syscon-raminit")) {
+ unsigned int args[2];
u32 id;
struct c_can_raminit *raminit = &priv->raminit_sys;
ret = -EINVAL;
- raminit->syscon = syscon_regmap_lookup_by_phandle(np,
- "syscon-raminit");
+ raminit->syscon = syscon_regmap_lookup_by_phandle_args(np,
+ "syscon-raminit",
+ 2, args);
if (IS_ERR(raminit->syscon)) {
- /* can fail with -EPROBE_DEFER */
ret = PTR_ERR(raminit->syscon);
- free_c_can_dev(dev);
- return ret;
- }
-
- if (of_property_read_u32_index(np, "syscon-raminit", 1,
- &raminit->reg)) {
- dev_err(&pdev->dev,
- "couldn't get the RAMINIT reg. offset!\n");
goto exit_free_device;
}
- if (of_property_read_u32_index(np, "syscon-raminit", 2,
- &id)) {
- dev_err(&pdev->dev,
- "couldn't get the CAN instance ID\n");
- goto exit_free_device;
- }
+ raminit->reg = args[0];
+ id = args[1];
if (id >= drvdata->raminit_num) {
dev_err(&pdev->dev,
@@ -396,8 +377,6 @@ exit_pm_runtime:
pm_runtime_disable(priv->device);
exit_free_device:
free_c_can_dev(dev);
-exit:
- dev_err(&pdev->dev, "probe failed\n");
return ret;
}
diff --git a/drivers/net/can/flexcan/flexcan-core.c b/drivers/net/can/flexcan/flexcan-core.c
index ac1a860986df..b347a1c93536 100644
--- a/drivers/net/can/flexcan/flexcan-core.c
+++ b/drivers/net/can/flexcan/flexcan-core.c
@@ -386,6 +386,16 @@ static const struct flexcan_devtype_data fsl_lx2160a_r1_devtype_data = {
FLEXCAN_QUIRK_SUPPORT_RX_MAILBOX_RTR,
};
+static const struct flexcan_devtype_data nxp_s32g2_devtype_data = {
+ .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
+ FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+ FLEXCAN_QUIRK_USE_RX_MAILBOX | FLEXCAN_QUIRK_SUPPORT_FD |
+ FLEXCAN_QUIRK_SUPPORT_ECC | FLEXCAN_QUIRK_NR_IRQ_3 |
+ FLEXCAN_QUIRK_SUPPORT_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPORT_RX_MAILBOX_RTR |
+ FLEXCAN_QUIRK_SECONDARY_MB_IRQ,
+};
+
static const struct can_bittiming_const flexcan_bittiming_const = {
.name = DRV_NAME,
.tseg1_min = 4,
@@ -1762,14 +1772,25 @@ static int flexcan_open(struct net_device *dev)
goto out_free_irq_boff;
}
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SECONDARY_MB_IRQ) {
+ err = request_irq(priv->irq_secondary_mb,
+ flexcan_irq, IRQF_SHARED, dev->name, dev);
+ if (err)
+ goto out_free_irq_err;
+ }
+
flexcan_chip_interrupts_enable(dev);
netif_start_queue(dev);
return 0;
+ out_free_irq_err:
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_NR_IRQ_3)
+ free_irq(priv->irq_err, dev);
out_free_irq_boff:
- free_irq(priv->irq_boff, dev);
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_NR_IRQ_3)
+ free_irq(priv->irq_boff, dev);
out_free_irq:
free_irq(dev->irq, dev);
out_can_rx_offload_disable:
@@ -1794,6 +1815,9 @@ static int flexcan_close(struct net_device *dev)
netif_stop_queue(dev);
flexcan_chip_interrupts_disable(dev);
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SECONDARY_MB_IRQ)
+ free_irq(priv->irq_secondary_mb, dev);
+
if (priv->devtype_data.quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
free_irq(priv->irq_err, dev);
free_irq(priv->irq_boff, dev);
@@ -2041,6 +2065,7 @@ static const struct of_device_id flexcan_of_match[] = {
{ .compatible = "fsl,vf610-flexcan", .data = &fsl_vf610_devtype_data, },
{ .compatible = "fsl,ls1021ar2-flexcan", .data = &fsl_ls1021a_r2_devtype_data, },
{ .compatible = "fsl,lx2160ar1-flexcan", .data = &fsl_lx2160a_r1_devtype_data, },
+ { .compatible = "nxp,s32g2-flexcan", .data = &nxp_s32g2_devtype_data, },
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, flexcan_of_match);
@@ -2187,6 +2212,14 @@ static int flexcan_probe(struct platform_device *pdev)
}
}
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SECONDARY_MB_IRQ) {
+ priv->irq_secondary_mb = platform_get_irq_byname(pdev, "mb-1");
+ if (priv->irq_secondary_mb < 0) {
+ err = priv->irq_secondary_mb;
+ goto failed_platform_get_irq;
+ }
+ }
+
if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SUPPORT_FD) {
priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD |
CAN_CTRLMODE_FD_NON_ISO;
diff --git a/drivers/net/can/flexcan/flexcan.h b/drivers/net/can/flexcan/flexcan.h
index 4933d8c7439e..2cf886618c96 100644
--- a/drivers/net/can/flexcan/flexcan.h
+++ b/drivers/net/can/flexcan/flexcan.h
@@ -70,6 +70,10 @@
#define FLEXCAN_QUIRK_SUPPORT_RX_FIFO BIT(16)
/* Setup stop mode with ATF SCMI protocol to support wakeup */
#define FLEXCAN_QUIRK_SETUP_STOP_MODE_SCMI BIT(17)
+/* Device has two separate interrupt lines for two mailbox ranges, which
+ * both need to have an interrupt handler registered.
+ */
+#define FLEXCAN_QUIRK_SECONDARY_MB_IRQ BIT(18)
struct flexcan_devtype_data {
u32 quirks; /* quirks needed for different IP cores */
@@ -107,6 +111,7 @@ struct flexcan_priv {
int irq_boff;
int irq_err;
+ int irq_secondary_mb;
/* IPC handle when setup stop mode by System Controller firmware(scfw) */
struct imx_sc_ipc *sc_ipc_handle;
diff --git a/drivers/net/can/rockchip/rockchip_canfd-core.c b/drivers/net/can/rockchip/rockchip_canfd-core.c
index d9a937ba126c..46201c126703 100644
--- a/drivers/net/can/rockchip/rockchip_canfd-core.c
+++ b/drivers/net/can/rockchip/rockchip_canfd-core.c
@@ -236,11 +236,6 @@ static void rkcanfd_chip_fifo_setup(struct rkcanfd_priv *priv)
{
u32 reg;
- /* TXE FIFO */
- reg = rkcanfd_read(priv, RKCANFD_REG_RX_FIFO_CTRL);
- reg |= RKCANFD_REG_RX_FIFO_CTRL_RX_FIFO_ENABLE;
- rkcanfd_write(priv, RKCANFD_REG_RX_FIFO_CTRL, reg);
-
/* RX FIFO */
reg = rkcanfd_read(priv, RKCANFD_REG_RX_FIFO_CTRL);
reg |= RKCANFD_REG_RX_FIFO_CTRL_RX_FIFO_ENABLE;
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index b6f4de375df7..3ccac6781b98 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -43,6 +43,9 @@
#define USB_XYLANTA_SAINT3_VENDOR_ID 0x16d0
#define USB_XYLANTA_SAINT3_PRODUCT_ID 0x0f30
+#define USB_CANNECTIVITY_VENDOR_ID 0x1209
+#define USB_CANNECTIVITY_PRODUCT_ID 0xca01
+
/* Timestamp 32 bit timer runs at 1 MHz (1 µs tick). Worker accounts
* for timer overflow (will be after ~71 minutes)
*/
@@ -1546,6 +1549,8 @@ static const struct usb_device_id gs_usb_table[] = {
USB_ABE_CANDEBUGGER_FD_PRODUCT_ID, 0) },
{ USB_DEVICE_INTERFACE_NUMBER(USB_XYLANTA_SAINT3_VENDOR_ID,
USB_XYLANTA_SAINT3_PRODUCT_ID, 0) },
+ { USB_DEVICE_INTERFACE_NUMBER(USB_CANNECTIVITY_VENDOR_ID,
+ USB_CANNECTIVITY_PRODUCT_ID, 0) },
{} /* Terminating entry */
};
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 79dc77835681..61d164ffb3ae 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -2410,6 +2410,19 @@ static const struct b53_chip_data b53_switch_chips[] = {
.jumbo_size_reg = B53_JUMBO_MAX_SIZE,
},
{
+ .chip_id = BCM53101_DEVICE_ID,
+ .dev_name = "BCM53101",
+ .vlans = 4096,
+ .enabled_ports = 0x11f,
+ .arl_bins = 4,
+ .arl_buckets = 512,
+ .vta_regs = B53_VTA_REGS,
+ .imp_port = 8,
+ .duplex_reg = B53_DUPLEX_STAT_GE,
+ .jumbo_pm_reg = B53_JUMBO_PORT_MASK,
+ .jumbo_size_reg = B53_JUMBO_MAX_SIZE,
+ },
+ {
.chip_id = BCM53115_DEVICE_ID,
.dev_name = "BCM53115",
.vlans = 4096,
@@ -2789,6 +2802,7 @@ int b53_switch_detect(struct b53_device *dev)
return ret;
switch (id32) {
+ case BCM53101_DEVICE_ID:
case BCM53115_DEVICE_ID:
case BCM53125_DEVICE_ID:
case BCM53128_DEVICE_ID:
diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c
index 31d070bf161a..43a3b37b731b 100644
--- a/drivers/net/dsa/b53/b53_mdio.c
+++ b/drivers/net/dsa/b53/b53_mdio.c
@@ -374,6 +374,7 @@ static void b53_mdio_shutdown(struct mdio_device *mdiodev)
static const struct of_device_id b53_of_match[] = {
{ .compatible = "brcm,bcm5325" },
+ { .compatible = "brcm,bcm53101" },
{ .compatible = "brcm,bcm53115" },
{ .compatible = "brcm,bcm53125" },
{ .compatible = "brcm,bcm53128" },
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 9e9b5bc0c5d6..0166c37a13a7 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -66,6 +66,7 @@ enum {
BCM5395_DEVICE_ID = 0x95,
BCM5397_DEVICE_ID = 0x97,
BCM5398_DEVICE_ID = 0x98,
+ BCM53101_DEVICE_ID = 0x53101,
BCM53115_DEVICE_ID = 0x53115,
BCM53125_DEVICE_ID = 0x53125,
BCM53128_DEVICE_ID = 0x53128,
@@ -188,6 +189,7 @@ static inline int is531x5(struct b53_device *dev)
{
return dev->chip_id == BCM53115_DEVICE_ID ||
dev->chip_id == BCM53125_DEVICE_ID ||
+ dev->chip_id == BCM53101_DEVICE_ID ||
dev->chip_id == BCM53128_DEVICE_ID ||
dev->chip_id == BCM53134_DEVICE_ID;
}
diff --git a/drivers/net/dsa/b53/b53_serdes.c b/drivers/net/dsa/b53/b53_serdes.c
index 4730982b6840..7460122f6abc 100644
--- a/drivers/net/dsa/b53/b53_serdes.c
+++ b/drivers/net/dsa/b53/b53_serdes.c
@@ -239,7 +239,6 @@ int b53_serdes_init(struct b53_device *dev, int port)
pcs->dev = dev;
pcs->lane = lane;
pcs->pcs.ops = &b53_pcs_ops;
- pcs->pcs.neg_mode = true;
return 0;
}
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 1c83af805209..8422262febaf 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -2957,28 +2957,61 @@ static void mt753x_phylink_mac_link_up(struct phylink_config *config,
mcr |= PMCR_FORCE_RX_FC_EN;
}
- if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, false) >= 0) {
- switch (speed) {
- case SPEED_1000:
- case SPEED_2500:
- mcr |= PMCR_FORCE_EEE1G;
- break;
- case SPEED_100:
- mcr |= PMCR_FORCE_EEE100;
- break;
- }
- }
-
mt7530_set(priv, MT753X_PMCR_P(dp->index), mcr);
}
+static void mt753x_phylink_mac_disable_tx_lpi(struct phylink_config *config)
+{
+ struct dsa_port *dp = dsa_phylink_to_port(config);
+ struct mt7530_priv *priv = dp->ds->priv;
+
+ mt7530_clear(priv, MT753X_PMCR_P(dp->index),
+ PMCR_FORCE_EEE1G | PMCR_FORCE_EEE100);
+}
+
+static int mt753x_phylink_mac_enable_tx_lpi(struct phylink_config *config,
+ u32 timer, bool tx_clock_stop)
+{
+ struct dsa_port *dp = dsa_phylink_to_port(config);
+ struct mt7530_priv *priv = dp->ds->priv;
+ u32 val;
+
+ /* If the timer is zero, then set LPI_MODE_EN, which allows the
+ * system to enter LPI mode immediately rather than waiting for
+ * the LPI threshold.
+ */
+ if (!timer)
+ val = LPI_MODE_EN;
+ else if (FIELD_FIT(LPI_THRESH_MASK, timer))
+ val = FIELD_PREP(LPI_THRESH_MASK, timer);
+ else
+ val = LPI_THRESH_MASK;
+
+ mt7530_rmw(priv, MT753X_PMEEECR_P(dp->index),
+ LPI_THRESH_MASK | LPI_MODE_EN, val);
+
+ mt7530_set(priv, MT753X_PMCR_P(dp->index),
+ PMCR_FORCE_EEE1G | PMCR_FORCE_EEE100);
+
+ return 0;
+}
+
static void mt753x_phylink_get_caps(struct dsa_switch *ds, int port,
struct phylink_config *config)
{
struct mt7530_priv *priv = ds->priv;
+ u32 eeecr;
config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE;
+ config->lpi_capabilities = MAC_100FD | MAC_1000FD | MAC_2500FD;
+
+ eeecr = mt7530_read(priv, MT753X_PMEEECR_P(port));
+ /* tx_lpi_timer should be in microseconds. The time units for
+ * LPI threshold are unspecified.
+ */
+ config->lpi_timer_default = FIELD_GET(LPI_THRESH_MASK, eeecr);
+
priv->info->mac_port_get_caps(ds, port, config);
}
@@ -3071,7 +3104,6 @@ mt753x_setup(struct dsa_switch *ds)
/* Initialise the PCS devices */
for (i = 0; i < priv->ds->num_ports; i++) {
priv->pcs[i].pcs.ops = priv->info->pcs_ops;
- priv->pcs[i].pcs.neg_mode = true;
priv->pcs[i].priv = priv;
priv->pcs[i].port = i;
}
@@ -3088,18 +3120,9 @@ mt753x_setup(struct dsa_switch *ds)
static int mt753x_set_mac_eee(struct dsa_switch *ds, int port,
struct ethtool_keee *e)
{
- struct mt7530_priv *priv = ds->priv;
- u32 set, mask = LPI_THRESH_MASK | LPI_MODE_EN;
-
if (e->tx_lpi_timer > 0xFFF)
return -EINVAL;
- set = LPI_THRESH_SET(e->tx_lpi_timer);
- if (!e->tx_lpi_enabled)
- /* Force LPI Mode without a delay */
- set |= LPI_MODE_EN;
- mt7530_rmw(priv, MT753X_PMEEECR_P(port), mask, set);
-
return 0;
}
@@ -3238,6 +3261,8 @@ static const struct phylink_mac_ops mt753x_phylink_mac_ops = {
.mac_config = mt753x_phylink_mac_config,
.mac_link_down = mt753x_phylink_mac_link_down,
.mac_link_up = mt753x_phylink_mac_link_up,
+ .mac_disable_tx_lpi = mt753x_phylink_mac_disable_tx_lpi,
+ .mac_enable_tx_lpi = mt753x_phylink_mac_enable_tx_lpi,
};
const struct mt753x_info mt753x_table[] = {
diff --git a/drivers/net/dsa/mv88e6xxx/pcs-6185.c b/drivers/net/dsa/mv88e6xxx/pcs-6185.c
index 75ed1fa500a5..af7e06d265f7 100644
--- a/drivers/net/dsa/mv88e6xxx/pcs-6185.c
+++ b/drivers/net/dsa/mv88e6xxx/pcs-6185.c
@@ -138,7 +138,6 @@ static int mv88e6185_pcs_init(struct mv88e6xxx_chip *chip, int port)
mpcs->chip = chip;
mpcs->port = port;
mpcs->phylink_pcs.ops = &mv88e6185_phylink_pcs_ops;
- mpcs->phylink_pcs.neg_mode = true;
irq = mv88e6xxx_serdes_irq_mapping(chip, port);
if (irq) {
diff --git a/drivers/net/dsa/mv88e6xxx/pcs-6352.c b/drivers/net/dsa/mv88e6xxx/pcs-6352.c
index 143fe21d1834..36993400837e 100644
--- a/drivers/net/dsa/mv88e6xxx/pcs-6352.c
+++ b/drivers/net/dsa/mv88e6xxx/pcs-6352.c
@@ -275,7 +275,6 @@ static struct marvell_c22_pcs *marvell_c22_pcs_alloc(struct device *dev,
mpcs->mdio.bus = bus;
mpcs->mdio.addr = addr;
mpcs->phylink_pcs.ops = &marvell_c22_pcs_ops;
- mpcs->phylink_pcs.neg_mode = true;
return mpcs;
}
diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
index 59f63d6beec8..5db17c0b77f5 100644
--- a/drivers/net/dsa/mv88e6xxx/pcs-639x.c
+++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
@@ -565,9 +565,7 @@ static int mv88e6390_pcs_init(struct mv88e6xxx_chip *chip, int port)
return -ENOMEM;
mpcs->sgmii_pcs.ops = &mv88e639x_sgmii_pcs_ops;
- mpcs->sgmii_pcs.neg_mode = true;
mpcs->xg_pcs.ops = &mv88e6390_xg_pcs_ops;
- mpcs->xg_pcs.neg_mode = true;
if (chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6190X ||
chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6390X)
@@ -945,9 +943,7 @@ static int mv88e6393x_pcs_init(struct mv88e6xxx_chip *chip, int port)
return -ENOMEM;
mpcs->sgmii_pcs.ops = &mv88e6393x_sgmii_pcs_ops;
- mpcs->sgmii_pcs.neg_mode = true;
mpcs->xg_pcs.ops = &mv88e6393x_xg_pcs_ops;
- mpcs->xg_pcs.neg_mode = true;
mpcs->supports_5g = true;
err = mv88e6393x_erratum_4_6(mpcs);
diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c
index e8cb4da15dbe..a36b8b07030e 100644
--- a/drivers/net/dsa/qca/qca8k-8xxx.c
+++ b/drivers/net/dsa/qca/qca8k-8xxx.c
@@ -1634,7 +1634,6 @@ static void qca8k_setup_pcs(struct qca8k_priv *priv, struct qca8k_pcs *qpcs,
int port)
{
qpcs->pcs.ops = &qca8k_pcs_ops;
- qpcs->pcs.neg_mode = true;
/* We don't have interrupts for link changes, so we need to poll */
qpcs->pcs.poll = true;
diff --git a/drivers/net/dsa/rzn1_a5psw.c b/drivers/net/dsa/rzn1_a5psw.c
index 66974379334a..31ea8130a495 100644
--- a/drivers/net/dsa/rzn1_a5psw.c
+++ b/drivers/net/dsa/rzn1_a5psw.c
@@ -1248,18 +1248,16 @@ static int a5psw_probe(struct platform_device *pdev)
if (ret)
goto clk_disable;
- mdio = of_get_child_by_name(dev->of_node, "mdio");
- if (of_device_is_available(mdio)) {
+ mdio = of_get_available_child_by_name(dev->of_node, "mdio");
+ if (mdio) {
ret = a5psw_probe_mdio(a5psw, mdio);
+ of_node_put(mdio);
if (ret) {
- of_node_put(mdio);
dev_err(dev, "Failed to register MDIO: %d\n", ret);
goto hclk_disable;
}
}
- of_node_put(mdio);
-
ds = &a5psw->ds;
ds->dev = dev;
ds->num_ports = A5PSW_PORTS_NUM;
diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c
index 84b7169f2974..8d535c033cef 100644
--- a/drivers/net/dsa/sja1105/sja1105_mdio.c
+++ b/drivers/net/dsa/sja1105/sja1105_mdio.c
@@ -468,13 +468,10 @@ int sja1105_mdiobus_register(struct dsa_switch *ds)
if (rc)
return rc;
- mdio_node = of_get_child_by_name(switch_node, "mdios");
+ mdio_node = of_get_available_child_by_name(switch_node, "mdios");
if (!mdio_node)
return 0;
- if (!of_device_is_available(mdio_node))
- goto out_put_mdio_node;
-
if (regs->mdio_100base_tx != SJA1105_RSV_ADDR) {
rc = sja1105_mdiobus_base_tx_register(priv, mdio_node);
if (rc)
@@ -487,7 +484,6 @@ int sja1105_mdiobus_register(struct dsa_switch *ds)
goto err_free_base_tx_mdiobus;
}
-out_put_mdio_node:
of_node_put(mdio_node);
return 0;
diff --git a/drivers/net/ethernet/actions/owl-emac.c b/drivers/net/ethernet/actions/owl-emac.c
index 115f48b3342c..0a08da799255 100644
--- a/drivers/net/ethernet/actions/owl-emac.c
+++ b/drivers/net/ethernet/actions/owl-emac.c
@@ -1325,15 +1325,10 @@ static int owl_emac_mdio_init(struct net_device *netdev)
struct device_node *mdio_node;
int ret;
- mdio_node = of_get_child_by_name(dev->of_node, "mdio");
+ mdio_node = of_get_available_child_by_name(dev->of_node, "mdio");
if (!mdio_node)
return -ENODEV;
- if (!of_device_is_available(mdio_node)) {
- ret = -ENODEV;
- goto err_put_node;
- }
-
priv->mii = devm_mdiobus_alloc(dev);
if (!priv->mii) {
ret = -ENOMEM;
diff --git a/drivers/net/ethernet/apm/xgene-v2/mdio.c b/drivers/net/ethernet/apm/xgene-v2/mdio.c
index eba06831aec2..6a17045a5f62 100644
--- a/drivers/net/ethernet/apm/xgene-v2/mdio.c
+++ b/drivers/net/ethernet/apm/xgene-v2/mdio.c
@@ -97,7 +97,6 @@ void xge_mdio_remove(struct net_device *ndev)
int xge_mdio_config(struct net_device *ndev)
{
- __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
struct xge_pdata *pdata = netdev_priv(ndev);
struct device *dev = &pdata->pdev->dev;
struct mii_bus *mdio_bus;
@@ -137,17 +136,12 @@ int xge_mdio_config(struct net_device *ndev)
goto err;
}
- linkmode_set_bit_array(phy_10_100_features_array,
- ARRAY_SIZE(phy_10_100_features_array),
- mask);
- linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, mask);
- linkmode_set_bit(ETHTOOL_LINK_MODE_AUI_BIT, mask);
- linkmode_set_bit(ETHTOOL_LINK_MODE_MII_BIT, mask);
- linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, mask);
- linkmode_set_bit(ETHTOOL_LINK_MODE_BNC_BIT, mask);
-
- linkmode_andnot(phydev->supported, phydev->supported, mask);
- linkmode_copy(phydev->advertising, phydev->supported);
+ phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT);
+ phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Full_BIT);
+ phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT);
+ phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Full_BIT);
+ phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
+
pdata->phy_speed = SPEED_UNKNOWN;
return 0;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c
index 414b2e448d59..787ea91802e7 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c
@@ -113,19 +113,9 @@ static const struct hwmon_ops aq_hwmon_ops = {
.read_string = aq_hwmon_read_string,
};
-static u32 aq_hwmon_temp_config[] = {
- HWMON_T_INPUT | HWMON_T_LABEL,
- HWMON_T_INPUT | HWMON_T_LABEL,
- 0,
-};
-
-static const struct hwmon_channel_info aq_hwmon_temp = {
- .type = hwmon_temp,
- .config = aq_hwmon_temp_config,
-};
-
static const struct hwmon_channel_info * const aq_hwmon_info[] = {
- &aq_hwmon_temp,
+ HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL),
NULL,
};
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
index f5901f8e3907..f6b990b7f5b4 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
@@ -226,7 +226,6 @@ struct __packed offload_info {
struct offload_port_info ports;
struct offload_ka_info kas;
struct offload_rr_info rrs;
- u8 buf[];
};
struct __packed hw_atl_utils_fw_rpc {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 7b8b5b39c7bb..15c57a06ecaf 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -55,6 +55,8 @@
#include <net/page_pool/helpers.h>
#include <linux/align.h>
#include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
+#include <linux/pci-tph.h>
#include "bnxt_hsi.h"
#include "bnxt.h"
@@ -76,6 +78,7 @@
#define BNXT_DEF_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_HW | \
NETIF_MSG_TX_ERR)
+MODULE_IMPORT_NS("NETDEV_INTERNAL");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Broadcom NetXtreme network driver");
@@ -3314,74 +3317,81 @@ poll_done:
return work_done;
}
-static void bnxt_free_tx_skbs(struct bnxt *bp)
+static void bnxt_free_one_tx_ring_skbs(struct bnxt *bp,
+ struct bnxt_tx_ring_info *txr, int idx)
{
int i, max_idx;
struct pci_dev *pdev = bp->pdev;
- if (!bp->tx_ring)
- return;
-
max_idx = bp->tx_nr_pages * TX_DESC_CNT;
- for (i = 0; i < bp->tx_nr_rings; i++) {
- struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
- int j;
- if (!txr->tx_buf_ring)
+ for (i = 0; i < max_idx;) {
+ struct bnxt_sw_tx_bd *tx_buf = &txr->tx_buf_ring[i];
+ struct sk_buff *skb;
+ int j, last;
+
+ if (idx < bp->tx_nr_rings_xdp &&
+ tx_buf->action == XDP_REDIRECT) {
+ dma_unmap_single(&pdev->dev,
+ dma_unmap_addr(tx_buf, mapping),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+ xdp_return_frame(tx_buf->xdpf);
+ tx_buf->action = 0;
+ tx_buf->xdpf = NULL;
+ i++;
continue;
+ }
- for (j = 0; j < max_idx;) {
- struct bnxt_sw_tx_bd *tx_buf = &txr->tx_buf_ring[j];
- struct sk_buff *skb;
- int k, last;
-
- if (i < bp->tx_nr_rings_xdp &&
- tx_buf->action == XDP_REDIRECT) {
- dma_unmap_single(&pdev->dev,
- dma_unmap_addr(tx_buf, mapping),
- dma_unmap_len(tx_buf, len),
- DMA_TO_DEVICE);
- xdp_return_frame(tx_buf->xdpf);
- tx_buf->action = 0;
- tx_buf->xdpf = NULL;
- j++;
- continue;
- }
+ skb = tx_buf->skb;
+ if (!skb) {
+ i++;
+ continue;
+ }
- skb = tx_buf->skb;
- if (!skb) {
- j++;
- continue;
- }
+ tx_buf->skb = NULL;
- tx_buf->skb = NULL;
+ if (tx_buf->is_push) {
+ dev_kfree_skb(skb);
+ i += 2;
+ continue;
+ }
- if (tx_buf->is_push) {
- dev_kfree_skb(skb);
- j += 2;
- continue;
- }
+ dma_unmap_single(&pdev->dev,
+ dma_unmap_addr(tx_buf, mapping),
+ skb_headlen(skb),
+ DMA_TO_DEVICE);
- dma_unmap_single(&pdev->dev,
- dma_unmap_addr(tx_buf, mapping),
- skb_headlen(skb),
- DMA_TO_DEVICE);
+ last = tx_buf->nr_frags;
+ i += 2;
+ for (j = 0; j < last; j++, i++) {
+ int ring_idx = i & bp->tx_ring_mask;
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[j];
- last = tx_buf->nr_frags;
- j += 2;
- for (k = 0; k < last; k++, j++) {
- int ring_idx = j & bp->tx_ring_mask;
- skb_frag_t *frag = &skb_shinfo(skb)->frags[k];
-
- tx_buf = &txr->tx_buf_ring[ring_idx];
- dma_unmap_page(
- &pdev->dev,
- dma_unmap_addr(tx_buf, mapping),
- skb_frag_size(frag), DMA_TO_DEVICE);
- }
- dev_kfree_skb(skb);
+ tx_buf = &txr->tx_buf_ring[ring_idx];
+ dma_unmap_page(&pdev->dev,
+ dma_unmap_addr(tx_buf, mapping),
+ skb_frag_size(frag), DMA_TO_DEVICE);
}
- netdev_tx_reset_queue(netdev_get_tx_queue(bp->dev, i));
+ dev_kfree_skb(skb);
+ }
+ netdev_tx_reset_queue(netdev_get_tx_queue(bp->dev, idx));
+}
+
+static void bnxt_free_tx_skbs(struct bnxt *bp)
+{
+ int i;
+
+ if (!bp->tx_ring)
+ return;
+
+ for (i = 0; i < bp->tx_nr_rings; i++) {
+ struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
+
+ if (!txr->tx_buf_ring)
+ continue;
+
+ bnxt_free_one_tx_ring_skbs(bp, txr, i);
}
}
@@ -5565,6 +5575,8 @@ int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size,
if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)
flags |= FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT |
FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT;
+ if (bp->fw_cap & BNXT_FW_CAP_NPAR_1_2)
+ flags |= FUNC_DRV_RGTR_REQ_FLAGS_NPAR_1_2_SUPPORT;
req->flags = cpu_to_le32(flags);
req->ver_maj_8b = DRV_VER_MAJ;
req->ver_min_8b = DRV_VER_MIN;
@@ -6935,6 +6947,30 @@ static void bnxt_hwrm_ring_grp_free(struct bnxt *bp)
hwrm_req_drop(bp, req);
}
+static void bnxt_set_rx_ring_params_p5(struct bnxt *bp, u32 ring_type,
+ struct hwrm_ring_alloc_input *req,
+ struct bnxt_ring_struct *ring)
+{
+ struct bnxt_ring_grp_info *grp_info = &bp->grp_info[ring->grp_idx];
+ u32 enables = RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID |
+ RING_ALLOC_REQ_ENABLES_NQ_RING_ID_VALID;
+
+ if (ring_type == HWRM_RING_ALLOC_AGG) {
+ req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX_AGG;
+ req->rx_ring_id = cpu_to_le16(grp_info->rx_fw_ring_id);
+ req->rx_buf_size = cpu_to_le16(BNXT_RX_PAGE_SIZE);
+ enables |= RING_ALLOC_REQ_ENABLES_RX_RING_ID_VALID;
+ } else {
+ req->rx_buf_size = cpu_to_le16(bp->rx_buf_use_size);
+ if (NET_IP_ALIGN == 2)
+ req->flags =
+ cpu_to_le16(RING_ALLOC_REQ_FLAGS_RX_SOP_PAD);
+ }
+ req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
+ req->nq_ring_id = cpu_to_le16(grp_info->cp_fw_ring_id);
+ req->enables |= cpu_to_le32(enables);
+}
+
static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
struct bnxt_ring_struct *ring,
u32 ring_type, u32 map_index)
@@ -6986,37 +7022,13 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
break;
}
case HWRM_RING_ALLOC_RX:
- req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX;
- req->length = cpu_to_le32(bp->rx_ring_mask + 1);
- if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
- u16 flags = 0;
-
- /* Association of rx ring with stats context */
- grp_info = &bp->grp_info[ring->grp_idx];
- req->rx_buf_size = cpu_to_le16(bp->rx_buf_use_size);
- req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
- req->enables |= cpu_to_le32(
- RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID);
- if (NET_IP_ALIGN == 2)
- flags = RING_ALLOC_REQ_FLAGS_RX_SOP_PAD;
- req->flags = cpu_to_le16(flags);
- }
- break;
case HWRM_RING_ALLOC_AGG:
- if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
- req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX_AGG;
- /* Association of agg ring with rx ring */
- grp_info = &bp->grp_info[ring->grp_idx];
- req->rx_ring_id = cpu_to_le16(grp_info->rx_fw_ring_id);
- req->rx_buf_size = cpu_to_le16(BNXT_RX_PAGE_SIZE);
- req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
- req->enables |= cpu_to_le32(
- RING_ALLOC_REQ_ENABLES_RX_RING_ID_VALID |
- RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID);
- } else {
- req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX;
- }
- req->length = cpu_to_le32(bp->rx_agg_ring_mask + 1);
+ req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX;
+ req->length = (ring_type == HWRM_RING_ALLOC_RX) ?
+ cpu_to_le32(bp->rx_ring_mask + 1) :
+ cpu_to_le32(bp->rx_agg_ring_mask + 1);
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
+ bnxt_set_rx_ring_params_p5(bp, ring_type, req, ring);
break;
case HWRM_RING_ALLOC_CMPL:
req->ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
@@ -7197,6 +7209,39 @@ static int bnxt_hwrm_rx_agg_ring_alloc(struct bnxt *bp,
return 0;
}
+static int bnxt_hwrm_cp_ring_alloc_p5(struct bnxt *bp,
+ struct bnxt_cp_ring_info *cpr)
+{
+ const u32 type = HWRM_RING_ALLOC_CMPL;
+ struct bnxt_napi *bnapi = cpr->bnapi;
+ struct bnxt_ring_struct *ring;
+ u32 map_idx = bnapi->index;
+ int rc;
+
+ ring = &cpr->cp_ring_struct;
+ ring->handle = BNXT_SET_NQ_HDL(cpr);
+ rc = hwrm_ring_alloc_send_msg(bp, ring, type, map_idx);
+ if (rc)
+ return rc;
+ bnxt_set_db(bp, &cpr->cp_db, type, map_idx, ring->fw_ring_id);
+ bnxt_db_cq(bp, &cpr->cp_db, cpr->cp_raw_cons);
+ return 0;
+}
+
+static int bnxt_hwrm_tx_ring_alloc(struct bnxt *bp,
+ struct bnxt_tx_ring_info *txr, u32 tx_idx)
+{
+ struct bnxt_ring_struct *ring = &txr->tx_ring_struct;
+ const u32 type = HWRM_RING_ALLOC_TX;
+ int rc;
+
+ rc = hwrm_ring_alloc_send_msg(bp, ring, type, tx_idx);
+ if (rc)
+ return rc;
+ bnxt_set_db(bp, &txr->tx_db, type, tx_idx, ring->fw_ring_id);
+ return 0;
+}
+
static int bnxt_hwrm_ring_alloc(struct bnxt *bp)
{
bool agg_rings = !!(bp->flags & BNXT_FLAG_AGG_RINGS);
@@ -7233,33 +7278,17 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp)
}
}
- type = HWRM_RING_ALLOC_TX;
for (i = 0; i < bp->tx_nr_rings; i++) {
struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
- struct bnxt_ring_struct *ring;
- u32 map_idx;
if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
- struct bnxt_cp_ring_info *cpr2 = txr->tx_cpr;
- struct bnxt_napi *bnapi = txr->bnapi;
- u32 type2 = HWRM_RING_ALLOC_CMPL;
-
- ring = &cpr2->cp_ring_struct;
- ring->handle = BNXT_SET_NQ_HDL(cpr2);
- map_idx = bnapi->index;
- rc = hwrm_ring_alloc_send_msg(bp, ring, type2, map_idx);
+ rc = bnxt_hwrm_cp_ring_alloc_p5(bp, txr->tx_cpr);
if (rc)
goto err_out;
- bnxt_set_db(bp, &cpr2->cp_db, type2, map_idx,
- ring->fw_ring_id);
- bnxt_db_cq(bp, &cpr2->cp_db, cpr2->cp_raw_cons);
}
- ring = &txr->tx_ring_struct;
- map_idx = i;
- rc = hwrm_ring_alloc_send_msg(bp, ring, type, map_idx);
+ rc = bnxt_hwrm_tx_ring_alloc(bp, txr, i);
if (rc)
goto err_out;
- bnxt_set_db(bp, &txr->tx_db, type, map_idx, ring->fw_ring_id);
}
for (i = 0; i < bp->rx_nr_rings; i++) {
@@ -7272,20 +7301,9 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp)
if (!agg_rings)
bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
- struct bnxt_cp_ring_info *cpr2 = rxr->rx_cpr;
- struct bnxt_napi *bnapi = rxr->bnapi;
- u32 type2 = HWRM_RING_ALLOC_CMPL;
- struct bnxt_ring_struct *ring;
- u32 map_idx = bnapi->index;
-
- ring = &cpr2->cp_ring_struct;
- ring->handle = BNXT_SET_NQ_HDL(cpr2);
- rc = hwrm_ring_alloc_send_msg(bp, ring, type2, map_idx);
+ rc = bnxt_hwrm_cp_ring_alloc_p5(bp, rxr->rx_cpr);
if (rc)
goto err_out;
- bnxt_set_db(bp, &cpr2->cp_db, type2, map_idx,
- ring->fw_ring_id);
- bnxt_db_cq(bp, &cpr2->cp_db, cpr2->cp_raw_cons);
}
}
@@ -7353,6 +7371,23 @@ exit:
return 0;
}
+static void bnxt_hwrm_tx_ring_free(struct bnxt *bp,
+ struct bnxt_tx_ring_info *txr,
+ bool close_path)
+{
+ struct bnxt_ring_struct *ring = &txr->tx_ring_struct;
+ u32 cmpl_ring_id;
+
+ if (ring->fw_ring_id == INVALID_HW_RING_ID)
+ return;
+
+ cmpl_ring_id = close_path ? bnxt_cp_ring_for_tx(bp, txr) :
+ INVALID_HW_RING_ID;
+ hwrm_ring_free_send_msg(bp, ring, RING_FREE_REQ_RING_TYPE_TX,
+ cmpl_ring_id);
+ ring->fw_ring_id = INVALID_HW_RING_ID;
+}
+
static void bnxt_hwrm_rx_ring_free(struct bnxt *bp,
struct bnxt_rx_ring_info *rxr,
bool close_path)
@@ -7397,6 +7432,33 @@ static void bnxt_hwrm_rx_agg_ring_free(struct bnxt *bp,
bp->grp_info[grp_idx].agg_fw_ring_id = INVALID_HW_RING_ID;
}
+static void bnxt_hwrm_cp_ring_free(struct bnxt *bp,
+ struct bnxt_cp_ring_info *cpr)
+{
+ struct bnxt_ring_struct *ring;
+
+ ring = &cpr->cp_ring_struct;
+ if (ring->fw_ring_id == INVALID_HW_RING_ID)
+ return;
+
+ hwrm_ring_free_send_msg(bp, ring, RING_FREE_REQ_RING_TYPE_L2_CMPL,
+ INVALID_HW_RING_ID);
+ ring->fw_ring_id = INVALID_HW_RING_ID;
+}
+
+static void bnxt_clear_one_cp_ring(struct bnxt *bp, struct bnxt_cp_ring_info *cpr)
+{
+ struct bnxt_ring_struct *ring = &cpr->cp_ring_struct;
+ int i, size = ring->ring_mem.page_size;
+
+ cpr->cp_raw_cons = 0;
+ cpr->toggle = 0;
+
+ for (i = 0; i < bp->cp_nr_pages; i++)
+ if (cpr->cp_desc_ring[i])
+ memset(cpr->cp_desc_ring[i], 0, size);
+}
+
static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path)
{
u32 type;
@@ -7405,20 +7467,8 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path)
if (!bp->bnapi)
return;
- for (i = 0; i < bp->tx_nr_rings; i++) {
- struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
- struct bnxt_ring_struct *ring = &txr->tx_ring_struct;
-
- if (ring->fw_ring_id != INVALID_HW_RING_ID) {
- u32 cmpl_ring_id = bnxt_cp_ring_for_tx(bp, txr);
-
- hwrm_ring_free_send_msg(bp, ring,
- RING_FREE_REQ_RING_TYPE_TX,
- close_path ? cmpl_ring_id :
- INVALID_HW_RING_ID);
- ring->fw_ring_id = INVALID_HW_RING_ID;
- }
- }
+ for (i = 0; i < bp->tx_nr_rings; i++)
+ bnxt_hwrm_tx_ring_free(bp, &bp->tx_ring[i], close_path);
bnxt_cancel_dim(bp);
for (i = 0; i < bp->rx_nr_rings; i++) {
@@ -7442,17 +7492,9 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path)
struct bnxt_ring_struct *ring;
int j;
- for (j = 0; j < cpr->cp_ring_count && cpr->cp_ring_arr; j++) {
- struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[j];
+ for (j = 0; j < cpr->cp_ring_count && cpr->cp_ring_arr; j++)
+ bnxt_hwrm_cp_ring_free(bp, &cpr->cp_ring_arr[j]);
- ring = &cpr2->cp_ring_struct;
- if (ring->fw_ring_id == INVALID_HW_RING_ID)
- continue;
- hwrm_ring_free_send_msg(bp, ring,
- RING_FREE_REQ_RING_TYPE_L2_CMPL,
- INVALID_HW_RING_ID);
- ring->fw_ring_id = INVALID_HW_RING_ID;
- }
ring = &cpr->cp_ring_struct;
if (ring->fw_ring_id != INVALID_HW_RING_ID) {
hwrm_ring_free_send_msg(bp, ring, type,
@@ -8365,6 +8407,7 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
switch (resp->port_partition_type) {
case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0:
+ case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_2:
case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5:
case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR2_0:
bp->port_partition_type = resp->port_partition_type;
@@ -9529,6 +9572,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
bp->fw_cap |= BNXT_FW_CAP_HOT_RESET_IF;
if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED))
bp->fw_cap |= BNXT_FW_CAP_LIVEPATCH;
+ if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_NPAR_1_2_SUPPORTED)
+ bp->fw_cap |= BNXT_FW_CAP_NPAR_1_2;
if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_DFLT_VLAN_TPID_PCP_SUPPORTED))
bp->fw_cap |= BNXT_FW_CAP_DFLT_VLAN_TPID_PCP;
if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_SUPPORTED)
@@ -11237,6 +11282,155 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init)
return 0;
}
+static void bnxt_tx_queue_stop(struct bnxt *bp, int idx)
+{
+ struct bnxt_tx_ring_info *txr;
+ struct netdev_queue *txq;
+ struct bnxt_napi *bnapi;
+ int i;
+
+ bnapi = bp->bnapi[idx];
+ bnxt_for_each_napi_tx(i, bnapi, txr) {
+ WRITE_ONCE(txr->dev_state, BNXT_DEV_STATE_CLOSING);
+ synchronize_net();
+
+ if (!(bnapi->flags & BNXT_NAPI_FLAG_XDP)) {
+ txq = netdev_get_tx_queue(bp->dev, txr->txq_index);
+ if (txq) {
+ __netif_tx_lock_bh(txq);
+ netif_tx_stop_queue(txq);
+ __netif_tx_unlock_bh(txq);
+ }
+ }
+
+ if (!bp->tph_mode)
+ continue;
+
+ bnxt_hwrm_tx_ring_free(bp, txr, true);
+ bnxt_hwrm_cp_ring_free(bp, txr->tx_cpr);
+ bnxt_free_one_tx_ring_skbs(bp, txr, txr->txq_index);
+ bnxt_clear_one_cp_ring(bp, txr->tx_cpr);
+ }
+}
+
+static int bnxt_tx_queue_start(struct bnxt *bp, int idx)
+{
+ struct bnxt_tx_ring_info *txr;
+ struct netdev_queue *txq;
+ struct bnxt_napi *bnapi;
+ int rc, i;
+
+ bnapi = bp->bnapi[idx];
+ /* All rings have been reserved and previously allocated.
+ * Reallocating with the same parameters should never fail.
+ */
+ bnxt_for_each_napi_tx(i, bnapi, txr) {
+ if (!bp->tph_mode)
+ goto start_tx;
+
+ rc = bnxt_hwrm_cp_ring_alloc_p5(bp, txr->tx_cpr);
+ if (rc)
+ return rc;
+
+ rc = bnxt_hwrm_tx_ring_alloc(bp, txr, false);
+ if (rc)
+ return rc;
+
+ txr->tx_prod = 0;
+ txr->tx_cons = 0;
+ txr->tx_hw_cons = 0;
+start_tx:
+ WRITE_ONCE(txr->dev_state, 0);
+ synchronize_net();
+
+ if (bnapi->flags & BNXT_NAPI_FLAG_XDP)
+ continue;
+
+ txq = netdev_get_tx_queue(bp->dev, txr->txq_index);
+ if (txq)
+ netif_tx_start_queue(txq);
+ }
+
+ return 0;
+}
+
+static void bnxt_irq_affinity_notify(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+{
+ struct bnxt_irq *irq;
+ u16 tag;
+ int err;
+
+ irq = container_of(notify, struct bnxt_irq, affinity_notify);
+
+ if (!irq->bp->tph_mode)
+ return;
+
+ cpumask_copy(irq->cpu_mask, mask);
+
+ if (irq->ring_nr >= irq->bp->rx_nr_rings)
+ return;
+
+ if (pcie_tph_get_cpu_st(irq->bp->pdev, TPH_MEM_TYPE_VM,
+ cpumask_first(irq->cpu_mask), &tag))
+ return;
+
+ if (pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, tag))
+ return;
+
+ rtnl_lock();
+ if (netif_running(irq->bp->dev)) {
+ err = netdev_rx_queue_restart(irq->bp->dev, irq->ring_nr);
+ if (err)
+ netdev_err(irq->bp->dev,
+ "RX queue restart failed: err=%d\n", err);
+ }
+ rtnl_unlock();
+}
+
+static void bnxt_irq_affinity_release(struct kref *ref)
+{
+ struct irq_affinity_notify *notify =
+ container_of(ref, struct irq_affinity_notify, kref);
+ struct bnxt_irq *irq;
+
+ irq = container_of(notify, struct bnxt_irq, affinity_notify);
+
+ if (!irq->bp->tph_mode)
+ return;
+
+ if (pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, 0)) {
+ netdev_err(irq->bp->dev,
+ "Setting ST=0 for MSIX entry %d failed\n",
+ irq->msix_nr);
+ return;
+ }
+}
+
+static void bnxt_release_irq_notifier(struct bnxt_irq *irq)
+{
+ irq_set_affinity_notifier(irq->vector, NULL);
+}
+
+static void bnxt_register_irq_notifier(struct bnxt *bp, struct bnxt_irq *irq)
+{
+ struct irq_affinity_notify *notify;
+
+ irq->bp = bp;
+
+ /* Nothing to do if TPH is not enabled */
+ if (!bp->tph_mode)
+ return;
+
+ /* Register IRQ affinity notifier */
+ notify = &irq->affinity_notify;
+ notify->irq = irq->vector;
+ notify->notify = bnxt_irq_affinity_notify;
+ notify->release = bnxt_irq_affinity_release;
+
+ irq_set_affinity_notifier(irq->vector, notify);
+}
+
static void bnxt_free_irq(struct bnxt *bp)
{
struct bnxt_irq *irq;
@@ -11259,11 +11453,18 @@ static void bnxt_free_irq(struct bnxt *bp)
free_cpumask_var(irq->cpu_mask);
irq->have_cpumask = 0;
}
+
+ bnxt_release_irq_notifier(irq);
+
free_irq(irq->vector, bp->bnapi[i]);
}
irq->requested = 0;
}
+
+ /* Disable TPH support */
+ pcie_disable_tph(bp->pdev);
+ bp->tph_mode = 0;
}
static int bnxt_request_irq(struct bnxt *bp)
@@ -11283,6 +11484,12 @@ static int bnxt_request_irq(struct bnxt *bp)
#ifdef CONFIG_RFS_ACCEL
rmap = bp->dev->rx_cpu_rmap;
#endif
+
+ /* Enable TPH support as part of IRQ request */
+ rc = pcie_enable_tph(bp->pdev, PCI_TPH_ST_IV_MODE);
+ if (!rc)
+ bp->tph_mode = PCI_TPH_ST_IV_MODE;
+
for (i = 0, j = 0; i < bp->cp_nr_rings; i++) {
int map_idx = bnxt_cp_num_to_irq_num(bp, i);
struct bnxt_irq *irq = &bp->irq_tbl[map_idx];
@@ -11306,8 +11513,11 @@ static int bnxt_request_irq(struct bnxt *bp)
if (zalloc_cpumask_var(&irq->cpu_mask, GFP_KERNEL)) {
int numa_node = dev_to_node(&bp->pdev->dev);
+ u16 tag;
irq->have_cpumask = 1;
+ irq->msix_nr = map_idx;
+ irq->ring_nr = i;
cpumask_set_cpu(cpumask_local_spread(i, numa_node),
irq->cpu_mask);
rc = irq_update_affinity_hint(irq->vector, irq->cpu_mask);
@@ -11317,6 +11527,16 @@ static int bnxt_request_irq(struct bnxt *bp)
irq->vector);
break;
}
+
+ bnxt_register_irq_notifier(bp, irq);
+
+ /* Init ST table entry */
+ if (pcie_tph_get_cpu_st(irq->bp->pdev, TPH_MEM_TYPE_VM,
+ cpumask_first(irq->cpu_mask),
+ &tag))
+ continue;
+
+ pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, tag);
}
}
return rc;
@@ -15601,6 +15821,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx)
struct bnxt_rx_ring_info *rxr, *clone;
struct bnxt_cp_ring_info *cpr;
struct bnxt_vnic_info *vnic;
+ struct bnxt_napi *bnapi;
int i, rc;
rxr = &bp->rx_ring[idx];
@@ -15618,19 +15839,38 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx)
bnxt_copy_rx_ring(bp, rxr, clone);
+ bnapi = rxr->bnapi;
+ cpr = &bnapi->cp_ring;
+
+ /* All rings have been reserved and previously allocated.
+ * Reallocating with the same parameters should never fail.
+ */
rc = bnxt_hwrm_rx_ring_alloc(bp, rxr);
if (rc)
- return rc;
+ goto err_reset;
+
+ if (bp->tph_mode) {
+ rc = bnxt_hwrm_cp_ring_alloc_p5(bp, rxr->rx_cpr);
+ if (rc)
+ goto err_reset;
+ }
+
rc = bnxt_hwrm_rx_agg_ring_alloc(bp, rxr);
if (rc)
- goto err_free_hwrm_rx_ring;
+ goto err_reset;
bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
if (bp->flags & BNXT_FLAG_AGG_RINGS)
bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod);
- cpr = &rxr->bnapi->cp_ring;
- cpr->sw_stats->rx.rx_resets++;
+ if (bp->flags & BNXT_FLAG_SHARED_RINGS) {
+ rc = bnxt_tx_queue_start(bp, idx);
+ if (rc)
+ goto err_reset;
+ }
+
+ napi_enable(&bnapi->napi);
+ bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons);
for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) {
vnic = &bp->vnic_info[i];
@@ -15648,8 +15888,12 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx)
return 0;
-err_free_hwrm_rx_ring:
- bnxt_hwrm_rx_ring_free(bp, rxr, false);
+err_reset:
+ netdev_err(bp->dev, "Unexpected HWRM error during queue start rc: %d\n",
+ rc);
+ napi_enable(&bnapi->napi);
+ bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons);
+ bnxt_reset_task(bp, true);
return rc;
}
@@ -15657,7 +15901,9 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx)
{
struct bnxt *bp = netdev_priv(dev);
struct bnxt_rx_ring_info *rxr;
+ struct bnxt_cp_ring_info *cpr;
struct bnxt_vnic_info *vnic;
+ struct bnxt_napi *bnapi;
int i;
for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) {
@@ -15669,14 +15915,30 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx)
/* Make sure NAPI sees that the VNIC is disabled */
synchronize_net();
rxr = &bp->rx_ring[idx];
- cancel_work_sync(&rxr->bnapi->cp_ring.dim.work);
+ bnapi = rxr->bnapi;
+ cpr = &bnapi->cp_ring;
+ cancel_work_sync(&cpr->dim.work);
bnxt_hwrm_rx_ring_free(bp, rxr, false);
bnxt_hwrm_rx_agg_ring_free(bp, rxr, false);
- rxr->rx_next_cons = 0;
page_pool_disable_direct_recycling(rxr->page_pool);
if (bnxt_separate_head_pool())
page_pool_disable_direct_recycling(rxr->head_pool);
+ if (bp->flags & BNXT_FLAG_SHARED_RINGS)
+ bnxt_tx_queue_stop(bp, idx);
+
+ /* Disable NAPI now after freeing the rings because HWRM_RING_FREE
+ * completion is handled in NAPI to guarantee no more DMA on that ring
+ * after seeing the completion.
+ */
+ napi_disable(&bnapi->napi);
+
+ if (bp->tph_mode) {
+ bnxt_hwrm_cp_ring_free(bp, rxr->rx_cpr);
+ bnxt_clear_one_cp_ring(bp, rxr->rx_cpr);
+ }
+ bnxt_db_nq(bp, &cpr->cp_db, cpr->cp_raw_cons);
+
memcpy(qmem, rxr, sizeof(*rxr));
bnxt_init_rx_ring_struct(bp, qmem);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 2373f423a523..e85b5ce94f58 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1234,6 +1234,11 @@ struct bnxt_irq {
u8 have_cpumask:1;
char name[IFNAMSIZ + BNXT_IRQ_NAME_EXTRA];
cpumask_var_t cpu_mask;
+
+ struct bnxt *bp;
+ int msix_nr;
+ int ring_nr;
+ struct irq_affinity_notify affinity_notify;
};
#define HWRM_RING_ALLOC_TX 0x1
@@ -2410,6 +2415,8 @@ struct bnxt {
u8 max_q;
u8 num_tc;
+ u8 tph_mode;
+
unsigned int current_interval;
#define BNXT_TIMER_INTERVAL HZ
@@ -2492,6 +2499,7 @@ struct bnxt {
#define BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V3 BIT_ULL(39)
#define BNXT_FW_CAP_VNIC_RE_FLUSH BIT_ULL(40)
#define BNXT_FW_CAP_SW_MAX_RESOURCE_LIMITS BIT_ULL(41)
+ #define BNXT_FW_CAP_NPAR_1_2 BIT_ULL(42)
u32 fw_dbg_cap;
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 5740c98d8c9f..f69b2b7c8802 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -951,75 +951,73 @@ struct macb_tx_skb {
* device stats by a periodic timer.
*/
struct macb_stats {
- u32 rx_pause_frames;
- u32 tx_ok;
- u32 tx_single_cols;
- u32 tx_multiple_cols;
- u32 rx_ok;
- u32 rx_fcs_errors;
- u32 rx_align_errors;
- u32 tx_deferred;
- u32 tx_late_cols;
- u32 tx_excessive_cols;
- u32 tx_underruns;
- u32 tx_carrier_errors;
- u32 rx_resource_errors;
- u32 rx_overruns;
- u32 rx_symbol_errors;
- u32 rx_oversize_pkts;
- u32 rx_jabbers;
- u32 rx_undersize_pkts;
- u32 sqe_test_errors;
- u32 rx_length_mismatch;
- u32 tx_pause_frames;
+ u64 rx_pause_frames;
+ u64 tx_ok;
+ u64 tx_single_cols;
+ u64 tx_multiple_cols;
+ u64 rx_ok;
+ u64 rx_fcs_errors;
+ u64 rx_align_errors;
+ u64 tx_deferred;
+ u64 tx_late_cols;
+ u64 tx_excessive_cols;
+ u64 tx_underruns;
+ u64 tx_carrier_errors;
+ u64 rx_resource_errors;
+ u64 rx_overruns;
+ u64 rx_symbol_errors;
+ u64 rx_oversize_pkts;
+ u64 rx_jabbers;
+ u64 rx_undersize_pkts;
+ u64 sqe_test_errors;
+ u64 rx_length_mismatch;
+ u64 tx_pause_frames;
};
struct gem_stats {
- u32 tx_octets_31_0;
- u32 tx_octets_47_32;
- u32 tx_frames;
- u32 tx_broadcast_frames;
- u32 tx_multicast_frames;
- u32 tx_pause_frames;
- u32 tx_64_byte_frames;
- u32 tx_65_127_byte_frames;
- u32 tx_128_255_byte_frames;
- u32 tx_256_511_byte_frames;
- u32 tx_512_1023_byte_frames;
- u32 tx_1024_1518_byte_frames;
- u32 tx_greater_than_1518_byte_frames;
- u32 tx_underrun;
- u32 tx_single_collision_frames;
- u32 tx_multiple_collision_frames;
- u32 tx_excessive_collisions;
- u32 tx_late_collisions;
- u32 tx_deferred_frames;
- u32 tx_carrier_sense_errors;
- u32 rx_octets_31_0;
- u32 rx_octets_47_32;
- u32 rx_frames;
- u32 rx_broadcast_frames;
- u32 rx_multicast_frames;
- u32 rx_pause_frames;
- u32 rx_64_byte_frames;
- u32 rx_65_127_byte_frames;
- u32 rx_128_255_byte_frames;
- u32 rx_256_511_byte_frames;
- u32 rx_512_1023_byte_frames;
- u32 rx_1024_1518_byte_frames;
- u32 rx_greater_than_1518_byte_frames;
- u32 rx_undersized_frames;
- u32 rx_oversize_frames;
- u32 rx_jabbers;
- u32 rx_frame_check_sequence_errors;
- u32 rx_length_field_frame_errors;
- u32 rx_symbol_errors;
- u32 rx_alignment_errors;
- u32 rx_resource_errors;
- u32 rx_overruns;
- u32 rx_ip_header_checksum_errors;
- u32 rx_tcp_checksum_errors;
- u32 rx_udp_checksum_errors;
+ u64 tx_octets;
+ u64 tx_frames;
+ u64 tx_broadcast_frames;
+ u64 tx_multicast_frames;
+ u64 tx_pause_frames;
+ u64 tx_64_byte_frames;
+ u64 tx_65_127_byte_frames;
+ u64 tx_128_255_byte_frames;
+ u64 tx_256_511_byte_frames;
+ u64 tx_512_1023_byte_frames;
+ u64 tx_1024_1518_byte_frames;
+ u64 tx_greater_than_1518_byte_frames;
+ u64 tx_underrun;
+ u64 tx_single_collision_frames;
+ u64 tx_multiple_collision_frames;
+ u64 tx_excessive_collisions;
+ u64 tx_late_collisions;
+ u64 tx_deferred_frames;
+ u64 tx_carrier_sense_errors;
+ u64 rx_octets;
+ u64 rx_frames;
+ u64 rx_broadcast_frames;
+ u64 rx_multicast_frames;
+ u64 rx_pause_frames;
+ u64 rx_64_byte_frames;
+ u64 rx_65_127_byte_frames;
+ u64 rx_128_255_byte_frames;
+ u64 rx_256_511_byte_frames;
+ u64 rx_512_1023_byte_frames;
+ u64 rx_1024_1518_byte_frames;
+ u64 rx_greater_than_1518_byte_frames;
+ u64 rx_undersized_frames;
+ u64 rx_oversize_frames;
+ u64 rx_jabbers;
+ u64 rx_frame_check_sequence_errors;
+ u64 rx_length_field_frame_errors;
+ u64 rx_symbol_errors;
+ u64 rx_alignment_errors;
+ u64 rx_resource_errors;
+ u64 rx_overruns;
+ u64 rx_ip_header_checksum_errors;
+ u64 rx_tcp_checksum_errors;
+ u64 rx_udp_checksum_errors;
};
/* Describes the name and offset of an individual statistic register, as
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 48496209fb16..2112a9701e05 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -853,9 +853,7 @@ static int macb_mii_probe(struct net_device *dev)
struct macb *bp = netdev_priv(dev);
bp->phylink_sgmii_pcs.ops = &macb_phylink_pcs_ops;
- bp->phylink_sgmii_pcs.neg_mode = true;
bp->phylink_usx_pcs.ops = &macb_phylink_usx_pcs_ops;
- bp->phylink_usx_pcs.neg_mode = true;
bp->phylink_config.dev = &dev->dev;
bp->phylink_config.type = PHYLINK_NETDEV;
@@ -990,8 +988,8 @@ err_out:
static void macb_update_stats(struct macb *bp)
{
- u32 *p = &bp->hw_stats.macb.rx_pause_frames;
- u32 *end = &bp->hw_stats.macb.tx_pause_frames + 1;
+ u64 *p = &bp->hw_stats.macb.rx_pause_frames;
+ u64 *end = &bp->hw_stats.macb.tx_pause_frames + 1;
int offset = MACB_PFR;
WARN_ON((unsigned long)(end - p - 1) != (MACB_TPF - MACB_PFR) / 4);
@@ -3071,7 +3069,7 @@ static void gem_update_stats(struct macb *bp)
unsigned int i, q, idx;
unsigned long *stat;
- u32 *p = &bp->hw_stats.gem.tx_octets_31_0;
+ u64 *p = &bp->hw_stats.gem.tx_octets;
for (i = 0; i < GEM_STATS_LEN; ++i, ++p) {
u32 offset = gem_statistics[i].offset;
@@ -3084,7 +3082,7 @@ static void gem_update_stats(struct macb *bp)
/* Add GEM_OCTTXH, GEM_OCTRXH */
val = bp->macb_reg_readl(bp, offset + 4);
bp->ethtool_stats[i] += ((u64)val) << 32;
- *(++p) += val;
+ *(p++) += ((u64)val) << 32;
}
}
@@ -3094,15 +3092,12 @@ static void gem_update_stats(struct macb *bp)
bp->ethtool_stats[idx++] = *stat;
}
-static struct net_device_stats *gem_get_stats(struct macb *bp)
+static void gem_get_stats(struct macb *bp, struct rtnl_link_stats64 *nstat)
{
struct gem_stats *hwstat = &bp->hw_stats.gem;
- struct net_device_stats *nstat = &bp->dev->stats;
- if (!netif_running(bp->dev))
- return nstat;
-
- gem_update_stats(bp);
+ if (netif_running(bp->dev))
+ gem_update_stats(bp);
nstat->rx_errors = (hwstat->rx_frame_check_sequence_errors +
hwstat->rx_alignment_errors +
@@ -3131,8 +3126,6 @@ static struct net_device_stats *gem_get_stats(struct macb *bp)
nstat->tx_aborted_errors = hwstat->tx_excessive_collisions;
nstat->tx_carrier_errors = hwstat->tx_carrier_sense_errors;
nstat->tx_fifo_errors = hwstat->tx_underrun;
-
- return nstat;
}
static void gem_get_ethtool_stats(struct net_device *dev,
@@ -3183,14 +3176,17 @@ static void gem_get_ethtool_strings(struct net_device *dev, u32 sset, u8 *p)
}
}
-static struct net_device_stats *macb_get_stats(struct net_device *dev)
+static void macb_get_stats(struct net_device *dev,
+ struct rtnl_link_stats64 *nstat)
{
struct macb *bp = netdev_priv(dev);
- struct net_device_stats *nstat = &bp->dev->stats;
struct macb_stats *hwstat = &bp->hw_stats.macb;
- if (macb_is_gem(bp))
- return gem_get_stats(bp);
+ netdev_stats_to_stats64(nstat, &bp->dev->stats);
+ if (macb_is_gem(bp)) {
+ gem_get_stats(bp, nstat);
+ return;
+ }
/* read stats from hardware */
macb_update_stats(bp);
@@ -3226,8 +3222,154 @@ static struct net_device_stats *macb_get_stats(struct net_device *dev)
nstat->tx_carrier_errors = hwstat->tx_carrier_errors;
nstat->tx_fifo_errors = hwstat->tx_underruns;
/* Don't know about heartbeat or window errors... */
+}
+
+static void macb_get_pause_stats(struct net_device *dev,
+ struct ethtool_pause_stats *pause_stats)
+{
+ struct macb *bp = netdev_priv(dev);
+ struct macb_stats *hwstat = &bp->hw_stats.macb;
+
+ macb_update_stats(bp);
+ pause_stats->tx_pause_frames = hwstat->tx_pause_frames;
+ pause_stats->rx_pause_frames = hwstat->rx_pause_frames;
+}
+
+static void gem_get_pause_stats(struct net_device *dev,
+ struct ethtool_pause_stats *pause_stats)
+{
+ struct macb *bp = netdev_priv(dev);
+ struct gem_stats *hwstat = &bp->hw_stats.gem;
+
+ gem_update_stats(bp);
+ pause_stats->tx_pause_frames = hwstat->tx_pause_frames;
+ pause_stats->rx_pause_frames = hwstat->rx_pause_frames;
+}
+
+static void macb_get_eth_mac_stats(struct net_device *dev,
+ struct ethtool_eth_mac_stats *mac_stats)
+{
+ struct macb *bp = netdev_priv(dev);
+ struct macb_stats *hwstat = &bp->hw_stats.macb;
+
+ macb_update_stats(bp);
+ mac_stats->FramesTransmittedOK = hwstat->tx_ok;
+ mac_stats->SingleCollisionFrames = hwstat->tx_single_cols;
+ mac_stats->MultipleCollisionFrames = hwstat->tx_multiple_cols;
+ mac_stats->FramesReceivedOK = hwstat->rx_ok;
+ mac_stats->FrameCheckSequenceErrors = hwstat->rx_fcs_errors;
+ mac_stats->AlignmentErrors = hwstat->rx_align_errors;
+ mac_stats->FramesWithDeferredXmissions = hwstat->tx_deferred;
+ mac_stats->LateCollisions = hwstat->tx_late_cols;
+ mac_stats->FramesAbortedDueToXSColls = hwstat->tx_excessive_cols;
+ mac_stats->FramesLostDueToIntMACXmitError = hwstat->tx_underruns;
+ mac_stats->CarrierSenseErrors = hwstat->tx_carrier_errors;
+ mac_stats->FramesLostDueToIntMACRcvError = hwstat->rx_overruns;
+ mac_stats->InRangeLengthErrors = hwstat->rx_length_mismatch;
+ mac_stats->FrameTooLongErrors = hwstat->rx_oversize_pkts;
+}
+
+static void gem_get_eth_mac_stats(struct net_device *dev,
+ struct ethtool_eth_mac_stats *mac_stats)
+{
+ struct macb *bp = netdev_priv(dev);
+ struct gem_stats *hwstat = &bp->hw_stats.gem;
+
+ gem_update_stats(bp);
+ mac_stats->FramesTransmittedOK = hwstat->tx_frames;
+ mac_stats->SingleCollisionFrames = hwstat->tx_single_collision_frames;
+ mac_stats->MultipleCollisionFrames =
+ hwstat->tx_multiple_collision_frames;
+ mac_stats->FramesReceivedOK = hwstat->rx_frames;
+ mac_stats->FrameCheckSequenceErrors =
+ hwstat->rx_frame_check_sequence_errors;
+ mac_stats->AlignmentErrors = hwstat->rx_alignment_errors;
+ mac_stats->OctetsTransmittedOK = hwstat->tx_octets;
+ mac_stats->FramesWithDeferredXmissions = hwstat->tx_deferred_frames;
+ mac_stats->LateCollisions = hwstat->tx_late_collisions;
+ mac_stats->FramesAbortedDueToXSColls = hwstat->tx_excessive_collisions;
+ mac_stats->FramesLostDueToIntMACXmitError = hwstat->tx_underrun;
+ mac_stats->CarrierSenseErrors = hwstat->tx_carrier_sense_errors;
+ mac_stats->OctetsReceivedOK = hwstat->rx_octets;
+ mac_stats->MulticastFramesXmittedOK = hwstat->tx_multicast_frames;
+ mac_stats->BroadcastFramesXmittedOK = hwstat->tx_broadcast_frames;
+ mac_stats->MulticastFramesReceivedOK = hwstat->rx_multicast_frames;
+ mac_stats->BroadcastFramesReceivedOK = hwstat->rx_broadcast_frames;
+ mac_stats->InRangeLengthErrors = hwstat->rx_length_field_frame_errors;
+ mac_stats->FrameTooLongErrors = hwstat->rx_oversize_frames;
+}
+
+/* TODO: Report SQE test errors when added to phy_stats */
+static void macb_get_eth_phy_stats(struct net_device *dev,
+ struct ethtool_eth_phy_stats *phy_stats)
+{
+ struct macb *bp = netdev_priv(dev);
+ struct macb_stats *hwstat = &bp->hw_stats.macb;
- return nstat;
+ macb_update_stats(bp);
+ phy_stats->SymbolErrorDuringCarrier = hwstat->rx_symbol_errors;
+}
+
+static void gem_get_eth_phy_stats(struct net_device *dev,
+ struct ethtool_eth_phy_stats *phy_stats)
+{
+ struct macb *bp = netdev_priv(dev);
+ struct gem_stats *hwstat = &bp->hw_stats.gem;
+
+ gem_update_stats(bp);
+ phy_stats->SymbolErrorDuringCarrier = hwstat->rx_symbol_errors;
+}
+
+static void macb_get_rmon_stats(struct net_device *dev,
+ struct ethtool_rmon_stats *rmon_stats,
+ const struct ethtool_rmon_hist_range **ranges)
+{
+ struct macb *bp = netdev_priv(dev);
+ struct macb_stats *hwstat = &bp->hw_stats.macb;
+
+ macb_update_stats(bp);
+ rmon_stats->undersize_pkts = hwstat->rx_undersize_pkts;
+ rmon_stats->oversize_pkts = hwstat->rx_oversize_pkts;
+ rmon_stats->jabbers = hwstat->rx_jabbers;
+}
+
+static const struct ethtool_rmon_hist_range gem_rmon_ranges[] = {
+ { 64, 64 },
+ { 65, 127 },
+ { 128, 255 },
+ { 256, 511 },
+ { 512, 1023 },
+ { 1024, 1518 },
+ { 1519, 16384 },
+ { },
+};
+
+static void gem_get_rmon_stats(struct net_device *dev,
+ struct ethtool_rmon_stats *rmon_stats,
+ const struct ethtool_rmon_hist_range **ranges)
+{
+ struct macb *bp = netdev_priv(dev);
+ struct gem_stats *hwstat = &bp->hw_stats.gem;
+
+ gem_update_stats(bp);
+ rmon_stats->undersize_pkts = hwstat->rx_undersized_frames;
+ rmon_stats->oversize_pkts = hwstat->rx_oversize_frames;
+ rmon_stats->jabbers = hwstat->rx_jabbers;
+ rmon_stats->hist[0] = hwstat->rx_64_byte_frames;
+ rmon_stats->hist[1] = hwstat->rx_65_127_byte_frames;
+ rmon_stats->hist[2] = hwstat->rx_128_255_byte_frames;
+ rmon_stats->hist[3] = hwstat->rx_256_511_byte_frames;
+ rmon_stats->hist[4] = hwstat->rx_512_1023_byte_frames;
+ rmon_stats->hist[5] = hwstat->rx_1024_1518_byte_frames;
+ rmon_stats->hist[6] = hwstat->rx_greater_than_1518_byte_frames;
+ rmon_stats->hist_tx[0] = hwstat->tx_64_byte_frames;
+ rmon_stats->hist_tx[1] = hwstat->tx_65_127_byte_frames;
+ rmon_stats->hist_tx[2] = hwstat->tx_128_255_byte_frames;
+ rmon_stats->hist_tx[3] = hwstat->tx_256_511_byte_frames;
+ rmon_stats->hist_tx[4] = hwstat->tx_512_1023_byte_frames;
+ rmon_stats->hist_tx[5] = hwstat->tx_1024_1518_byte_frames;
+ rmon_stats->hist_tx[6] = hwstat->tx_greater_than_1518_byte_frames;
+ *ranges = gem_rmon_ranges;
}
static int macb_get_regs_len(struct net_device *netdev)
@@ -3756,6 +3898,10 @@ static const struct ethtool_ops macb_ethtool_ops = {
.get_regs = macb_get_regs,
.get_link = ethtool_op_get_link,
.get_ts_info = ethtool_op_get_ts_info,
+ .get_pause_stats = macb_get_pause_stats,
+ .get_eth_mac_stats = macb_get_eth_mac_stats,
+ .get_eth_phy_stats = macb_get_eth_phy_stats,
+ .get_rmon_stats = macb_get_rmon_stats,
.get_wol = macb_get_wol,
.set_wol = macb_set_wol,
.get_link_ksettings = macb_get_link_ksettings,
@@ -3774,6 +3920,10 @@ static const struct ethtool_ops gem_ethtool_ops = {
.get_ethtool_stats = gem_get_ethtool_stats,
.get_strings = gem_get_ethtool_strings,
.get_sset_count = gem_get_sset_count,
+ .get_pause_stats = gem_get_pause_stats,
+ .get_eth_mac_stats = gem_get_eth_mac_stats,
+ .get_eth_phy_stats = gem_get_eth_phy_stats,
+ .get_rmon_stats = gem_get_rmon_stats,
.get_link_ksettings = macb_get_link_ksettings,
.set_link_ksettings = macb_set_link_ksettings,
.get_ringparam = macb_get_ringparam,
@@ -3910,7 +4060,7 @@ static const struct net_device_ops macb_netdev_ops = {
.ndo_stop = macb_close,
.ndo_start_xmit = macb_start_xmit,
.ndo_set_rx_mode = macb_set_rx_mode,
- .ndo_get_stats = macb_get_stats,
+ .ndo_get_stats64 = macb_get_stats,
.ndo_eth_ioctl = macb_ioctl,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = macb_change_mtu,
@@ -4571,7 +4721,7 @@ static const struct net_device_ops at91ether_netdev_ops = {
.ndo_open = at91ether_open,
.ndo_stop = at91ether_close,
.ndo_start_xmit = at91ether_start_xmit,
- .ndo_get_stats = macb_get_stats,
+ .ndo_get_stats64 = macb_get_stats,
.ndo_set_rx_mode = macb_set_rx_mode,
.ndo_set_mac_address = eth_mac_addr,
.ndo_eth_ioctl = macb_ioctl,
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index 6b6cb73482d7..1753bb87dfbd 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -1433,22 +1433,6 @@ int octeon_wait_for_ddr_init(struct octeon_device *oct, u32 *timeout)
}
EXPORT_SYMBOL_GPL(octeon_wait_for_ddr_init);
-/* Get the octeon id assigned to the octeon device passed as argument.
- * This function is exported to other modules.
- * @param dev - octeon device pointer passed as a void *.
- * @return octeon device id
- */
-int lio_get_device_id(void *dev)
-{
- struct octeon_device *octeon_dev = (struct octeon_device *)dev;
- u32 i;
-
- for (i = 0; i < MAX_OCTEON_DEVICES; i++)
- if (octeon_device[i] == octeon_dev)
- return octeon_dev->octeon_id;
- return -1;
-}
-
void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq)
{
u64 instr_cnt;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index d26364c2ac81..19344b21f8fb 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -705,13 +705,6 @@ octeon_get_dispatch(struct octeon_device *octeon_dev, u16 opcode,
*/
struct octeon_device *lio_get_device(u32 octeon_id);
-/** Get the octeon id assigned to the octeon device passed as argument.
- * This function is exported to other modules.
- * @param dev - octeon device pointer passed as a void *.
- * @return octeon device id
- */
-int lio_get_device_id(void *dev);
-
/** Read windowed register.
* @param oct - pointer to the Octeon device.
* @param addr - Address of the register to read.
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index c7c2c15a1815..95e6f015a6af 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1211,9 +1211,6 @@ struct adapter {
struct timer_list flower_stats_timer;
struct work_struct flower_stats_work;
- /* Ethtool Dump */
- struct ethtool_dump eth_dump;
-
/* HMA */
struct hma_data hma;
@@ -1233,6 +1230,10 @@ struct adapter {
/* Ethtool n-tuple */
struct cxgb4_ethtool_filter *ethtool_filters;
+
+ /* Ethtool Dump */
+ /* Must be last - ends in a flex-array member. */
+ struct ethtool_dump eth_dump;
};
/* Support for "sched-class" command to allow a TX Scheduling Class to be
diff --git a/drivers/net/ethernet/cisco/enic/Makefile b/drivers/net/ethernet/cisco/enic/Makefile
index c3b6febfdbe4..b3b5196b2dfc 100644
--- a/drivers/net/ethernet/cisco/enic/Makefile
+++ b/drivers/net/ethernet/cisco/enic/Makefile
@@ -3,5 +3,5 @@ obj-$(CONFIG_ENIC) := enic.o
enic-y := enic_main.o vnic_cq.o vnic_intr.o vnic_wq.o \
enic_res.o enic_dev.o enic_pp.o vnic_dev.o vnic_rq.o vnic_vic.o \
- enic_ethtool.o enic_api.o enic_clsf.o
+ enic_ethtool.o enic_api.o enic_clsf.o enic_rq.o
diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h
index 10b7e02ba4d0..305ed12aa031 100644
--- a/drivers/net/ethernet/cisco/enic/enic.h
+++ b/drivers/net/ethernet/cisco/enic/enic.h
@@ -17,6 +17,7 @@
#include "vnic_nic.h"
#include "vnic_rss.h"
#include <linux/irq.h>
+#include <net/page_pool/helpers.h>
#define DRV_NAME "enic"
#define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver"
@@ -158,6 +159,7 @@ struct enic_rq_stats {
u64 pkt_truncated; /* truncated pkts */
u64 no_skb; /* out of skbs */
u64 desc_skip; /* Rx pkt went into later buffer */
+ u64 pp_alloc_fail; /* page pool alloc failure */
};
struct enic_wq {
@@ -169,6 +171,7 @@ struct enic_wq {
struct enic_rq {
struct vnic_rq vrq;
struct enic_rq_stats stats;
+ struct page_pool *pool;
} ____cacheline_aligned;
/* Per-instance private data structure */
@@ -223,7 +226,6 @@ struct enic {
unsigned int cq_avail;
unsigned int cq_count;
struct enic_rfs_flw_tbl rfs_h;
- u32 rx_copybreak;
u8 rss_key[ENIC_RSS_LEN];
struct vnic_gen_stats gen_stats;
};
diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index d607b4f0542c..18b929fc2879 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -608,43 +608,6 @@ static int enic_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
return ret;
}
-static int enic_get_tunable(struct net_device *dev,
- const struct ethtool_tunable *tuna, void *data)
-{
- struct enic *enic = netdev_priv(dev);
- int ret = 0;
-
- switch (tuna->id) {
- case ETHTOOL_RX_COPYBREAK:
- *(u32 *)data = enic->rx_copybreak;
- break;
- default:
- ret = -EINVAL;
- break;
- }
-
- return ret;
-}
-
-static int enic_set_tunable(struct net_device *dev,
- const struct ethtool_tunable *tuna,
- const void *data)
-{
- struct enic *enic = netdev_priv(dev);
- int ret = 0;
-
- switch (tuna->id) {
- case ETHTOOL_RX_COPYBREAK:
- enic->rx_copybreak = *(u32 *)data;
- break;
- default:
- ret = -EINVAL;
- break;
- }
-
- return ret;
-}
-
static u32 enic_get_rxfh_key_size(struct net_device *netdev)
{
return ENIC_RSS_LEN;
@@ -727,8 +690,6 @@ static const struct ethtool_ops enic_ethtool_ops = {
.get_coalesce = enic_get_coalesce,
.set_coalesce = enic_set_coalesce,
.get_rxnfc = enic_get_rxnfc,
- .get_tunable = enic_get_tunable,
- .set_tunable = enic_set_tunable,
.get_rxfh_key_size = enic_get_rxfh_key_size,
.get_rxfh = enic_get_rxfh,
.set_rxfh = enic_set_rxfh,
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 49f6cab01ed5..f24fd29ea207 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -58,6 +58,7 @@
#include "enic_dev.h"
#include "enic_pp.h"
#include "enic_clsf.h"
+#include "enic_rq.h"
#define ENIC_NOTIFY_TIMER_PERIOD (2 * HZ)
#define WQ_ENET_MAX_DESC_LEN (1 << WQ_ENET_LEN_BITS)
@@ -68,8 +69,6 @@
#define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN 0x0044 /* enet dynamic vnic */
#define PCI_DEVICE_ID_CISCO_VIC_ENET_VF 0x0071 /* enet SRIOV VF */
-#define RX_COPYBREAK_DEFAULT 256
-
/* Supported devices */
static const struct pci_device_id enic_id_table[] = {
{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) },
@@ -1313,243 +1312,6 @@ nla_put_failure:
return -EMSGSIZE;
}
-static void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf)
-{
- struct enic *enic = vnic_dev_priv(rq->vdev);
-
- if (!buf->os_buf)
- return;
-
- dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len,
- DMA_FROM_DEVICE);
- dev_kfree_skb_any(buf->os_buf);
- buf->os_buf = NULL;
-}
-
-static int enic_rq_alloc_buf(struct vnic_rq *rq)
-{
- struct enic *enic = vnic_dev_priv(rq->vdev);
- struct net_device *netdev = enic->netdev;
- struct sk_buff *skb;
- unsigned int len = netdev->mtu + VLAN_ETH_HLEN;
- unsigned int os_buf_index = 0;
- dma_addr_t dma_addr;
- struct vnic_rq_buf *buf = rq->to_use;
-
- if (buf->os_buf) {
- enic_queue_rq_desc(rq, buf->os_buf, os_buf_index, buf->dma_addr,
- buf->len);
-
- return 0;
- }
- skb = netdev_alloc_skb_ip_align(netdev, len);
- if (!skb) {
- enic->rq[rq->index].stats.no_skb++;
- return -ENOMEM;
- }
-
- dma_addr = dma_map_single(&enic->pdev->dev, skb->data, len,
- DMA_FROM_DEVICE);
- if (unlikely(enic_dma_map_check(enic, dma_addr))) {
- dev_kfree_skb(skb);
- return -ENOMEM;
- }
-
- enic_queue_rq_desc(rq, skb, os_buf_index,
- dma_addr, len);
-
- return 0;
-}
-
-static void enic_intr_update_pkt_size(struct vnic_rx_bytes_counter *pkt_size,
- u32 pkt_len)
-{
- if (ENIC_LARGE_PKT_THRESHOLD <= pkt_len)
- pkt_size->large_pkt_bytes_cnt += pkt_len;
- else
- pkt_size->small_pkt_bytes_cnt += pkt_len;
-}
-
-static bool enic_rxcopybreak(struct net_device *netdev, struct sk_buff **skb,
- struct vnic_rq_buf *buf, u16 len)
-{
- struct enic *enic = netdev_priv(netdev);
- struct sk_buff *new_skb;
-
- if (len > enic->rx_copybreak)
- return false;
- new_skb = netdev_alloc_skb_ip_align(netdev, len);
- if (!new_skb)
- return false;
- dma_sync_single_for_cpu(&enic->pdev->dev, buf->dma_addr, len,
- DMA_FROM_DEVICE);
- memcpy(new_skb->data, (*skb)->data, len);
- *skb = new_skb;
-
- return true;
-}
-
-static void enic_rq_indicate_buf(struct vnic_rq *rq,
- struct cq_desc *cq_desc, struct vnic_rq_buf *buf,
- int skipped, void *opaque)
-{
- struct enic *enic = vnic_dev_priv(rq->vdev);
- struct net_device *netdev = enic->netdev;
- struct sk_buff *skb;
- struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
- struct enic_rq_stats *rqstats = &enic->rq[rq->index].stats;
-
- u8 type, color, eop, sop, ingress_port, vlan_stripped;
- u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof;
- u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok;
- u8 ipv6, ipv4, ipv4_fragment, fcs_ok, rss_type, csum_not_calc;
- u8 packet_error;
- u16 q_number, completed_index, bytes_written, vlan_tci, checksum;
- u32 rss_hash;
- bool outer_csum_ok = true, encap = false;
-
- rqstats->packets++;
- if (skipped) {
- rqstats->desc_skip++;
- return;
- }
-
- skb = buf->os_buf;
-
- cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc,
- &type, &color, &q_number, &completed_index,
- &ingress_port, &fcoe, &eop, &sop, &rss_type,
- &csum_not_calc, &rss_hash, &bytes_written,
- &packet_error, &vlan_stripped, &vlan_tci, &checksum,
- &fcoe_sof, &fcoe_fc_crc_ok, &fcoe_enc_error,
- &fcoe_eof, &tcp_udp_csum_ok, &udp, &tcp,
- &ipv4_csum_ok, &ipv6, &ipv4, &ipv4_fragment,
- &fcs_ok);
-
- if (packet_error) {
-
- if (!fcs_ok) {
- if (bytes_written > 0)
- rqstats->bad_fcs++;
- else if (bytes_written == 0)
- rqstats->pkt_truncated++;
- }
-
- dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len,
- DMA_FROM_DEVICE);
- dev_kfree_skb_any(skb);
- buf->os_buf = NULL;
-
- return;
- }
-
- if (eop && bytes_written > 0) {
-
- /* Good receive
- */
- rqstats->bytes += bytes_written;
- if (!enic_rxcopybreak(netdev, &skb, buf, bytes_written)) {
- buf->os_buf = NULL;
- dma_unmap_single(&enic->pdev->dev, buf->dma_addr,
- buf->len, DMA_FROM_DEVICE);
- }
- prefetch(skb->data - NET_IP_ALIGN);
-
- skb_put(skb, bytes_written);
- skb->protocol = eth_type_trans(skb, netdev);
- skb_record_rx_queue(skb, q_number);
- if ((netdev->features & NETIF_F_RXHASH) && rss_hash &&
- (type == 3)) {
- switch (rss_type) {
- case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4:
- case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6:
- case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX:
- skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L4);
- rqstats->l4_rss_hash++;
- break;
- case CQ_ENET_RQ_DESC_RSS_TYPE_IPv4:
- case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6:
- case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX:
- skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L3);
- rqstats->l3_rss_hash++;
- break;
- }
- }
- if (enic->vxlan.vxlan_udp_port_number) {
- switch (enic->vxlan.patch_level) {
- case 0:
- if (fcoe) {
- encap = true;
- outer_csum_ok = fcoe_fc_crc_ok;
- }
- break;
- case 2:
- if ((type == 7) &&
- (rss_hash & BIT(0))) {
- encap = true;
- outer_csum_ok = (rss_hash & BIT(1)) &&
- (rss_hash & BIT(2));
- }
- break;
- }
- }
-
- /* Hardware does not provide whole packet checksum. It only
- * provides pseudo checksum. Since hw validates the packet
- * checksum but not provide us the checksum value. use
- * CHECSUM_UNNECESSARY.
- *
- * In case of encap pkt tcp_udp_csum_ok/tcp_udp_csum_ok is
- * inner csum_ok. outer_csum_ok is set by hw when outer udp
- * csum is correct or is zero.
- */
- if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc &&
- tcp_udp_csum_ok && outer_csum_ok &&
- (ipv4_csum_ok || ipv6)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = encap;
- if (encap)
- rqstats->csum_unnecessary_encap++;
- else
- rqstats->csum_unnecessary++;
- }
-
- if (vlan_stripped) {
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
- rqstats->vlan_stripped++;
- }
- skb_mark_napi_id(skb, &enic->napi[rq->index]);
- if (!(netdev->features & NETIF_F_GRO))
- netif_receive_skb(skb);
- else
- napi_gro_receive(&enic->napi[q_number], skb);
- if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
- enic_intr_update_pkt_size(&cq->pkt_size_counter,
- bytes_written);
- } else {
-
- /* Buffer overflow
- */
- rqstats->pkt_truncated++;
- dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len,
- DMA_FROM_DEVICE);
- dev_kfree_skb_any(skb);
- buf->os_buf = NULL;
- }
-}
-
-static int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
- u8 type, u16 q_number, u16 completed_index, void *opaque)
-{
- struct enic *enic = vnic_dev_priv(vdev);
-
- vnic_rq_service(&enic->rq[q_number].vrq, cq_desc,
- completed_index, VNIC_RQ_RETURN_DESC,
- enic_rq_indicate_buf, opaque);
-
- return 0;
-}
-
static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq)
{
unsigned int intr = enic_msix_rq_intr(enic, rq->index);
@@ -1972,6 +1734,17 @@ static int enic_open(struct net_device *netdev)
struct enic *enic = netdev_priv(netdev);
unsigned int i;
int err, ret;
+ unsigned int max_pkt_len = netdev->mtu + VLAN_ETH_HLEN;
+ struct page_pool_params pp_params = {
+ .order = get_order(max_pkt_len),
+ .pool_size = enic->config.rq_desc_count,
+ .nid = dev_to_node(&enic->pdev->dev),
+ .dev = &enic->pdev->dev,
+ .dma_dir = DMA_FROM_DEVICE,
+ .max_len = (max_pkt_len > PAGE_SIZE) ? max_pkt_len : PAGE_SIZE,
+ .netdev = netdev,
+ .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+ };
err = enic_request_intr(enic);
if (err) {
@@ -1989,6 +1762,16 @@ static int enic_open(struct net_device *netdev)
}
for (i = 0; i < enic->rq_count; i++) {
+ /* create a page pool for each RQ */
+ pp_params.napi = &enic->napi[i];
+ pp_params.queue_idx = i;
+ enic->rq[i].pool = page_pool_create(&pp_params);
+ if (IS_ERR(enic->rq[i].pool)) {
+ err = PTR_ERR(enic->rq[i].pool);
+ enic->rq[i].pool = NULL;
+ goto err_out_free_rq;
+ }
+
/* enable rq before updating rq desc */
vnic_rq_enable(&enic->rq[i].vrq);
vnic_rq_fill(&enic->rq[i].vrq, enic_rq_alloc_buf);
@@ -2029,8 +1812,11 @@ static int enic_open(struct net_device *netdev)
err_out_free_rq:
for (i = 0; i < enic->rq_count; i++) {
ret = vnic_rq_disable(&enic->rq[i].vrq);
- if (!ret)
+ if (!ret) {
vnic_rq_clean(&enic->rq[i].vrq, enic_free_rq_buf);
+ page_pool_destroy(enic->rq[i].pool);
+ enic->rq[i].pool = NULL;
+ }
}
enic_dev_notify_unset(enic);
err_out_free_intr:
@@ -2088,8 +1874,11 @@ static int enic_stop(struct net_device *netdev)
for (i = 0; i < enic->wq_count; i++)
vnic_wq_clean(&enic->wq[i].vwq, enic_free_wq_buf);
- for (i = 0; i < enic->rq_count; i++)
+ for (i = 0; i < enic->rq_count; i++) {
vnic_rq_clean(&enic->rq[i].vrq, enic_free_rq_buf);
+ page_pool_destroy(enic->rq[i].pool);
+ enic->rq[i].pool = NULL;
+ }
for (i = 0; i < enic->cq_count; i++)
vnic_cq_clean(&enic->cq[i]);
for (i = 0; i < enic->intr_count; i++)
@@ -2599,6 +2388,7 @@ static void enic_get_queue_stats_rx(struct net_device *dev, int idx,
rxs->hw_drop_overruns = rqstats->pkt_truncated;
rxs->csum_unnecessary = rqstats->csum_unnecessary +
rqstats->csum_unnecessary_encap;
+ rxs->alloc_fail = rqstats->pp_alloc_fail;
}
static void enic_get_queue_stats_tx(struct net_device *dev, int idx,
@@ -2626,6 +2416,7 @@ static void enic_get_base_stats(struct net_device *dev,
rxs->hw_drops = 0;
rxs->hw_drop_overruns = 0;
rxs->csum_unnecessary = 0;
+ rxs->alloc_fail = 0;
txs->bytes = 0;
txs->packets = 0;
txs->csum_none = 0;
@@ -3179,7 +2970,6 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
dev_err(dev, "Cannot register net device, aborting\n");
goto err_out_dev_deinit;
}
- enic->rx_copybreak = RX_COPYBREAK_DEFAULT;
return 0;
diff --git a/drivers/net/ethernet/cisco/enic/enic_rq.c b/drivers/net/ethernet/cisco/enic/enic_rq.c
new file mode 100644
index 000000000000..e3228ef7988a
--- /dev/null
+++ b/drivers/net/ethernet/cisco/enic/enic_rq.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2024 Cisco Systems, Inc. All rights reserved.
+
+#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
+#include <net/busy_poll.h>
+#include "enic.h"
+#include "enic_res.h"
+#include "enic_rq.h"
+#include "vnic_rq.h"
+#include "cq_enet_desc.h"
+
+#define ENIC_LARGE_PKT_THRESHOLD 1000
+
+static void enic_intr_update_pkt_size(struct vnic_rx_bytes_counter *pkt_size,
+ u32 pkt_len)
+{
+ if (pkt_len > ENIC_LARGE_PKT_THRESHOLD)
+ pkt_size->large_pkt_bytes_cnt += pkt_len;
+ else
+ pkt_size->small_pkt_bytes_cnt += pkt_len;
+}
+
+int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc, u8 type,
+ u16 q_number, u16 completed_index, void *opaque)
+{
+ struct enic *enic = vnic_dev_priv(vdev);
+
+ vnic_rq_service(&enic->rq[q_number].vrq, cq_desc, completed_index,
+ VNIC_RQ_RETURN_DESC, enic_rq_indicate_buf, opaque);
+ return 0;
+}
+
+static void enic_rq_set_skb_flags(struct vnic_rq *vrq, u8 type, u32 rss_hash,
+ u8 rss_type, u8 fcoe, u8 fcoe_fc_crc_ok,
+ u8 vlan_stripped, u8 csum_not_calc,
+ u8 tcp_udp_csum_ok, u8 ipv6, u8 ipv4_csum_ok,
+ u16 vlan_tci, struct sk_buff *skb)
+{
+ struct enic *enic = vnic_dev_priv(vrq->vdev);
+ struct net_device *netdev = enic->netdev;
+ struct enic_rq_stats *rqstats = &enic->rq[vrq->index].stats;
+ bool outer_csum_ok = true, encap = false;
+
+ if ((netdev->features & NETIF_F_RXHASH) && rss_hash && type == 3) {
+ switch (rss_type) {
+ case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4:
+ case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6:
+ case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX:
+ skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L4);
+ rqstats->l4_rss_hash++;
+ break;
+ case CQ_ENET_RQ_DESC_RSS_TYPE_IPv4:
+ case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6:
+ case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX:
+ skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L3);
+ rqstats->l3_rss_hash++;
+ break;
+ }
+ }
+ if (enic->vxlan.vxlan_udp_port_number) {
+ switch (enic->vxlan.patch_level) {
+ case 0:
+ if (fcoe) {
+ encap = true;
+ outer_csum_ok = fcoe_fc_crc_ok;
+ }
+ break;
+ case 2:
+ if (type == 7 && (rss_hash & BIT(0))) {
+ encap = true;
+ outer_csum_ok = (rss_hash & BIT(1)) &&
+ (rss_hash & BIT(2));
+ }
+ break;
+ }
+ }
+
+ /* Hardware does not provide whole packet checksum. It only
+ * provides pseudo checksum. Since hw validates the packet
+ * checksum but not provide us the checksum value. use
+ * CHECSUM_UNNECESSARY.
+ *
+ * In case of encap pkt tcp_udp_csum_ok/tcp_udp_csum_ok is
+ * inner csum_ok. outer_csum_ok is set by hw when outer udp
+ * csum is correct or is zero.
+ */
+ if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc &&
+ tcp_udp_csum_ok && outer_csum_ok && (ipv4_csum_ok || ipv6)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->csum_level = encap;
+ if (encap)
+ rqstats->csum_unnecessary_encap++;
+ else
+ rqstats->csum_unnecessary++;
+ }
+
+ if (vlan_stripped) {
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
+ rqstats->vlan_stripped++;
+ }
+}
+
+static bool enic_rq_pkt_error(struct vnic_rq *vrq, u8 packet_error, u8 fcs_ok,
+ u16 bytes_written)
+{
+ struct enic *enic = vnic_dev_priv(vrq->vdev);
+ struct enic_rq_stats *rqstats = &enic->rq[vrq->index].stats;
+
+ if (packet_error) {
+ if (!fcs_ok) {
+ if (bytes_written > 0)
+ rqstats->bad_fcs++;
+ else if (bytes_written == 0)
+ rqstats->pkt_truncated++;
+ }
+ return true;
+ }
+ return false;
+}
+
+int enic_rq_alloc_buf(struct vnic_rq *rq)
+{
+ struct enic *enic = vnic_dev_priv(rq->vdev);
+ struct net_device *netdev = enic->netdev;
+ struct enic_rq *erq = &enic->rq[rq->index];
+ struct enic_rq_stats *rqstats = &erq->stats;
+ unsigned int offset = 0;
+ unsigned int len = netdev->mtu + VLAN_ETH_HLEN;
+ unsigned int os_buf_index = 0;
+ dma_addr_t dma_addr;
+ struct vnic_rq_buf *buf = rq->to_use;
+ struct page *page;
+ unsigned int truesize = len;
+
+ if (buf->os_buf) {
+ enic_queue_rq_desc(rq, buf->os_buf, os_buf_index, buf->dma_addr,
+ buf->len);
+
+ return 0;
+ }
+
+ page = page_pool_dev_alloc(erq->pool, &offset, &truesize);
+ if (unlikely(!page)) {
+ rqstats->pp_alloc_fail++;
+ return -ENOMEM;
+ }
+ buf->offset = offset;
+ buf->truesize = truesize;
+ dma_addr = page_pool_get_dma_addr(page) + offset;
+ enic_queue_rq_desc(rq, (void *)page, os_buf_index, dma_addr, len);
+
+ return 0;
+}
+
+void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf)
+{
+ struct enic *enic = vnic_dev_priv(rq->vdev);
+ struct enic_rq *erq = &enic->rq[rq->index];
+
+ if (!buf->os_buf)
+ return;
+
+ page_pool_put_full_page(erq->pool, (struct page *)buf->os_buf, true);
+ buf->os_buf = NULL;
+}
+
+void enic_rq_indicate_buf(struct vnic_rq *rq, struct cq_desc *cq_desc,
+ struct vnic_rq_buf *buf, int skipped, void *opaque)
+{
+ struct enic *enic = vnic_dev_priv(rq->vdev);
+ struct sk_buff *skb;
+ struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
+ struct enic_rq_stats *rqstats = &enic->rq[rq->index].stats;
+ struct napi_struct *napi;
+
+ u8 type, color, eop, sop, ingress_port, vlan_stripped;
+ u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof;
+ u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok;
+ u8 ipv6, ipv4, ipv4_fragment, fcs_ok, rss_type, csum_not_calc;
+ u8 packet_error;
+ u16 q_number, completed_index, bytes_written, vlan_tci, checksum;
+ u32 rss_hash;
+
+ rqstats->packets++;
+ if (skipped) {
+ rqstats->desc_skip++;
+ return;
+ }
+
+ cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc, &type, &color,
+ &q_number, &completed_index, &ingress_port, &fcoe,
+ &eop, &sop, &rss_type, &csum_not_calc, &rss_hash,
+ &bytes_written, &packet_error, &vlan_stripped,
+ &vlan_tci, &checksum, &fcoe_sof, &fcoe_fc_crc_ok,
+ &fcoe_enc_error, &fcoe_eof, &tcp_udp_csum_ok, &udp,
+ &tcp, &ipv4_csum_ok, &ipv6, &ipv4, &ipv4_fragment,
+ &fcs_ok);
+
+ if (enic_rq_pkt_error(rq, packet_error, fcs_ok, bytes_written))
+ return;
+
+ if (eop && bytes_written > 0) {
+ /* Good receive
+ */
+ rqstats->bytes += bytes_written;
+ napi = &enic->napi[rq->index];
+ skb = napi_get_frags(napi);
+ if (unlikely(!skb)) {
+ net_warn_ratelimited("%s: skb alloc error rq[%d], desc[%d]\n",
+ enic->netdev->name, rq->index,
+ completed_index);
+ rqstats->no_skb++;
+ return;
+ }
+
+ prefetch(skb->data - NET_IP_ALIGN);
+
+ dma_sync_single_for_cpu(&enic->pdev->dev, buf->dma_addr,
+ bytes_written, DMA_FROM_DEVICE);
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ (struct page *)buf->os_buf, buf->offset,
+ bytes_written, buf->truesize);
+ skb_record_rx_queue(skb, q_number);
+ enic_rq_set_skb_flags(rq, type, rss_hash, rss_type, fcoe,
+ fcoe_fc_crc_ok, vlan_stripped,
+ csum_not_calc, tcp_udp_csum_ok, ipv6,
+ ipv4_csum_ok, vlan_tci, skb);
+ skb_mark_for_recycle(skb);
+ napi_gro_frags(napi);
+ if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+ enic_intr_update_pkt_size(&cq->pkt_size_counter,
+ bytes_written);
+ buf->os_buf = NULL;
+ buf->dma_addr = 0;
+ buf = buf->next;
+ } else {
+ /* Buffer overflow
+ */
+ rqstats->pkt_truncated++;
+ }
+}
diff --git a/drivers/net/ethernet/cisco/enic/enic_rq.h b/drivers/net/ethernet/cisco/enic/enic_rq.h
new file mode 100644
index 000000000000..a75d07562686
--- /dev/null
+++ b/drivers/net/ethernet/cisco/enic/enic_rq.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ * Copyright 2024 Cisco Systems, Inc. All rights reserved.
+ */
+
+int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc, u8 type,
+ u16 q_number, u16 completed_index, void *opaque);
+void enic_rq_indicate_buf(struct vnic_rq *rq, struct cq_desc *cq_desc,
+ struct vnic_rq_buf *buf, int skipped, void *opaque);
+int enic_rq_alloc_buf(struct vnic_rq *rq);
+void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf);
diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.h b/drivers/net/ethernet/cisco/enic/vnic_rq.h
index 0bc595abc03b..2ee4be2b9a34 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_rq.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_rq.h
@@ -61,6 +61,8 @@ struct vnic_rq_buf {
unsigned int index;
void *desc;
uint64_t wr_id;
+ unsigned int offset;
+ unsigned int truesize;
};
enum enic_poll_state {
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index f7c4ce8e9a26..a86cfebedaa8 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1093,6 +1093,29 @@ static void fec_enet_enable_ring(struct net_device *ndev)
}
}
+/* Whack a reset. We should wait for this.
+ * For i.MX6SX SOC, enet use AXI bus, we use disable MAC
+ * instead of reset MAC itself.
+ */
+static void fec_ctrl_reset(struct fec_enet_private *fep, bool allow_wol)
+{
+ u32 val;
+
+ if (!allow_wol || !(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) {
+ if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES ||
+ ((fep->quirks & FEC_QUIRK_NO_HARD_RESET) && fep->link)) {
+ writel(0, fep->hwp + FEC_ECNTRL);
+ } else {
+ writel(FEC_ECR_RESET, fep->hwp + FEC_ECNTRL);
+ udelay(10);
+ }
+ } else {
+ val = readl(fep->hwp + FEC_ECNTRL);
+ val |= (FEC_ECR_MAGICEN | FEC_ECR_SLEEP);
+ writel(val, fep->hwp + FEC_ECNTRL);
+ }
+}
+
/*
* This function is called to start or restart the FEC during a link
* change, transmit timeout, or to reconfigure the FEC. The network
@@ -1109,17 +1132,7 @@ fec_restart(struct net_device *ndev)
if (fep->bufdesc_ex)
fec_ptp_save_state(fep);
- /* Whack a reset. We should wait for this.
- * For i.MX6SX SOC, enet use AXI bus, we use disable MAC
- * instead of reset MAC itself.
- */
- if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES ||
- ((fep->quirks & FEC_QUIRK_NO_HARD_RESET) && fep->link)) {
- writel(0, fep->hwp + FEC_ECNTRL);
- } else {
- writel(1, fep->hwp + FEC_ECNTRL);
- udelay(10);
- }
+ fec_ctrl_reset(fep, false);
/*
* enet-mac reset will reset mac address registers too,
@@ -1373,22 +1386,7 @@ fec_stop(struct net_device *ndev)
if (fep->bufdesc_ex)
fec_ptp_save_state(fep);
- /* Whack a reset. We should wait for this.
- * For i.MX6SX SOC, enet use AXI bus, we use disable MAC
- * instead of reset MAC itself.
- */
- if (!(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) {
- if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES) {
- writel(0, fep->hwp + FEC_ECNTRL);
- } else {
- writel(FEC_ECR_RESET, fep->hwp + FEC_ECNTRL);
- udelay(10);
- }
- } else {
- val = readl(fep->hwp + FEC_ECNTRL);
- val |= (FEC_ECR_MAGICEN | FEC_ECR_SLEEP);
- writel(val, fep->hwp + FEC_ECNTRL);
- }
+ fec_ctrl_reset(fep, true);
writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
index b3e2a596ad2c..51402dff72c5 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
@@ -1446,7 +1446,6 @@ int dtsec_initialization(struct mac_device *mac_dev,
goto _return_fm_mac_free;
}
dtsec->pcs.ops = &dtsec_pcs_ops;
- dtsec->pcs.neg_mode = true;
dtsec->pcs.poll = true;
supported = mac_dev->phylink_config.supported_interfaces;
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 435138f4699d..deb35b38c976 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -1647,20 +1647,11 @@ static void gfar_configure_serdes(struct net_device *dev)
*/
static int init_phy(struct net_device *dev)
{
- __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
struct gfar_private *priv = netdev_priv(dev);
phy_interface_t interface = priv->interface;
struct phy_device *phydev;
struct ethtool_keee edata;
- linkmode_set_bit_array(phy_10_100_features_array,
- ARRAY_SIZE(phy_10_100_features_array),
- mask);
- linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mask);
- linkmode_set_bit(ETHTOOL_LINK_MODE_MII_BIT, mask);
- if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT)
- linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, mask);
-
priv->oldlink = 0;
priv->oldspeed = 0;
priv->oldduplex = -1;
@@ -1675,9 +1666,8 @@ static int init_phy(struct net_device *dev)
if (interface == PHY_INTERFACE_MODE_SGMII)
gfar_configure_serdes(dev);
- /* Remove any features not supported by the controller */
- linkmode_and(phydev->supported, phydev->supported, mask);
- linkmode_copy(phydev->advertising, phydev->supported);
+ if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT))
+ phy_set_max_speed(phydev, SPEED_100);
/* Add support for flow control */
phy_support_asym_pause(phydev);
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 88510f822759..affd5a6c44e7 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3408,7 +3408,7 @@ static int ucc_geth_parse_clock(struct device_node *np, const char *which,
return 0;
}
-struct phylink_mac_ops ugeth_mac_ops = {
+static const struct phylink_mac_ops ugeth_mac_ops = {
.mac_link_up = ugeth_mac_link_up,
.mac_link_down = ugeth_mac_link_down,
.mac_config = ugeth_mac_config,
diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h
index 38789faae706..84f92f6384e7 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.h
+++ b/drivers/net/ethernet/freescale/ucc_geth.h
@@ -890,8 +890,6 @@ struct ucc_geth_hardware_statistics {
addresses */
#define TX_TIMEOUT (1*HZ)
-#define PHY_INIT_TIMEOUT 100000
-#define PHY_CHANGE_TIME 2
/* Fast Ethernet (10/100 Mbps) */
#define UCC_GETH_URFS_INIT 512 /* Rx virtual FIFO size
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 25b8a3556004..417dfa18daae 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -2554,17 +2554,12 @@ static int emac_dt_mdio_probe(struct emac_instance *dev)
struct mii_bus *bus;
int res;
- mii_np = of_get_child_by_name(dev->ofdev->dev.of_node, "mdio");
+ mii_np = of_get_available_child_by_name(dev->ofdev->dev.of_node, "mdio");
if (!mii_np) {
dev_err(&dev->ofdev->dev, "no mdio definition found.");
return -ENODEV;
}
- if (!of_device_is_available(mii_np)) {
- res = -ENODEV;
- goto put_node;
- }
-
bus = devm_mdiobus_alloc(&dev->ofdev->dev);
if (!bus) {
res = -ENOMEM;
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index 24ec9a4f1ffa..1640d2f27833 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -264,6 +264,7 @@ config I40EVF
tristate "Intel(R) Ethernet Adaptive Virtual Function support"
select IAVF
depends on PCI_MSI
+ depends on PTP_1588_CLOCK_OPTIONAL
help
This driver supports virtual functions for Intel XL710,
X710, X722, XXV710, and all devices advertising support for
@@ -336,7 +337,7 @@ config ICE_SWITCHDEV
config ICE_HWTS
bool "Support HW cross-timestamp on platforms with PTM support"
default y
- depends on ICE && X86
+ depends on ICE && X86 && PCIE_PTM
help
Say Y to enable hardware supported cross-timestamping on platforms
with PCIe PTM support. The cross-timestamp is available through
diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
index d7df2a0ed629..44249dd91bd6 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.c
+++ b/drivers/net/ethernet/intel/e1000e/mac.c
@@ -331,8 +331,21 @@ void e1000e_update_mc_addr_list_generic(struct e1000_hw *hw,
}
/* replace the entire MTA table */
- for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
+ for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) {
E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, hw->mac.mta_shadow[i]);
+
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ /*
+ * Do not queue up too many posted writes to prevent
+ * increased latency for other devices on the
+ * interconnect. Flush after each 8th posted write,
+ * to keep additional execution time low while still
+ * preventing increased latency.
+ */
+ if (!(i % 8) && i)
+ e1e_flush();
+ }
+ }
e1e_flush();
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index e28f1905a4a0..9f47388eaba5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -2,6 +2,7 @@
/* Copyright(c) 2018 Intel Corporation. */
#include <linux/bpf_trace.h>
+#include <linux/unroll.h>
#include <net/xdp_sock_drv.h>
#include "i40e_txrx_common.h"
#include "i40e_xsk.h"
@@ -529,7 +530,8 @@ static void i40e_xmit_pkt_batch(struct i40e_ring *xdp_ring, struct xdp_desc *des
dma_addr_t dma;
u32 i;
- loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
+ unrolled_count(PKTS_PER_BATCH)
+ for (i = 0; i < PKTS_PER_BATCH; i++) {
u32 cmd = I40E_TX_DESC_CMD_ICRC | xsk_is_eop_desc(&desc[i]);
dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc[i].addr);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
index ef156fad52f2..dd16351a7af8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -6,7 +6,7 @@
#include <linux/types.h>
-/* This value should match the pragma in the loop_unrolled_for
+/* This value should match the pragma in the unrolled_count()
* macro. Why 4? It is strictly empirical. It seems to be a good
* compromise between the advantage of having simultaneous outstanding
* reads to the DMA array that can hide each others latency and the
@@ -14,14 +14,6 @@
*/
#define PKTS_PER_BATCH 4
-#ifdef __clang__
-#define loop_unrolled_for _Pragma("clang loop unroll_count(4)") for
-#elif __GNUC__ >= 8
-#define loop_unrolled_for _Pragma("GCC unroll 4") for
-#else
-#define loop_unrolled_for for
-#endif
-
struct i40e_ring;
struct i40e_vsi;
struct net_device;
diff --git a/drivers/net/ethernet/intel/iavf/Makefile b/drivers/net/ethernet/intel/iavf/Makefile
index 356ac9faa5bf..e13720a728ff 100644
--- a/drivers/net/ethernet/intel/iavf/Makefile
+++ b/drivers/net/ethernet/intel/iavf/Makefile
@@ -13,3 +13,5 @@ obj-$(CONFIG_IAVF) += iavf.o
iavf-y := iavf_main.o iavf_ethtool.o iavf_virtchnl.o iavf_fdir.o \
iavf_adv_rss.o iavf_txrx.o iavf_common.o iavf_adminq.o
+
+iavf-$(CONFIG_PTP_1588_CLOCK) += iavf_ptp.o
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index 532a0a595fe8..9de3e0ba3731 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -41,6 +41,7 @@
#include "iavf_txrx.h"
#include "iavf_fdir.h"
#include "iavf_adv_rss.h"
+#include "iavf_types.h"
#include <linux/bitmap.h>
#define DEFAULT_DEBUG_LEVEL_SHIFT 3
@@ -82,7 +83,7 @@ struct iavf_vsi {
#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
-#define IAVF_RX_DESC(R, i) (&(((union iavf_32byte_rx_desc *)((R)->desc))[i]))
+#define IAVF_RX_DESC(R, i) (&(((struct iavf_rx_desc *)((R)->desc))[i]))
#define IAVF_TX_DESC(R, i) (&(((struct iavf_tx_desc *)((R)->desc))[i]))
#define IAVF_TX_CTXTDESC(R, i) \
(&(((struct iavf_tx_context_desc *)((R)->desc))[i]))
@@ -271,6 +272,7 @@ struct iavf_adapter {
/* Lock to protect accesses to MAC and VLAN lists */
spinlock_t mac_vlan_list_lock;
char misc_vector_name[IFNAMSIZ + 9];
+ u8 rxdid;
int num_active_queues;
int num_req_queues;
@@ -343,6 +345,17 @@ struct iavf_adapter {
#define IAVF_FLAG_AQ_CONFIGURE_QUEUES_BW BIT_ULL(39)
#define IAVF_FLAG_AQ_CFG_QUEUES_QUANTA_SIZE BIT_ULL(40)
#define IAVF_FLAG_AQ_GET_QOS_CAPS BIT_ULL(41)
+#define IAVF_FLAG_AQ_GET_SUPPORTED_RXDIDS BIT_ULL(42)
+#define IAVF_FLAG_AQ_GET_PTP_CAPS BIT_ULL(43)
+#define IAVF_FLAG_AQ_SEND_PTP_CMD BIT_ULL(44)
+
+ /* AQ messages that must be sent after IAVF_FLAG_AQ_GET_CONFIG, in
+ * order to negotiated extended capabilities.
+ */
+#define IAVF_FLAG_AQ_EXTENDED_CAPS \
+ (IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS | \
+ IAVF_FLAG_AQ_GET_SUPPORTED_RXDIDS | \
+ IAVF_FLAG_AQ_GET_PTP_CAPS)
/* flags for processing extended capability messages during
* __IAVF_INIT_EXTENDED_CAPS. Each capability exchange requires
@@ -354,10 +367,18 @@ struct iavf_adapter {
u64 extended_caps;
#define IAVF_EXTENDED_CAP_SEND_VLAN_V2 BIT_ULL(0)
#define IAVF_EXTENDED_CAP_RECV_VLAN_V2 BIT_ULL(1)
+#define IAVF_EXTENDED_CAP_SEND_RXDID BIT_ULL(2)
+#define IAVF_EXTENDED_CAP_RECV_RXDID BIT_ULL(3)
+#define IAVF_EXTENDED_CAP_SEND_PTP BIT_ULL(4)
+#define IAVF_EXTENDED_CAP_RECV_PTP BIT_ULL(5)
#define IAVF_EXTENDED_CAPS \
(IAVF_EXTENDED_CAP_SEND_VLAN_V2 | \
- IAVF_EXTENDED_CAP_RECV_VLAN_V2)
+ IAVF_EXTENDED_CAP_RECV_VLAN_V2 | \
+ IAVF_EXTENDED_CAP_SEND_RXDID | \
+ IAVF_EXTENDED_CAP_RECV_RXDID | \
+ IAVF_EXTENDED_CAP_SEND_PTP | \
+ IAVF_EXTENDED_CAP_RECV_PTP)
/* Lock to prevent possible clobbering of
* current_netdev_promisc_flags
@@ -417,12 +438,18 @@ struct iavf_adapter {
VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF)
#define QOS_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \
VIRTCHNL_VF_OFFLOAD_QOS)
+#define IAVF_RXDID_ALLOWED(a) \
+ ((a)->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+#define IAVF_PTP_ALLOWED(a) \
+ ((a)->vf_res->vf_cap_flags & VIRTCHNL_VF_CAP_PTP)
struct virtchnl_vf_resource *vf_res; /* incl. all VSIs */
struct virtchnl_vsi_resource *vsi_res; /* our LAN VSI */
struct virtchnl_version_info pf_version;
#define PF_IS_V11(_a) (((_a)->pf_version.major == 1) && \
((_a)->pf_version.minor == 1))
struct virtchnl_vlan_caps vlan_v2_caps;
+ u64 supp_rxdids;
+ struct iavf_ptp ptp;
u16 msg_enable;
struct iavf_eth_stats current_stats;
struct virtchnl_qos_cap_list *qos_caps;
@@ -555,6 +582,10 @@ int iavf_send_vf_config_msg(struct iavf_adapter *adapter);
int iavf_get_vf_config(struct iavf_adapter *adapter);
int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter);
int iavf_send_vf_offload_vlan_v2_msg(struct iavf_adapter *adapter);
+int iavf_send_vf_supported_rxdids_msg(struct iavf_adapter *adapter);
+int iavf_get_vf_supported_rxdids(struct iavf_adapter *adapter);
+int iavf_send_vf_ptp_caps_msg(struct iavf_adapter *adapter);
+int iavf_get_vf_ptp_caps(struct iavf_adapter *adapter);
void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter);
u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter);
void iavf_irq_enable(struct iavf_adapter *adapter, bool flush);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 852e5b62f0a5..4c29739780bd 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -4,6 +4,7 @@
#include <linux/net/intel/libie/rx.h>
#include "iavf.h"
+#include "iavf_ptp.h"
#include "iavf_prototype.h"
/* All iavf tracepoints are defined by the include below, which must
* be included exactly once across the whole kernel with
@@ -710,6 +711,47 @@ static void iavf_configure_tx(struct iavf_adapter *adapter)
}
/**
+ * iavf_select_rx_desc_format - Select Rx descriptor format
+ * @adapter: adapter private structure
+ *
+ * Select what Rx descriptor format based on availability and enabled
+ * features.
+ *
+ * Return: the desired RXDID to select for a given Rx queue, as defined by
+ * enum virtchnl_rxdid_format.
+ */
+static u8 iavf_select_rx_desc_format(const struct iavf_adapter *adapter)
+{
+ u64 rxdids = adapter->supp_rxdids;
+
+ /* If we did not negotiate VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC, we must
+ * stick with the default value of the legacy 32 byte format.
+ */
+ if (!IAVF_RXDID_ALLOWED(adapter))
+ return VIRTCHNL_RXDID_1_32B_BASE;
+
+ /* Rx timestamping requires the use of flexible NIC descriptors */
+ if (iavf_ptp_cap_supported(adapter, VIRTCHNL_1588_PTP_CAP_RX_TSTAMP)) {
+ if (rxdids & BIT(VIRTCHNL_RXDID_2_FLEX_SQ_NIC))
+ return VIRTCHNL_RXDID_2_FLEX_SQ_NIC;
+
+ pci_warn(adapter->pdev,
+ "Unable to negotiate flexible descriptor format\n");
+ }
+
+ /* Warn if the PF does not list support for the default legacy
+ * descriptor format. This shouldn't happen, as this is the format
+ * used if VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC is not supported. It is
+ * likely caused by a bug in the PF implementation failing to indicate
+ * support for the format.
+ */
+ if (!(rxdids & VIRTCHNL_RXDID_1_32B_BASE_M))
+ netdev_warn(adapter->netdev, "PF does not list support for default Rx descriptor format\n");
+
+ return VIRTCHNL_RXDID_1_32B_BASE;
+}
+
+/**
* iavf_configure_rx - Configure Receive Unit after Reset
* @adapter: board private structure
*
@@ -719,8 +761,12 @@ static void iavf_configure_rx(struct iavf_adapter *adapter)
{
struct iavf_hw *hw = &adapter->hw;
- for (u32 i = 0; i < adapter->num_active_queues; i++)
+ adapter->rxdid = iavf_select_rx_desc_format(adapter);
+
+ for (u32 i = 0; i < adapter->num_active_queues; i++) {
adapter->rx_rings[i].tail = hw->hw_addr + IAVF_QRX_TAIL1(i);
+ adapter->rx_rings[i].rxdid = adapter->rxdid;
+ }
}
/**
@@ -2071,6 +2117,10 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter)
return iavf_send_vf_config_msg(adapter);
if (adapter->aq_required & IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS)
return iavf_send_vf_offload_vlan_v2_msg(adapter);
+ if (adapter->aq_required & IAVF_FLAG_AQ_GET_SUPPORTED_RXDIDS)
+ return iavf_send_vf_supported_rxdids_msg(adapter);
+ if (adapter->aq_required & IAVF_FLAG_AQ_GET_PTP_CAPS)
+ return iavf_send_vf_ptp_caps_msg(adapter);
if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_QUEUES) {
iavf_disable_queues(adapter);
return 0;
@@ -2235,7 +2285,10 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter)
iavf_enable_vlan_insertion_v2(adapter, ETH_P_8021AD);
return 0;
}
-
+ if (adapter->aq_required & IAVF_FLAG_AQ_SEND_PTP_CMD) {
+ iavf_virtchnl_send_ptp_cmd(adapter);
+ return IAVF_SUCCESS;
+ }
if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_STATS) {
iavf_request_stats(adapter);
return 0;
@@ -2600,6 +2653,112 @@ err:
}
/**
+ * iavf_init_send_supported_rxdids - part of querying for supported RXDID
+ * formats
+ * @adapter: board private structure
+ *
+ * Function processes send of the request for supported RXDIDs to the PF.
+ * Must clear IAVF_EXTENDED_CAP_RECV_RXDID if the message is not sent, e.g.
+ * due to the PF not negotiating VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC.
+ */
+static void iavf_init_send_supported_rxdids(struct iavf_adapter *adapter)
+{
+ int ret;
+
+ ret = iavf_send_vf_supported_rxdids_msg(adapter);
+ if (ret == -EOPNOTSUPP) {
+ /* PF does not support VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC. In this
+ * case, we did not send the capability exchange message and
+ * do not expect a response.
+ */
+ adapter->extended_caps &= ~IAVF_EXTENDED_CAP_RECV_RXDID;
+ }
+
+ /* We sent the message, so move on to the next step */
+ adapter->extended_caps &= ~IAVF_EXTENDED_CAP_SEND_RXDID;
+}
+
+/**
+ * iavf_init_recv_supported_rxdids - part of querying for supported RXDID
+ * formats
+ * @adapter: board private structure
+ *
+ * Function processes receipt of the supported RXDIDs message from the PF.
+ **/
+static void iavf_init_recv_supported_rxdids(struct iavf_adapter *adapter)
+{
+ int ret;
+
+ memset(&adapter->supp_rxdids, 0, sizeof(adapter->supp_rxdids));
+
+ ret = iavf_get_vf_supported_rxdids(adapter);
+ if (ret)
+ goto err;
+
+ /* We've processed the PF response to the
+ * VIRTCHNL_OP_GET_SUPPORTED_RXDIDS message we sent previously.
+ */
+ adapter->extended_caps &= ~IAVF_EXTENDED_CAP_RECV_RXDID;
+ return;
+
+err:
+ /* We didn't receive a reply. Make sure we try sending again when
+ * __IAVF_INIT_FAILED attempts to recover.
+ */
+ adapter->extended_caps |= IAVF_EXTENDED_CAP_SEND_RXDID;
+ iavf_change_state(adapter, __IAVF_INIT_FAILED);
+}
+
+/**
+ * iavf_init_send_ptp_caps - part of querying for extended PTP capabilities
+ * @adapter: board private structure
+ *
+ * Function processes send of the request for 1588 PTP capabilities to the PF.
+ * Must clear IAVF_EXTENDED_CAP_SEND_PTP if the message is not sent, e.g.
+ * due to the PF not negotiating VIRTCHNL_VF_PTP_CAP
+ */
+static void iavf_init_send_ptp_caps(struct iavf_adapter *adapter)
+{
+ if (iavf_send_vf_ptp_caps_msg(adapter) == -EOPNOTSUPP) {
+ /* PF does not support VIRTCHNL_VF_PTP_CAP. In this case, we
+ * did not send the capability exchange message and do not
+ * expect a response.
+ */
+ adapter->extended_caps &= ~IAVF_EXTENDED_CAP_RECV_PTP;
+ }
+
+ /* We sent the message, so move on to the next step */
+ adapter->extended_caps &= ~IAVF_EXTENDED_CAP_SEND_PTP;
+}
+
+/**
+ * iavf_init_recv_ptp_caps - part of querying for supported PTP capabilities
+ * @adapter: board private structure
+ *
+ * Function processes receipt of the PTP capabilities supported on this VF.
+ **/
+static void iavf_init_recv_ptp_caps(struct iavf_adapter *adapter)
+{
+ memset(&adapter->ptp.hw_caps, 0, sizeof(adapter->ptp.hw_caps));
+
+ if (iavf_get_vf_ptp_caps(adapter))
+ goto err;
+
+ /* We've processed the PF response to the VIRTCHNL_OP_1588_PTP_GET_CAPS
+ * message we sent previously.
+ */
+ adapter->extended_caps &= ~IAVF_EXTENDED_CAP_RECV_PTP;
+ return;
+
+err:
+ /* We didn't receive a reply. Make sure we try sending again when
+ * __IAVF_INIT_FAILED attempts to recover.
+ */
+ adapter->extended_caps |= IAVF_EXTENDED_CAP_SEND_PTP;
+ iavf_change_state(adapter, __IAVF_INIT_FAILED);
+}
+
+/**
* iavf_init_process_extended_caps - Part of driver startup
* @adapter: board private structure
*
@@ -2623,6 +2782,24 @@ static void iavf_init_process_extended_caps(struct iavf_adapter *adapter)
return;
}
+ /* Process capability exchange for RXDID formats */
+ if (adapter->extended_caps & IAVF_EXTENDED_CAP_SEND_RXDID) {
+ iavf_init_send_supported_rxdids(adapter);
+ return;
+ } else if (adapter->extended_caps & IAVF_EXTENDED_CAP_RECV_RXDID) {
+ iavf_init_recv_supported_rxdids(adapter);
+ return;
+ }
+
+ /* Process capability exchange for PTP features */
+ if (adapter->extended_caps & IAVF_EXTENDED_CAP_SEND_PTP) {
+ iavf_init_send_ptp_caps(adapter);
+ return;
+ } else if (adapter->extended_caps & IAVF_EXTENDED_CAP_RECV_PTP) {
+ iavf_init_recv_ptp_caps(adapter);
+ return;
+ }
+
/* When we reach here, no further extended capabilities exchanges are
* necessary, so we finally transition into __IAVF_INIT_CONFIG_ADAPTER
*/
@@ -2714,6 +2891,9 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter)
if (QOS_ALLOWED(adapter))
adapter->aq_required |= IAVF_FLAG_AQ_GET_QOS_CAPS;
+ /* Setup initial PTP configuration */
+ iavf_ptp_init(adapter);
+
iavf_schedule_finish_config(adapter);
return;
@@ -3139,15 +3319,18 @@ continue_reset:
}
adapter->aq_required |= IAVF_FLAG_AQ_GET_CONFIG;
- /* always set since VIRTCHNL_OP_GET_VF_RESOURCES has not been
- * sent/received yet, so VLAN_V2_ALLOWED() cannot is not reliable here,
- * however the VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS won't be sent until
- * VIRTCHNL_OP_GET_VF_RESOURCES and VIRTCHNL_VF_OFFLOAD_VLAN_V2 have
- * been successfully sent and negotiated
- */
- adapter->aq_required |= IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS;
adapter->aq_required |= IAVF_FLAG_AQ_MAP_VECTORS;
+ /* Certain capabilities require an extended negotiation process using
+ * extra messages that must be processed after getting the VF
+ * configuration. The related checks such as VLAN_V2_ALLOWED() are not
+ * reliable here, since the configuration has not yet been negotiated.
+ *
+ * Always set these flags, since them related VIRTCHNL messages won't
+ * be sent until after VIRTCHNL_OP_GET_VF_RESOURCES.
+ */
+ adapter->aq_required |= IAVF_FLAG_AQ_EXTENDED_CAPS;
+
spin_lock_bh(&adapter->mac_vlan_list_lock);
/* Delete filter for the current MAC address, it could have
@@ -4996,6 +5179,25 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev,
return iavf_fix_strip_features(adapter, features);
}
+static int iavf_hwstamp_get(struct net_device *netdev,
+ struct kernel_hwtstamp_config *config)
+{
+ struct iavf_adapter *adapter = netdev_priv(netdev);
+
+ *config = adapter->ptp.hwtstamp_config;
+
+ return 0;
+}
+
+static int iavf_hwstamp_set(struct net_device *netdev,
+ struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
+{
+ struct iavf_adapter *adapter = netdev_priv(netdev);
+
+ return iavf_ptp_set_ts_config(adapter, config, extack);
+}
+
static int
iavf_verify_shaper(struct net_shaper_binding *binding,
const struct net_shaper *shaper,
@@ -5104,6 +5306,8 @@ static const struct net_device_ops iavf_netdev_ops = {
.ndo_set_features = iavf_set_features,
.ndo_setup_tc = iavf_setup_tc,
.net_shaper_ops = &iavf_shaper_ops,
+ .ndo_hwtstamp_get = iavf_hwstamp_get,
+ .ndo_hwtstamp_set = iavf_hwstamp_set,
};
/**
@@ -5358,6 +5562,10 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Setup the wait queue for indicating virtchannel events */
init_waitqueue_head(&adapter->vc_waitqueue);
+ INIT_LIST_HEAD(&adapter->ptp.aq_cmds);
+ init_waitqueue_head(&adapter->ptp.phc_time_waitqueue);
+ mutex_init(&adapter->ptp.aq_cmd_lock);
+
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(5 * (pdev->devfn & 0x07)));
/* Initialization goes on in the work. Do not add more of it below. */
@@ -5514,6 +5722,8 @@ static void iavf_remove(struct pci_dev *pdev)
msleep(50);
}
+ iavf_ptp_release(adapter);
+
iavf_misc_irq_disable(adapter);
/* Shut down all the garbage mashers on the detention level */
cancel_work_sync(&adapter->reset_task);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ptp.c b/drivers/net/ethernet/intel/iavf/iavf_ptp.c
new file mode 100644
index 000000000000..b4d5eda2e84f
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_ptp.c
@@ -0,0 +1,485 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2024 Intel Corporation. */
+
+#include "iavf.h"
+#include "iavf_ptp.h"
+
+#define iavf_clock_to_adapter(info) \
+ container_of_const(info, struct iavf_adapter, ptp.info)
+
+/**
+ * iavf_ptp_disable_rx_tstamp - Disable timestamping in Rx rings
+ * @adapter: private adapter structure
+ *
+ * Disable timestamp reporting for all Rx rings.
+ */
+static void iavf_ptp_disable_rx_tstamp(struct iavf_adapter *adapter)
+{
+ for (u32 i = 0; i < adapter->num_active_queues; i++)
+ adapter->rx_rings[i].flags &= ~IAVF_TXRX_FLAGS_HW_TSTAMP;
+}
+
+/**
+ * iavf_ptp_enable_rx_tstamp - Enable timestamping in Rx rings
+ * @adapter: private adapter structure
+ *
+ * Enable timestamp reporting for all Rx rings.
+ */
+static void iavf_ptp_enable_rx_tstamp(struct iavf_adapter *adapter)
+{
+ for (u32 i = 0; i < adapter->num_active_queues; i++)
+ adapter->rx_rings[i].flags |= IAVF_TXRX_FLAGS_HW_TSTAMP;
+}
+
+/**
+ * iavf_ptp_set_timestamp_mode - Set device timestamping mode
+ * @adapter: private adapter structure
+ * @config: pointer to kernel_hwtstamp_config
+ *
+ * Set the timestamping mode requested from the userspace.
+ *
+ * Note: this function always translates Rx timestamp requests for any packet
+ * category into HWTSTAMP_FILTER_ALL.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int iavf_ptp_set_timestamp_mode(struct iavf_adapter *adapter,
+ struct kernel_hwtstamp_config *config)
+{
+ /* Reserved for future extensions. */
+ if (config->flags)
+ return -EINVAL;
+
+ switch (config->tx_type) {
+ case HWTSTAMP_TX_OFF:
+ break;
+ case HWTSTAMP_TX_ON:
+ return -EOPNOTSUPP;
+ default:
+ return -ERANGE;
+ }
+
+ if (config->rx_filter == HWTSTAMP_FILTER_NONE) {
+ iavf_ptp_disable_rx_tstamp(adapter);
+ return 0;
+ } else if (config->rx_filter > HWTSTAMP_FILTER_NTP_ALL) {
+ return -ERANGE;
+ } else if (!(iavf_ptp_cap_supported(adapter,
+ VIRTCHNL_1588_PTP_CAP_RX_TSTAMP))) {
+ return -EOPNOTSUPP;
+ }
+
+ config->rx_filter = HWTSTAMP_FILTER_ALL;
+ iavf_ptp_enable_rx_tstamp(adapter);
+
+ return 0;
+}
+
+/**
+ * iavf_ptp_set_ts_config - Set timestamping configuration
+ * @adapter: private adapter structure
+ * @config: pointer to kernel_hwtstamp_config structure
+ * @extack: pointer to netlink_ext_ack structure
+ *
+ * Program the requested timestamping configuration to the device.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int iavf_ptp_set_ts_config(struct iavf_adapter *adapter,
+ struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ err = iavf_ptp_set_timestamp_mode(adapter, config);
+ if (err)
+ return err;
+
+ /* Save successful settings for future reference */
+ adapter->ptp.hwtstamp_config = *config;
+
+ return 0;
+}
+
+/**
+ * iavf_ptp_cap_supported - Check if a PTP capability is supported
+ * @adapter: private adapter structure
+ * @cap: the capability bitmask to check
+ *
+ * Return: true if every capability set in cap is also set in the enabled
+ * capabilities reported by the PF, false otherwise.
+ */
+bool iavf_ptp_cap_supported(const struct iavf_adapter *adapter, u32 cap)
+{
+ if (!IAVF_PTP_ALLOWED(adapter))
+ return false;
+
+ /* Only return true if every bit in cap is set in hw_caps.caps */
+ return (adapter->ptp.hw_caps.caps & cap) == cap;
+}
+
+/**
+ * iavf_allocate_ptp_cmd - Allocate a PTP command message structure
+ * @v_opcode: the virtchnl opcode
+ * @msglen: length in bytes of the associated virtchnl structure
+ *
+ * Allocates a PTP command message and pre-fills it with the provided message
+ * length and opcode.
+ *
+ * Return: allocated PTP command.
+ */
+static struct iavf_ptp_aq_cmd *iavf_allocate_ptp_cmd(enum virtchnl_ops v_opcode,
+ u16 msglen)
+{
+ struct iavf_ptp_aq_cmd *cmd;
+
+ cmd = kzalloc(struct_size(cmd, msg, msglen), GFP_KERNEL);
+ if (!cmd)
+ return NULL;
+
+ cmd->v_opcode = v_opcode;
+ cmd->msglen = msglen;
+
+ return cmd;
+}
+
+/**
+ * iavf_queue_ptp_cmd - Queue PTP command for sending over virtchnl
+ * @adapter: private adapter structure
+ * @cmd: the command structure to send
+ *
+ * Queue the given command structure into the PTP virtchnl command queue tos
+ * end to the PF.
+ */
+static void iavf_queue_ptp_cmd(struct iavf_adapter *adapter,
+ struct iavf_ptp_aq_cmd *cmd)
+{
+ mutex_lock(&adapter->ptp.aq_cmd_lock);
+ list_add_tail(&cmd->list, &adapter->ptp.aq_cmds);
+ mutex_unlock(&adapter->ptp.aq_cmd_lock);
+
+ adapter->aq_required |= IAVF_FLAG_AQ_SEND_PTP_CMD;
+ mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
+}
+
+/**
+ * iavf_send_phc_read - Send request to read PHC time
+ * @adapter: private adapter structure
+ *
+ * Send a request to obtain the PTP hardware clock time. This allocates the
+ * VIRTCHNL_OP_1588_PTP_GET_TIME message and queues it up to send to
+ * indirectly read the PHC time.
+ *
+ * This function does not wait for the reply from the PF.
+ *
+ * Return: 0 if success, error code otherwise.
+ */
+static int iavf_send_phc_read(struct iavf_adapter *adapter)
+{
+ struct iavf_ptp_aq_cmd *cmd;
+
+ if (!adapter->ptp.clock)
+ return -EOPNOTSUPP;
+
+ cmd = iavf_allocate_ptp_cmd(VIRTCHNL_OP_1588_PTP_GET_TIME,
+ sizeof(struct virtchnl_phc_time));
+ if (!cmd)
+ return -ENOMEM;
+
+ iavf_queue_ptp_cmd(adapter, cmd);
+
+ return 0;
+}
+
+/**
+ * iavf_read_phc_indirect - Indirectly read the PHC time via virtchnl
+ * @adapter: private adapter structure
+ * @ts: storage for the timestamp value
+ * @sts: system timestamp values before and after the read
+ *
+ * Used when the device does not have direct register access to the PHC time.
+ * Indirectly reads the time via the VIRTCHNL_OP_1588_PTP_GET_TIME, and waits
+ * for the reply from the PF.
+ *
+ * Based on some simple measurements using ftrace and phc2sys, this clock
+ * access method has about a ~110 usec latency even when the system is not
+ * under load. In order to achieve acceptable results when using phc2sys with
+ * the indirect clock access method, it is recommended to use more
+ * conservative proportional and integration constants with the P/I servo.
+ *
+ * Return: 0 if success, error code otherwise.
+ */
+static int iavf_read_phc_indirect(struct iavf_adapter *adapter,
+ struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
+{
+ long ret;
+ int err;
+
+ adapter->ptp.phc_time_ready = false;
+
+ ptp_read_system_prets(sts);
+
+ err = iavf_send_phc_read(adapter);
+ if (err)
+ return err;
+
+ ret = wait_event_interruptible_timeout(adapter->ptp.phc_time_waitqueue,
+ adapter->ptp.phc_time_ready,
+ HZ);
+
+ ptp_read_system_postts(sts);
+
+ if (ret < 0)
+ return ret;
+ else if (!ret)
+ return -EBUSY;
+
+ *ts = ns_to_timespec64(adapter->ptp.cached_phc_time);
+
+ return 0;
+}
+
+static int iavf_ptp_gettimex64(struct ptp_clock_info *info,
+ struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
+{
+ struct iavf_adapter *adapter = iavf_clock_to_adapter(info);
+
+ if (!adapter->ptp.clock)
+ return -EOPNOTSUPP;
+
+ return iavf_read_phc_indirect(adapter, ts, sts);
+}
+
+/**
+ * iavf_ptp_cache_phc_time - Cache PHC time for performing timestamp extension
+ * @adapter: private adapter structure
+ *
+ * Periodically cache the PHC time in order to allow for timestamp extension.
+ * This is required because the Tx and Rx timestamps only contain 32bits of
+ * nanoseconds. Timestamp extension allows calculating the corrected 64bit
+ * timestamp. This algorithm relies on the cached time being within ~1 second
+ * of the timestamp.
+ */
+static void iavf_ptp_cache_phc_time(struct iavf_adapter *adapter)
+{
+ if (!time_is_before_jiffies(adapter->ptp.cached_phc_updated + HZ))
+ return;
+
+ /* The response from virtchnl will store the time into
+ * cached_phc_time.
+ */
+ iavf_send_phc_read(adapter);
+}
+
+/**
+ * iavf_ptp_do_aux_work - Perform periodic work required for PTP support
+ * @info: PTP clock info structure
+ *
+ * Handler to take care of periodic work required for PTP operation. This
+ * includes the following tasks:
+ *
+ * 1) updating cached_phc_time
+ *
+ * cached_phc_time is used by the Tx and Rx timestamp flows in order to
+ * perform timestamp extension, by carefully comparing the timestamp
+ * 32bit nanosecond timestamps and determining the corrected 64bit
+ * timestamp value to report to userspace. This algorithm only works if
+ * the cached_phc_time is within ~1 second of the Tx or Rx timestamp
+ * event. This task periodically reads the PHC time and stores it, to
+ * ensure that timestamp extension operates correctly.
+ *
+ * Returns: time in jiffies until the periodic task should be re-scheduled.
+ */
+static long iavf_ptp_do_aux_work(struct ptp_clock_info *info)
+{
+ struct iavf_adapter *adapter = iavf_clock_to_adapter(info);
+
+ iavf_ptp_cache_phc_time(adapter);
+
+ /* Check work about twice a second */
+ return msecs_to_jiffies(500);
+}
+
+/**
+ * iavf_ptp_register_clock - Register a new PTP for userspace
+ * @adapter: private adapter structure
+ *
+ * Allocate and register a new PTP clock device if necessary.
+ *
+ * Return: 0 if success, error otherwise.
+ */
+static int iavf_ptp_register_clock(struct iavf_adapter *adapter)
+{
+ struct ptp_clock_info *ptp_info = &adapter->ptp.info;
+ struct device *dev = &adapter->pdev->dev;
+ struct ptp_clock *clock;
+
+ snprintf(ptp_info->name, sizeof(ptp_info->name), "%s-%s-clk",
+ KBUILD_MODNAME, dev_name(dev));
+ ptp_info->owner = THIS_MODULE;
+ ptp_info->gettimex64 = iavf_ptp_gettimex64;
+ ptp_info->do_aux_work = iavf_ptp_do_aux_work;
+
+ clock = ptp_clock_register(ptp_info, dev);
+ if (IS_ERR(clock))
+ return PTR_ERR(clock);
+
+ adapter->ptp.clock = clock;
+
+ dev_dbg(&adapter->pdev->dev, "PTP clock %s registered\n",
+ adapter->ptp.info.name);
+
+ return 0;
+}
+
+/**
+ * iavf_ptp_init - Initialize PTP support if capability was negotiated
+ * @adapter: private adapter structure
+ *
+ * Initialize PTP functionality, based on the capabilities that the PF has
+ * enabled for this VF.
+ */
+void iavf_ptp_init(struct iavf_adapter *adapter)
+{
+ int err;
+
+ if (!iavf_ptp_cap_supported(adapter, VIRTCHNL_1588_PTP_CAP_READ_PHC)) {
+ pci_notice(adapter->pdev,
+ "Device does not have PTP clock support\n");
+ return;
+ }
+
+ err = iavf_ptp_register_clock(adapter);
+ if (err) {
+ pci_err(adapter->pdev,
+ "Failed to register PTP clock device (%p)\n",
+ ERR_PTR(err));
+ return;
+ }
+
+ for (int i = 0; i < adapter->num_active_queues; i++) {
+ struct iavf_ring *rx_ring = &adapter->rx_rings[i];
+
+ rx_ring->ptp = &adapter->ptp;
+ }
+
+ ptp_schedule_worker(adapter->ptp.clock, 0);
+}
+
+/**
+ * iavf_ptp_release - Disable PTP support
+ * @adapter: private adapter structure
+ *
+ * Release all PTP resources that were previously initialized.
+ */
+void iavf_ptp_release(struct iavf_adapter *adapter)
+{
+ struct iavf_ptp_aq_cmd *cmd, *tmp;
+
+ if (!adapter->ptp.clock)
+ return;
+
+ pci_dbg(adapter->pdev, "removing PTP clock %s\n",
+ adapter->ptp.info.name);
+ ptp_clock_unregister(adapter->ptp.clock);
+ adapter->ptp.clock = NULL;
+
+ /* Cancel any remaining uncompleted PTP clock commands */
+ mutex_lock(&adapter->ptp.aq_cmd_lock);
+ list_for_each_entry_safe(cmd, tmp, &adapter->ptp.aq_cmds, list) {
+ list_del(&cmd->list);
+ kfree(cmd);
+ }
+ adapter->aq_required &= ~IAVF_FLAG_AQ_SEND_PTP_CMD;
+ mutex_unlock(&adapter->ptp.aq_cmd_lock);
+
+ adapter->ptp.hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+ iavf_ptp_disable_rx_tstamp(adapter);
+}
+
+/**
+ * iavf_ptp_process_caps - Handle change in PTP capabilities
+ * @adapter: private adapter structure
+ *
+ * Handle any state changes necessary due to change in PTP capabilities, such
+ * as after a device reset or change in configuration from the PF.
+ */
+void iavf_ptp_process_caps(struct iavf_adapter *adapter)
+{
+ bool phc = iavf_ptp_cap_supported(adapter, VIRTCHNL_1588_PTP_CAP_READ_PHC);
+
+ /* Check if the device gained or lost necessary access to support the
+ * PTP hardware clock. If so, driver must respond appropriately by
+ * creating or destroying the PTP clock device.
+ */
+ if (adapter->ptp.clock && !phc)
+ iavf_ptp_release(adapter);
+ else if (!adapter->ptp.clock && phc)
+ iavf_ptp_init(adapter);
+
+ /* Check if the device lost access to Rx timestamp incoming packets */
+ if (!iavf_ptp_cap_supported(adapter, VIRTCHNL_1588_PTP_CAP_RX_TSTAMP)) {
+ adapter->ptp.hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+ iavf_ptp_disable_rx_tstamp(adapter);
+ }
+}
+
+/**
+ * iavf_ptp_extend_32b_timestamp - Convert a 32b nanoseconds timestamp to 64b
+ * nanoseconds
+ * @cached_phc_time: recently cached copy of PHC time
+ * @in_tstamp: Ingress/egress 32b nanoseconds timestamp value
+ *
+ * Hardware captures timestamps which contain only 32 bits of nominal
+ * nanoseconds, as opposed to the 64bit timestamps that the stack expects.
+ *
+ * Extend the 32bit nanosecond timestamp using the following algorithm and
+ * assumptions:
+ *
+ * 1) have a recently cached copy of the PHC time
+ * 2) assume that the in_tstamp was captured 2^31 nanoseconds (~2.1
+ * seconds) before or after the PHC time was captured.
+ * 3) calculate the delta between the cached time and the timestamp
+ * 4) if the delta is smaller than 2^31 nanoseconds, then the timestamp was
+ * captured after the PHC time. In this case, the full timestamp is just
+ * the cached PHC time plus the delta.
+ * 5) otherwise, if the delta is larger than 2^31 nanoseconds, then the
+ * timestamp was captured *before* the PHC time, i.e. because the PHC
+ * cache was updated after the timestamp was captured by hardware. In this
+ * case, the full timestamp is the cached time minus the inverse delta.
+ *
+ * This algorithm works even if the PHC time was updated after a Tx timestamp
+ * was requested, but before the Tx timestamp event was reported from
+ * hardware.
+ *
+ * This calculation primarily relies on keeping the cached PHC time up to
+ * date. If the timestamp was captured more than 2^31 nanoseconds after the
+ * PHC time, it is possible that the lower 32bits of PHC time have
+ * overflowed more than once, and we might generate an incorrect timestamp.
+ *
+ * This is prevented by (a) periodically updating the cached PHC time once
+ * a second, and (b) discarding any Tx timestamp packet if it has waited for
+ * a timestamp for more than one second.
+ *
+ * Return: extended timestamp (to 64b).
+ */
+u64 iavf_ptp_extend_32b_timestamp(u64 cached_phc_time, u32 in_tstamp)
+{
+ u32 low = lower_32_bits(cached_phc_time);
+ u32 delta = in_tstamp - low;
+ u64 ns;
+
+ /* Do not assume that the in_tstamp is always more recent than the
+ * cached PHC time. If the delta is large, it indicates that the
+ * in_tstamp was taken in the past, and should be converted
+ * forward.
+ */
+ if (delta > S32_MAX)
+ ns = cached_phc_time - (low - in_tstamp);
+ else
+ ns = cached_phc_time + delta;
+
+ return ns;
+}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ptp.h b/drivers/net/ethernet/intel/iavf/iavf_ptp.h
new file mode 100644
index 000000000000..783b8f287cd9
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_ptp.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2024 Intel Corporation. */
+
+#ifndef _IAVF_PTP_H_
+#define _IAVF_PTP_H_
+
+#include "iavf_types.h"
+
+/* bit indicating whether a 40bit timestamp is valid */
+#define IAVF_PTP_40B_TSTAMP_VALID BIT(24)
+
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+void iavf_ptp_init(struct iavf_adapter *adapter);
+void iavf_ptp_release(struct iavf_adapter *adapter);
+void iavf_ptp_process_caps(struct iavf_adapter *adapter);
+bool iavf_ptp_cap_supported(const struct iavf_adapter *adapter, u32 cap);
+void iavf_virtchnl_send_ptp_cmd(struct iavf_adapter *adapter);
+int iavf_ptp_set_ts_config(struct iavf_adapter *adapter,
+ struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack);
+u64 iavf_ptp_extend_32b_timestamp(u64 cached_phc_time, u32 in_tstamp);
+#else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
+static inline void iavf_ptp_init(struct iavf_adapter *adapter) { }
+static inline void iavf_ptp_release(struct iavf_adapter *adapter) { }
+static inline void iavf_ptp_process_caps(struct iavf_adapter *adapter) { }
+static inline bool iavf_ptp_cap_supported(const struct iavf_adapter *adapter,
+ u32 cap)
+{
+ return false;
+}
+
+static inline void iavf_virtchnl_send_ptp_cmd(struct iavf_adapter *adapter) { }
+static inline int iavf_ptp_set_ts_config(struct iavf_adapter *adapter,
+ struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
+{
+ return -1;
+}
+
+static inline u64 iavf_ptp_extend_32b_timestamp(u64 cached_phc_time,
+ u32 in_tstamp)
+{
+ return 0;
+}
+
+#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
+#endif /* _IAVF_PTP_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_trace.h b/drivers/net/ethernet/intel/iavf/iavf_trace.h
index 62212011c807..c5e4d1823886 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_trace.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_trace.h
@@ -112,7 +112,7 @@ DECLARE_EVENT_CLASS(
iavf_rx_template,
TP_PROTO(struct iavf_ring *ring,
- union iavf_32byte_rx_desc *desc,
+ struct iavf_rx_desc *desc,
struct sk_buff *skb),
TP_ARGS(ring, desc, skb),
@@ -140,7 +140,7 @@ DECLARE_EVENT_CLASS(
DEFINE_EVENT(
iavf_rx_template, iavf_clean_rx_irq,
TP_PROTO(struct iavf_ring *ring,
- union iavf_32byte_rx_desc *desc,
+ struct iavf_rx_desc *desc,
struct sk_buff *skb),
TP_ARGS(ring, desc, skb));
@@ -148,7 +148,7 @@ DEFINE_EVENT(
DEFINE_EVENT(
iavf_rx_template, iavf_clean_rx_irq_rx,
TP_PROTO(struct iavf_ring *ring,
- union iavf_32byte_rx_desc *desc,
+ struct iavf_rx_desc *desc,
struct sk_buff *skb),
TP_ARGS(ring, desc, skb));
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index 26b424fd6718..422312b8b54a 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -8,6 +8,26 @@
#include "iavf.h"
#include "iavf_trace.h"
#include "iavf_prototype.h"
+#include "iavf_ptp.h"
+
+/**
+ * iavf_is_descriptor_done - tests DD bit in Rx descriptor
+ * @qw1: quad word 1 from descriptor to get Descriptor Done field from
+ * @flex: is the descriptor flex or legacy
+ *
+ * This function tests the descriptor done bit in specified descriptor. Because
+ * there are two types of descriptors (legacy and flex) the parameter rx_ring
+ * is used to distinguish.
+ *
+ * Return: true or false based on the state of DD bit in Rx descriptor.
+ */
+static bool iavf_is_descriptor_done(u64 qw1, bool flex)
+{
+ if (flex)
+ return FIELD_GET(IAVF_RXD_FLEX_DD_M, qw1);
+ else
+ return FIELD_GET(IAVF_RXD_LEGACY_DD_M, qw1);
+}
static __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
u32 td_tag)
@@ -766,7 +786,7 @@ int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring)
u64_stats_init(&rx_ring->syncp);
/* Round up to nearest 4K */
- rx_ring->size = rx_ring->count * sizeof(union iavf_32byte_rx_desc);
+ rx_ring->size = rx_ring->count * sizeof(struct iavf_rx_desc);
rx_ring->size = ALIGN(rx_ring->size, 4096);
rx_ring->desc = dma_alloc_coherent(fq.pp->p.dev, rx_ring->size,
&rx_ring->dma, GFP_KERNEL);
@@ -845,7 +865,7 @@ bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count)
.count = rx_ring->count,
};
u16 ntu = rx_ring->next_to_use;
- union iavf_rx_desc *rx_desc;
+ struct iavf_rx_desc *rx_desc;
/* do nothing if no valid netdev defined */
if (!rx_ring->netdev || !cleaned_count)
@@ -863,7 +883,7 @@ bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count)
/* Refresh the desc even if buffer_addrs didn't change
* because each write-back erases this info.
*/
- rx_desc->read.pkt_addr = cpu_to_le64(addr);
+ rx_desc->qw0 = cpu_to_le64(addr);
rx_desc++;
ntu++;
@@ -873,7 +893,7 @@ bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count)
}
/* clear the status bits for the next_to_use descriptor */
- rx_desc->wb.qword1.status_error_len = 0;
+ rx_desc->qw1 = 0;
cleaned_count--;
} while (cleaned_count);
@@ -896,60 +916,43 @@ no_buffers:
}
/**
- * iavf_rx_checksum - Indicate in skb if hw indicated a good cksum
+ * iavf_rx_csum - Indicate in skb if hw indicated a good checksum
* @vsi: the VSI we care about
* @skb: skb currently being received and modified
- * @rx_desc: the receive descriptor
+ * @decoded_pt: decoded ptype information
+ * @csum_bits: decoded Rx descriptor information
**/
-static void iavf_rx_checksum(struct iavf_vsi *vsi,
- struct sk_buff *skb,
- union iavf_rx_desc *rx_desc)
+static void iavf_rx_csum(const struct iavf_vsi *vsi, struct sk_buff *skb,
+ struct libeth_rx_pt decoded_pt,
+ struct libeth_rx_csum csum_bits)
{
- struct libeth_rx_pt decoded;
- u32 rx_error, rx_status;
bool ipv4, ipv6;
- u8 ptype;
- u64 qword;
skb->ip_summed = CHECKSUM_NONE;
- qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
- ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword);
-
- decoded = libie_rx_pt_parse(ptype);
- if (!libeth_rx_pt_has_checksum(vsi->netdev, decoded))
- return;
-
- rx_error = FIELD_GET(IAVF_RXD_QW1_ERROR_MASK, qword);
- rx_status = FIELD_GET(IAVF_RXD_QW1_STATUS_MASK, qword);
-
/* did the hardware decode the packet and checksum? */
- if (!(rx_status & BIT(IAVF_RX_DESC_STATUS_L3L4P_SHIFT)))
+ if (unlikely(!csum_bits.l3l4p))
return;
- ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4;
- ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6;
+ ipv4 = libeth_rx_pt_get_ip_ver(decoded_pt) == LIBETH_RX_PT_OUTER_IPV4;
+ ipv6 = libeth_rx_pt_get_ip_ver(decoded_pt) == LIBETH_RX_PT_OUTER_IPV6;
- if (ipv4 &&
- (rx_error & (BIT(IAVF_RX_DESC_ERROR_IPE_SHIFT) |
- BIT(IAVF_RX_DESC_ERROR_EIPE_SHIFT))))
+ if (unlikely(ipv4 && (csum_bits.ipe || csum_bits.eipe)))
goto checksum_fail;
/* likely incorrect csum if alternate IP extension headers found */
- if (ipv6 &&
- rx_status & BIT(IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT))
- /* don't increment checksum err here, non-fatal err */
+ if (unlikely(ipv6 && csum_bits.ipv6exadd))
return;
/* there was some L4 error, count error and punt packet to the stack */
- if (rx_error & BIT(IAVF_RX_DESC_ERROR_L4E_SHIFT))
+ if (unlikely(csum_bits.l4e))
goto checksum_fail;
/* handle packets that were not able to be checksummed due
* to arrival speed, in this case the stack can compute
* the csum.
*/
- if (rx_error & BIT(IAVF_RX_DESC_ERROR_PPRS_SHIFT))
+ if (unlikely(csum_bits.pprs))
return;
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -960,52 +963,196 @@ checksum_fail:
}
/**
- * iavf_rx_hash - set the hash value in the skb
+ * iavf_legacy_rx_csum - Indicate in skb if hw indicated a good checksum
+ * @vsi: the VSI we care about
+ * @qw1: quad word 1
+ * @decoded_pt: decoded packet type
+ *
+ * This function only operates on the VIRTCHNL_RXDID_1_32B_BASE legacy 32byte
+ * descriptor writeback format.
+ *
+ * Return: decoded checksum bits.
+ **/
+static struct libeth_rx_csum
+iavf_legacy_rx_csum(const struct iavf_vsi *vsi, u64 qw1,
+ const struct libeth_rx_pt decoded_pt)
+{
+ struct libeth_rx_csum csum_bits = {};
+
+ if (!libeth_rx_pt_has_checksum(vsi->netdev, decoded_pt))
+ return csum_bits;
+
+ csum_bits.ipe = FIELD_GET(IAVF_RXD_LEGACY_IPE_M, qw1);
+ csum_bits.eipe = FIELD_GET(IAVF_RXD_LEGACY_EIPE_M, qw1);
+ csum_bits.l4e = FIELD_GET(IAVF_RXD_LEGACY_L4E_M, qw1);
+ csum_bits.pprs = FIELD_GET(IAVF_RXD_LEGACY_PPRS_M, qw1);
+ csum_bits.l3l4p = FIELD_GET(IAVF_RXD_LEGACY_L3L4P_M, qw1);
+ csum_bits.ipv6exadd = FIELD_GET(IAVF_RXD_LEGACY_IPV6EXADD_M, qw1);
+
+ return csum_bits;
+}
+
+/**
+ * iavf_flex_rx_csum - Indicate in skb if hw indicated a good checksum
+ * @vsi: the VSI we care about
+ * @qw1: quad word 1
+ * @decoded_pt: decoded packet type
+ *
+ * This function only operates on the VIRTCHNL_RXDID_2_FLEX_SQ_NIC flexible
+ * descriptor writeback format.
+ *
+ * Return: decoded checksum bits.
+ **/
+static struct libeth_rx_csum
+iavf_flex_rx_csum(const struct iavf_vsi *vsi, u64 qw1,
+ const struct libeth_rx_pt decoded_pt)
+{
+ struct libeth_rx_csum csum_bits = {};
+
+ if (!libeth_rx_pt_has_checksum(vsi->netdev, decoded_pt))
+ return csum_bits;
+
+ csum_bits.ipe = FIELD_GET(IAVF_RXD_FLEX_XSUM_IPE_M, qw1);
+ csum_bits.eipe = FIELD_GET(IAVF_RXD_FLEX_XSUM_EIPE_M, qw1);
+ csum_bits.l4e = FIELD_GET(IAVF_RXD_FLEX_XSUM_L4E_M, qw1);
+ csum_bits.eudpe = FIELD_GET(IAVF_RXD_FLEX_XSUM_EUDPE_M, qw1);
+ csum_bits.l3l4p = FIELD_GET(IAVF_RXD_FLEX_L3L4P_M, qw1);
+ csum_bits.ipv6exadd = FIELD_GET(IAVF_RXD_FLEX_IPV6EXADD_M, qw1);
+ csum_bits.nat = FIELD_GET(IAVF_RXD_FLEX_NAT_M, qw1);
+
+ return csum_bits;
+}
+
+/**
+ * iavf_legacy_rx_hash - set the hash value in the skb
+ * @ring: descriptor ring
+ * @qw0: quad word 0
+ * @qw1: quad word 1
+ * @skb: skb currently being received and modified
+ * @decoded_pt: decoded packet type
+ *
+ * This function only operates on the VIRTCHNL_RXDID_1_32B_BASE legacy 32byte
+ * descriptor writeback format.
+ **/
+static void iavf_legacy_rx_hash(const struct iavf_ring *ring, __le64 qw0,
+ __le64 qw1, struct sk_buff *skb,
+ const struct libeth_rx_pt decoded_pt)
+{
+ const __le64 rss_mask = cpu_to_le64(IAVF_RXD_LEGACY_FLTSTAT_M);
+ u32 hash;
+
+ if (!libeth_rx_pt_has_hash(ring->netdev, decoded_pt))
+ return;
+
+ if ((qw1 & rss_mask) == rss_mask) {
+ hash = le64_get_bits(qw0, IAVF_RXD_LEGACY_RSS_M);
+ libeth_rx_pt_set_hash(skb, hash, decoded_pt);
+ }
+}
+
+/**
+ * iavf_flex_rx_hash - set the hash value in the skb
* @ring: descriptor ring
- * @rx_desc: specific descriptor
+ * @qw1: quad word 1
* @skb: skb currently being received and modified
- * @rx_ptype: Rx packet type
+ * @decoded_pt: decoded packet type
+ *
+ * This function only operates on the VIRTCHNL_RXDID_2_FLEX_SQ_NIC flexible
+ * descriptor writeback format.
**/
-static void iavf_rx_hash(struct iavf_ring *ring,
- union iavf_rx_desc *rx_desc,
- struct sk_buff *skb,
- u8 rx_ptype)
+static void iavf_flex_rx_hash(const struct iavf_ring *ring, __le64 qw1,
+ struct sk_buff *skb,
+ const struct libeth_rx_pt decoded_pt)
{
- struct libeth_rx_pt decoded;
+ bool rss_valid;
u32 hash;
- const __le64 rss_mask =
- cpu_to_le64((u64)IAVF_RX_DESC_FLTSTAT_RSS_HASH <<
- IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT);
- decoded = libie_rx_pt_parse(rx_ptype);
- if (!libeth_rx_pt_has_hash(ring->netdev, decoded))
+ if (!libeth_rx_pt_has_hash(ring->netdev, decoded_pt))
return;
- if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
- hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
- libeth_rx_pt_set_hash(skb, hash, decoded);
+ rss_valid = le64_get_bits(qw1, IAVF_RXD_FLEX_RSS_VALID_M);
+ if (rss_valid) {
+ hash = le64_get_bits(qw1, IAVF_RXD_FLEX_RSS_HASH_M);
+ libeth_rx_pt_set_hash(skb, hash, decoded_pt);
}
}
/**
+ * iavf_flex_rx_tstamp - Capture Rx timestamp from the descriptor
+ * @rx_ring: descriptor ring
+ * @qw2: quad word 2 of descriptor
+ * @qw3: quad word 3 of descriptor
+ * @skb: skb currently being received
+ *
+ * Read the Rx timestamp value from the descriptor and pass it to the stack.
+ *
+ * This function only operates on the VIRTCHNL_RXDID_2_FLEX_SQ_NIC flexible
+ * descriptor writeback format.
+ */
+static void iavf_flex_rx_tstamp(const struct iavf_ring *rx_ring, __le64 qw2,
+ __le64 qw3, struct sk_buff *skb)
+{
+ u32 tstamp;
+ u64 ns;
+
+ /* Skip processing if timestamps aren't enabled */
+ if (!(rx_ring->flags & IAVF_TXRX_FLAGS_HW_TSTAMP))
+ return;
+
+ /* Check if this Rx descriptor has a valid timestamp */
+ if (!le64_get_bits(qw2, IAVF_PTP_40B_TSTAMP_VALID))
+ return;
+
+ /* the ts_low field only contains the valid bit and sub-nanosecond
+ * precision, so we don't need to extract it.
+ */
+ tstamp = le64_get_bits(qw3, IAVF_RXD_FLEX_QW3_TSTAMP_HIGH_M);
+
+ ns = iavf_ptp_extend_32b_timestamp(rx_ring->ptp->cached_phc_time,
+ tstamp);
+
+ *skb_hwtstamps(skb) = (struct skb_shared_hwtstamps) {
+ .hwtstamp = ns_to_ktime(ns),
+ };
+}
+
+/**
* iavf_process_skb_fields - Populate skb header fields from Rx descriptor
* @rx_ring: rx descriptor ring packet is being transacted on
* @rx_desc: pointer to the EOP Rx descriptor
* @skb: pointer to current skb being populated
- * @rx_ptype: the packet type decoded by hardware
+ * @ptype: the packet type decoded by hardware
+ * @flex: is the descriptor flex or legacy
*
* This function checks the ring, descriptor, and packet information in
* order to populate the hash, checksum, VLAN, protocol, and
* other fields within the skb.
**/
-static void
-iavf_process_skb_fields(struct iavf_ring *rx_ring,
- union iavf_rx_desc *rx_desc, struct sk_buff *skb,
- u8 rx_ptype)
+static void iavf_process_skb_fields(const struct iavf_ring *rx_ring,
+ const struct iavf_rx_desc *rx_desc,
+ struct sk_buff *skb, u32 ptype,
+ bool flex)
{
- iavf_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
-
- iavf_rx_checksum(rx_ring->vsi, skb, rx_desc);
+ struct libeth_rx_csum csum_bits;
+ struct libeth_rx_pt decoded_pt;
+ __le64 qw0 = rx_desc->qw0;
+ __le64 qw1 = rx_desc->qw1;
+ __le64 qw2 = rx_desc->qw2;
+ __le64 qw3 = rx_desc->qw3;
+
+ decoded_pt = libie_rx_pt_parse(ptype);
+
+ if (flex) {
+ iavf_flex_rx_hash(rx_ring, qw1, skb, decoded_pt);
+ iavf_flex_rx_tstamp(rx_ring, qw2, qw3, skb);
+ csum_bits = iavf_flex_rx_csum(rx_ring->vsi, le64_to_cpu(qw1),
+ decoded_pt);
+ } else {
+ iavf_legacy_rx_hash(rx_ring, qw0, qw1, skb, decoded_pt);
+ csum_bits = iavf_legacy_rx_csum(rx_ring->vsi, le64_to_cpu(qw1),
+ decoded_pt);
+ }
+ iavf_rx_csum(rx_ring->vsi, skb, decoded_pt, csum_bits);
skb_record_rx_queue(skb, rx_ring->queue_index);
@@ -1092,8 +1239,7 @@ static struct sk_buff *iavf_build_skb(const struct libeth_fqe *rx_buffer,
/**
* iavf_is_non_eop - process handling of non-EOP buffers
* @rx_ring: Rx ring being processed
- * @rx_desc: Rx descriptor for current buffer
- * @skb: Current socket buffer containing buffer in progress
+ * @fields: Rx descriptor extracted fields
*
* This function updates next to clean. If the buffer is an EOP buffer
* this function exits returning false, otherwise it will place the
@@ -1101,8 +1247,7 @@ static struct sk_buff *iavf_build_skb(const struct libeth_fqe *rx_buffer,
* that this is in fact a non-EOP buffer.
**/
static bool iavf_is_non_eop(struct iavf_ring *rx_ring,
- union iavf_rx_desc *rx_desc,
- struct sk_buff *skb)
+ struct libeth_rqe_info fields)
{
u32 ntc = rx_ring->next_to_clean + 1;
@@ -1113,8 +1258,7 @@ static bool iavf_is_non_eop(struct iavf_ring *rx_ring,
prefetch(IAVF_RX_DESC(rx_ring, ntc));
/* if we are the last buffer then there is nothing else to do */
-#define IAVF_RXD_EOF BIT(IAVF_RX_DESC_STATUS_EOF_SHIFT)
- if (likely(iavf_test_staterr(rx_desc, IAVF_RXD_EOF)))
+ if (likely(fields.eop))
return false;
rx_ring->rx_stats.non_eop_descs++;
@@ -1123,6 +1267,109 @@ static bool iavf_is_non_eop(struct iavf_ring *rx_ring,
}
/**
+ * iavf_extract_legacy_rx_fields - Extract fields from the Rx descriptor
+ * @rx_ring: rx descriptor ring
+ * @rx_desc: the descriptor to process
+ *
+ * Decode the Rx descriptor and extract relevant information including the
+ * size, VLAN tag, Rx packet type, end of packet field and RXE field value.
+ *
+ * This function only operates on the VIRTCHNL_RXDID_1_32B_BASE legacy 32byte
+ * descriptor writeback format.
+ *
+ * Return: fields extracted from the Rx descriptor.
+ */
+static struct libeth_rqe_info
+iavf_extract_legacy_rx_fields(const struct iavf_ring *rx_ring,
+ const struct iavf_rx_desc *rx_desc)
+{
+ u64 qw0 = le64_to_cpu(rx_desc->qw0);
+ u64 qw1 = le64_to_cpu(rx_desc->qw1);
+ u64 qw2 = le64_to_cpu(rx_desc->qw2);
+ struct libeth_rqe_info fields;
+ bool l2tag1p, l2tag2p;
+
+ fields.eop = FIELD_GET(IAVF_RXD_LEGACY_EOP_M, qw1);
+ fields.len = FIELD_GET(IAVF_RXD_LEGACY_LENGTH_M, qw1);
+
+ if (!fields.eop)
+ return fields;
+
+ fields.rxe = FIELD_GET(IAVF_RXD_LEGACY_RXE_M, qw1);
+ fields.ptype = FIELD_GET(IAVF_RXD_LEGACY_PTYPE_M, qw1);
+ fields.vlan = 0;
+
+ if (rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1) {
+ l2tag1p = FIELD_GET(IAVF_RXD_LEGACY_L2TAG1P_M, qw1);
+ if (l2tag1p)
+ fields.vlan = FIELD_GET(IAVF_RXD_LEGACY_L2TAG1_M, qw0);
+ } else if (rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2) {
+ l2tag2p = FIELD_GET(IAVF_RXD_LEGACY_L2TAG2P_M, qw2);
+ if (l2tag2p)
+ fields.vlan = FIELD_GET(IAVF_RXD_LEGACY_L2TAG2_M, qw2);
+ }
+
+ return fields;
+}
+
+/**
+ * iavf_extract_flex_rx_fields - Extract fields from the Rx descriptor
+ * @rx_ring: rx descriptor ring
+ * @rx_desc: the descriptor to process
+ *
+ * Decode the Rx descriptor and extract relevant information including the
+ * size, VLAN tag, Rx packet type, end of packet field and RXE field value.
+ *
+ * This function only operates on the VIRTCHNL_RXDID_2_FLEX_SQ_NIC flexible
+ * descriptor writeback format.
+ *
+ * Return: fields extracted from the Rx descriptor.
+ */
+static struct libeth_rqe_info
+iavf_extract_flex_rx_fields(const struct iavf_ring *rx_ring,
+ const struct iavf_rx_desc *rx_desc)
+{
+ struct libeth_rqe_info fields = {};
+ u64 qw0 = le64_to_cpu(rx_desc->qw0);
+ u64 qw1 = le64_to_cpu(rx_desc->qw1);
+ u64 qw2 = le64_to_cpu(rx_desc->qw2);
+ bool l2tag1p, l2tag2p;
+
+ fields.eop = FIELD_GET(IAVF_RXD_FLEX_EOP_M, qw1);
+ fields.len = FIELD_GET(IAVF_RXD_FLEX_PKT_LEN_M, qw0);
+
+ if (!fields.eop)
+ return fields;
+
+ fields.rxe = FIELD_GET(IAVF_RXD_FLEX_RXE_M, qw1);
+ fields.ptype = FIELD_GET(IAVF_RXD_FLEX_PTYPE_M, qw0);
+ fields.vlan = 0;
+
+ if (rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1) {
+ l2tag1p = FIELD_GET(IAVF_RXD_FLEX_L2TAG1P_M, qw1);
+ if (l2tag1p)
+ fields.vlan = FIELD_GET(IAVF_RXD_FLEX_L2TAG1_M, qw1);
+ } else if (rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2) {
+ l2tag2p = FIELD_GET(IAVF_RXD_FLEX_L2TAG2P_M, qw2);
+ if (l2tag2p)
+ fields.vlan = FIELD_GET(IAVF_RXD_FLEX_L2TAG2_2_M, qw2);
+ }
+
+ return fields;
+}
+
+static struct libeth_rqe_info
+iavf_extract_rx_fields(const struct iavf_ring *rx_ring,
+ const struct iavf_rx_desc *rx_desc,
+ bool flex)
+{
+ if (flex)
+ return iavf_extract_flex_rx_fields(rx_ring, rx_desc);
+ else
+ return iavf_extract_legacy_rx_fields(rx_ring, rx_desc);
+}
+
+/**
* iavf_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: rx descriptor ring to transact packets on
* @budget: Total limit on number of packets to process
@@ -1136,18 +1383,17 @@ static bool iavf_is_non_eop(struct iavf_ring *rx_ring,
**/
static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
{
+ bool flex = rx_ring->rxdid == VIRTCHNL_RXDID_2_FLEX_SQ_NIC;
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
struct sk_buff *skb = rx_ring->skb;
u16 cleaned_count = IAVF_DESC_UNUSED(rx_ring);
bool failure = false;
while (likely(total_rx_packets < (unsigned int)budget)) {
+ struct libeth_rqe_info fields;
struct libeth_fqe *rx_buffer;
- union iavf_rx_desc *rx_desc;
- unsigned int size;
- u16 vlan_tag = 0;
- u8 rx_ptype;
- u64 qword;
+ struct iavf_rx_desc *rx_desc;
+ u64 qw1;
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= IAVF_RX_BUFFER_WRITE) {
@@ -1158,35 +1404,32 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
rx_desc = IAVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
- /* status_error_len will always be zero for unused descriptors
- * because it's cleared in cleanup, and overlaps with hdr_addr
- * which is always zero because packet split isn't used, if the
- * hardware wrote DD then the length will be non-zero
- */
- qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-
/* This memory barrier is needed to keep us from reading
* any other fields out of the rx_desc until we have
* verified the descriptor has been written back.
*/
dma_rmb();
-#define IAVF_RXD_DD BIT(IAVF_RX_DESC_STATUS_DD_SHIFT)
- if (!iavf_test_staterr(rx_desc, IAVF_RXD_DD))
+
+ qw1 = le64_to_cpu(rx_desc->qw1);
+ /* If DD field (descriptor done) is unset then other fields are
+ * not valid
+ */
+ if (!iavf_is_descriptor_done(qw1, flex))
break;
- size = FIELD_GET(IAVF_RXD_QW1_LENGTH_PBUF_MASK, qword);
+ fields = iavf_extract_rx_fields(rx_ring, rx_desc, flex);
iavf_trace(clean_rx_irq, rx_ring, rx_desc, skb);
rx_buffer = &rx_ring->rx_fqes[rx_ring->next_to_clean];
- if (!libeth_rx_sync_for_cpu(rx_buffer, size))
+ if (!libeth_rx_sync_for_cpu(rx_buffer, fields.len))
goto skip_data;
/* retrieve a buffer from the ring */
if (skb)
- iavf_add_rx_frag(skb, rx_buffer, size);
+ iavf_add_rx_frag(skb, rx_buffer, fields.len);
else
- skb = iavf_build_skb(rx_buffer, size);
+ skb = iavf_build_skb(rx_buffer, fields.len);
/* exit if we failed to retrieve a buffer */
if (!skb) {
@@ -1197,15 +1440,14 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
skip_data:
cleaned_count++;
- if (iavf_is_non_eop(rx_ring, rx_desc, skb) || unlikely(!skb))
+ if (iavf_is_non_eop(rx_ring, fields) || unlikely(!skb))
continue;
- /* ERR_MASK will only have valid bits if EOP set, and
- * what we are doing here is actually checking
- * IAVF_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
- * the error field
+ /* RXE field in descriptor is an indication of the MAC errors
+ * (like CRC, alignment, oversize etc). If it is set then iavf
+ * should finish.
*/
- if (unlikely(iavf_test_staterr(rx_desc, BIT(IAVF_RXD_QW1_ERROR_SHIFT)))) {
+ if (unlikely(fields.rxe)) {
dev_kfree_skb_any(skb);
skb = NULL;
continue;
@@ -1219,22 +1461,11 @@ skip_data:
/* probably a little skewed due to removing CRC */
total_rx_bytes += skb->len;
- qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
- rx_ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword);
-
/* populate checksum, VLAN, and protocol */
- iavf_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
-
- if (qword & BIT(IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT) &&
- rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1)
- vlan_tag = le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1);
- if (rx_desc->wb.qword2.ext_status &
- cpu_to_le16(BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) &&
- rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2)
- vlan_tag = le16_to_cpu(rx_desc->wb.qword2.l2tag2_2);
+ iavf_process_skb_fields(rx_ring, rx_desc, skb, fields.ptype, flex);
iavf_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
- iavf_receive_skb(rx_ring, skb, vlan_tag);
+ iavf_receive_skb(rx_ring, skb, fields.vlan);
skb = NULL;
/* update budget accounting */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
index f97c702c0802..79ad554f2d53 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
@@ -80,25 +80,6 @@ enum iavf_dyn_idx_t {
BIT_ULL(IAVF_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \
BIT_ULL(IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP))
-#define iavf_rx_desc iavf_32byte_rx_desc
-
-/**
- * iavf_test_staterr - tests bits in Rx descriptor status and error fields
- * @rx_desc: pointer to receive descriptor (in le64 format)
- * @stat_err_bits: value to mask
- *
- * This function does some fast chicanery in order to return the
- * value of the mask which is really only used for boolean tests.
- * The status_error_len doesn't need to be shifted because it begins
- * at offset zero.
- */
-static inline bool iavf_test_staterr(union iavf_rx_desc *rx_desc,
- const u64 stat_err_bits)
-{
- return !!(rx_desc->wb.qword1.status_error_len &
- cpu_to_le64(stat_err_bits));
-}
-
/* How many Rx Buffers do we bundle into one write to the hardware ? */
#define IAVF_RX_INCREMENT(r, i) \
do { \
@@ -262,6 +243,8 @@ struct iavf_ring {
u16 next_to_use;
u16 next_to_clean;
+ u16 rxdid; /* Rx descriptor format */
+
u16 flags;
#define IAVF_TXR_FLAGS_WB_ON_ITR BIT(0)
#define IAVF_TXR_FLAGS_ARM_WB BIT(1)
@@ -269,6 +252,7 @@ struct iavf_ring {
#define IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 BIT(3)
#define IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2 BIT(4)
#define IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2 BIT(5)
+#define IAVF_TXRX_FLAGS_HW_TSTAMP BIT(6)
/* stats structs */
struct iavf_queue_stats stats;
@@ -295,6 +279,8 @@ struct iavf_ring {
* for this ring.
*/
+ struct iavf_ptp *ptp;
+
u32 rx_buf_len;
struct net_shaper q_shaper;
bool q_shaper_update;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_type.h b/drivers/net/ethernet/intel/iavf/iavf_type.h
index f6b09e57abce..f9e1319620f4 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_type.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_type.h
@@ -178,110 +178,116 @@ struct iavf_hw {
char err_str[16];
};
-/* RX Descriptors */
-union iavf_16byte_rx_desc {
- struct {
- __le64 pkt_addr; /* Packet buffer address */
- __le64 hdr_addr; /* Header buffer address */
- } read;
- struct {
- struct {
- struct {
- union {
- __le16 mirroring_status;
- __le16 fcoe_ctx_id;
- } mirr_fcoe;
- __le16 l2tag1;
- } lo_dword;
- union {
- __le32 rss; /* RSS Hash */
- __le32 fd_id; /* Flow director filter id */
- __le32 fcoe_param; /* FCoE DDP Context id */
- } hi_dword;
- } qword0;
- struct {
- /* ext status/error/pktype/length */
- __le64 status_error_len;
- } qword1;
- } wb; /* writeback */
-};
-
-union iavf_32byte_rx_desc {
- struct {
- __le64 pkt_addr; /* Packet buffer address */
- __le64 hdr_addr; /* Header buffer address */
- /* bit 0 of hdr_buffer_addr is DD bit */
- __le64 rsvd1;
- __le64 rsvd2;
- } read;
- struct {
- struct {
- struct {
- union {
- __le16 mirroring_status;
- __le16 fcoe_ctx_id;
- } mirr_fcoe;
- __le16 l2tag1;
- } lo_dword;
- union {
- __le32 rss; /* RSS Hash */
- __le32 fcoe_param; /* FCoE DDP Context id */
- /* Flow director filter id in case of
- * Programming status desc WB
- */
- __le32 fd_id;
- } hi_dword;
- } qword0;
- struct {
- /* status/error/pktype/length */
- __le64 status_error_len;
- } qword1;
- struct {
- __le16 ext_status; /* extended status */
- __le16 rsvd;
- __le16 l2tag2_1;
- __le16 l2tag2_2;
- } qword2;
- struct {
- union {
- __le32 flex_bytes_lo;
- __le32 pe_status;
- } lo_dword;
- union {
- __le32 flex_bytes_hi;
- __le32 fd_id;
- } hi_dword;
- } qword3;
- } wb; /* writeback */
-};
-
-enum iavf_rx_desc_status_bits {
- /* Note: These are predefined bit offsets */
- IAVF_RX_DESC_STATUS_DD_SHIFT = 0,
- IAVF_RX_DESC_STATUS_EOF_SHIFT = 1,
- IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT = 2,
- IAVF_RX_DESC_STATUS_L3L4P_SHIFT = 3,
- IAVF_RX_DESC_STATUS_CRCP_SHIFT = 4,
- IAVF_RX_DESC_STATUS_TSYNINDX_SHIFT = 5, /* 2 BITS */
- IAVF_RX_DESC_STATUS_TSYNVALID_SHIFT = 7,
- /* Note: Bit 8 is reserved in X710 and XL710 */
- IAVF_RX_DESC_STATUS_EXT_UDP_0_SHIFT = 8,
- IAVF_RX_DESC_STATUS_UMBCAST_SHIFT = 9, /* 2 BITS */
- IAVF_RX_DESC_STATUS_FLM_SHIFT = 11,
- IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT = 12, /* 2 BITS */
- IAVF_RX_DESC_STATUS_LPBK_SHIFT = 14,
- IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT = 15,
- IAVF_RX_DESC_STATUS_RESERVED_SHIFT = 16, /* 2 BITS */
- /* Note: For non-tunnel packets INT_UDP_0 is the right status for
- * UDP header
- */
- IAVF_RX_DESC_STATUS_INT_UDP_0_SHIFT = 18,
- IAVF_RX_DESC_STATUS_LAST /* this entry must be last!!! */
-};
-
-#define IAVF_RXD_QW1_STATUS_SHIFT 0
-#define IAVF_RXD_QW1_STATUS_MASK ((BIT(IAVF_RX_DESC_STATUS_LAST) - 1) \
- << IAVF_RXD_QW1_STATUS_SHIFT)
+/**
+ * struct iavf_rx_desc - Receive descriptor (both legacy and flexible)
+ * @qw0: quad word 0 fields:
+ * Legacy: Descriptor Type; Mirror ID; L2TAG1P (S-TAG); Filter Status
+ * Flex: Descriptor Type; Mirror ID; UMBCAST; Packet Type; Flexible Flags
+ * Section 0; Packet Length; Header Length; Split Header Flag;
+ * Flexible Flags section 1 / Extended Status
+ * @qw1: quad word 1 fields:
+ * Legacy: Status Field; Error Field; Packet Type; Packet Length (packet,
+ * header, Split Header Flag)
+ * Flex: Status / Error 0 Field; L2TAG1P (S-TAG); Flexible Metadata
+ * Container #0; Flexible Metadata Container #1
+ * @qw2: quad word 2 fields:
+ * Legacy: Extended Status; 1st L2TAG2P (C-TAG); 2nd L2TAG2P (C-TAG)
+ * Flex: Status / Error 1 Field; Flexible Flags section 2; Timestamp Low;
+ * 1st L2TAG2 (C-TAG); 2nd L2TAG2 (C-TAG)
+ * @qw3: quad word 3 fields:
+ * Legacy: FD Filter ID / Flexible Bytes
+ * Flex: Flexible Metadata Container #2; Flexible Metadata Container #3;
+ * Flexible Metadata Container #4 / Timestamp High 0; Flexible
+ * Metadata Container #5 / Timestamp High 1;
+ */
+struct iavf_rx_desc {
+ aligned_le64 qw0;
+/* The hash signature (RSS) */
+#define IAVF_RXD_LEGACY_RSS_M GENMASK_ULL(63, 32)
+/* Stripped C-TAG VLAN from the receive packet */
+#define IAVF_RXD_LEGACY_L2TAG1_M GENMASK_ULL(33, 16)
+/* Packet type */
+#define IAVF_RXD_FLEX_PTYPE_M GENMASK_ULL(25, 16)
+/* Packet length */
+#define IAVF_RXD_FLEX_PKT_LEN_M GENMASK_ULL(45, 32)
+
+ aligned_le64 qw1;
+/* Descriptor done indication flag. */
+#define IAVF_RXD_LEGACY_DD_M BIT(0)
+/* End of packet. Set to 1 if this descriptor is the last one of the packet */
+#define IAVF_RXD_LEGACY_EOP_M BIT(1)
+/* L2 TAG 1 presence indication */
+#define IAVF_RXD_LEGACY_L2TAG1P_M BIT(2)
+/* Detectable L3 and L4 integrity check is processed by the HW */
+#define IAVF_RXD_LEGACY_L3L4P_M BIT(3)
+/* Set when an IPv6 packet contains a Destination Options Header or a Routing
+ * Header.
+ */
+#define IAVF_RXD_LEGACY_IPV6EXADD_M BIT(15)
+/* Receive MAC Errors: CRC; Alignment; Oversize; Undersizes; Length error */
+#define IAVF_RXD_LEGACY_RXE_M BIT(19)
+/* Checksum reports:
+ * - IPE: IP checksum error
+ * - L4E: L4 integrity error
+ * - EIPE: External IP header (tunneled packets)
+ */
+#define IAVF_RXD_LEGACY_IPE_M BIT(22)
+#define IAVF_RXD_LEGACY_L4E_M BIT(23)
+#define IAVF_RXD_LEGACY_EIPE_M BIT(24)
+/* Set for packets that skip checksum calculation in pre-parser */
+#define IAVF_RXD_LEGACY_PPRS_M BIT(26)
+/* Indicates the content in the Filter Status field */
+#define IAVF_RXD_LEGACY_FLTSTAT_M GENMASK_ULL(13, 12)
+/* Packet type */
+#define IAVF_RXD_LEGACY_PTYPE_M GENMASK_ULL(37, 30)
+/* Packet length */
+#define IAVF_RXD_LEGACY_LENGTH_M GENMASK_ULL(51, 38)
+/* Descriptor done indication flag */
+#define IAVF_RXD_FLEX_DD_M BIT(0)
+/* End of packet. Set to 1 if this descriptor is the last one of the packet */
+#define IAVF_RXD_FLEX_EOP_M BIT(1)
+/* Detectable L3 and L4 integrity check is processed by the HW */
+#define IAVF_RXD_FLEX_L3L4P_M BIT(3)
+/* Checksum reports:
+ * - IPE: IP checksum error
+ * - L4E: L4 integrity error
+ * - EIPE: External IP header (tunneled packets)
+ * - EUDPE: External UDP checksum error (tunneled packets)
+ */
+#define IAVF_RXD_FLEX_XSUM_IPE_M BIT(4)
+#define IAVF_RXD_FLEX_XSUM_L4E_M BIT(5)
+#define IAVF_RXD_FLEX_XSUM_EIPE_M BIT(6)
+#define IAVF_RXD_FLEX_XSUM_EUDPE_M BIT(7)
+/* Set when an IPv6 packet contains a Destination Options Header or a Routing
+ * Header.
+ */
+#define IAVF_RXD_FLEX_IPV6EXADD_M BIT(9)
+/* Receive MAC Errors: CRC; Alignment; Oversize; Undersizes; Length error */
+#define IAVF_RXD_FLEX_RXE_M BIT(10)
+/* Indicates that the RSS/HASH result is valid */
+#define IAVF_RXD_FLEX_RSS_VALID_M BIT(12)
+/* L2 TAG 1 presence indication */
+#define IAVF_RXD_FLEX_L2TAG1P_M BIT(13)
+/* Stripped L2 Tag from the receive packet */
+#define IAVF_RXD_FLEX_L2TAG1_M GENMASK_ULL(31, 16)
+/* The hash signature (RSS) */
+#define IAVF_RXD_FLEX_RSS_HASH_M GENMASK_ULL(63, 32)
+
+ aligned_le64 qw2;
+/* L2 Tag 2 Presence */
+#define IAVF_RXD_LEGACY_L2TAG2P_M BIT(0)
+/* Stripped S-TAG VLAN from the receive packet */
+#define IAVF_RXD_LEGACY_L2TAG2_M GENMASK_ULL(63, 32)
+/* Stripped S-TAG VLAN from the receive packet */
+#define IAVF_RXD_FLEX_L2TAG2_2_M GENMASK_ULL(63, 48)
+/* The packet is a UDP tunneled packet */
+#define IAVF_RXD_FLEX_NAT_M BIT(4)
+/* L2 Tag 2 Presence */
+#define IAVF_RXD_FLEX_L2TAG2P_M BIT(11)
+ aligned_le64 qw3;
+#define IAVF_RXD_FLEX_QW3_TSTAMP_HIGH_M GENMASK_ULL(63, 32)
+} __aligned(4 * sizeof(__le64));
+static_assert(sizeof(struct iavf_rx_desc) == 32);
#define IAVF_RXD_QW1_STATUS_TSYNINDX_SHIFT IAVF_RX_DESC_STATUS_TSYNINDX_SHIFT
#define IAVF_RXD_QW1_STATUS_TSYNINDX_MASK (0x3UL << \
@@ -298,22 +304,6 @@ enum iavf_rx_desc_fltstat_values {
IAVF_RX_DESC_FLTSTAT_RSS_HASH = 3,
};
-#define IAVF_RXD_QW1_ERROR_SHIFT 19
-#define IAVF_RXD_QW1_ERROR_MASK (0xFFUL << IAVF_RXD_QW1_ERROR_SHIFT)
-
-enum iavf_rx_desc_error_bits {
- /* Note: These are predefined bit offsets */
- IAVF_RX_DESC_ERROR_RXE_SHIFT = 0,
- IAVF_RX_DESC_ERROR_RECIPE_SHIFT = 1,
- IAVF_RX_DESC_ERROR_HBO_SHIFT = 2,
- IAVF_RX_DESC_ERROR_L3L4E_SHIFT = 3, /* 3 BITS */
- IAVF_RX_DESC_ERROR_IPE_SHIFT = 3,
- IAVF_RX_DESC_ERROR_L4E_SHIFT = 4,
- IAVF_RX_DESC_ERROR_EIPE_SHIFT = 5,
- IAVF_RX_DESC_ERROR_OVERSIZE_SHIFT = 6,
- IAVF_RX_DESC_ERROR_PPRS_SHIFT = 7
-};
-
enum iavf_rx_desc_error_l3l4e_fcoe_masks {
IAVF_RX_DESC_ERROR_L3L4E_NONE = 0,
IAVF_RX_DESC_ERROR_L3L4E_PROT = 1,
@@ -322,13 +312,6 @@ enum iavf_rx_desc_error_l3l4e_fcoe_masks {
IAVF_RX_DESC_ERROR_L3L4E_DMAC_WARN = 4
};
-#define IAVF_RXD_QW1_PTYPE_SHIFT 30
-#define IAVF_RXD_QW1_PTYPE_MASK (0xFFULL << IAVF_RXD_QW1_PTYPE_SHIFT)
-
-#define IAVF_RXD_QW1_LENGTH_PBUF_SHIFT 38
-#define IAVF_RXD_QW1_LENGTH_PBUF_MASK (0x3FFFULL << \
- IAVF_RXD_QW1_LENGTH_PBUF_SHIFT)
-
#define IAVF_RXD_QW1_LENGTH_HBUF_SHIFT 52
#define IAVF_RXD_QW1_LENGTH_HBUF_MASK (0x7FFULL << \
IAVF_RXD_QW1_LENGTH_HBUF_SHIFT)
@@ -347,6 +330,8 @@ enum iavf_rx_desc_ext_status_bits {
IAVF_RX_DESC_EXT_STATUS_PELONGB_SHIFT = 11,
};
+#define IAVF_RX_DESC_EXT_STATUS_L2TAG2P_M BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)
+
enum iavf_rx_desc_pe_status_bits {
/* Note: These are predefined bit offsets */
IAVF_RX_DESC_PE_STATUS_QPID_SHIFT = 0, /* 18 BITS */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_types.h b/drivers/net/ethernet/intel/iavf/iavf_types.h
new file mode 100644
index 000000000000..a095855122bf
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_types.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2024 Intel Corporation. */
+
+#ifndef _IAVF_TYPES_H_
+#define _IAVF_TYPES_H_
+
+#include "iavf_types.h"
+
+#include <linux/avf/virtchnl.h>
+#include <linux/ptp_clock_kernel.h>
+
+/* structure used to queue PTP commands for processing */
+struct iavf_ptp_aq_cmd {
+ struct list_head list;
+ enum virtchnl_ops v_opcode:16;
+ u16 msglen;
+ u8 msg[] __counted_by(msglen);
+};
+
+struct iavf_ptp {
+ wait_queue_head_t phc_time_waitqueue;
+ struct virtchnl_ptp_caps hw_caps;
+ struct ptp_clock_info info;
+ struct ptp_clock *clock;
+ struct list_head aq_cmds;
+ u64 cached_phc_time;
+ unsigned long cached_phc_updated;
+ /* Lock protecting access to the AQ command list */
+ struct mutex aq_cmd_lock;
+ struct kernel_hwtstamp_config hwtstamp_config;
+ bool phc_time_ready:1;
+};
+
+#endif /* _IAVF_TYPES_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 15d388b431c5..a6f0e5990be2 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -4,6 +4,7 @@
#include <linux/net/intel/libie/rx.h>
#include "iavf.h"
+#include "iavf_ptp.h"
#include "iavf_prototype.h"
/**
@@ -144,9 +145,11 @@ int iavf_send_vf_config_msg(struct iavf_adapter *adapter)
VIRTCHNL_VF_OFFLOAD_ENCAP |
VIRTCHNL_VF_OFFLOAD_TC_U32 |
VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
+ VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC |
VIRTCHNL_VF_OFFLOAD_CRC |
VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM |
VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
+ VIRTCHNL_VF_CAP_PTP |
VIRTCHNL_VF_OFFLOAD_ADQ |
VIRTCHNL_VF_OFFLOAD_USO |
VIRTCHNL_VF_OFFLOAD_FDIR_PF |
@@ -177,6 +180,54 @@ int iavf_send_vf_offload_vlan_v2_msg(struct iavf_adapter *adapter)
NULL, 0);
}
+int iavf_send_vf_supported_rxdids_msg(struct iavf_adapter *adapter)
+{
+ adapter->aq_required &= ~IAVF_FLAG_AQ_GET_SUPPORTED_RXDIDS;
+
+ if (!IAVF_RXDID_ALLOWED(adapter))
+ return -EOPNOTSUPP;
+
+ adapter->current_op = VIRTCHNL_OP_GET_SUPPORTED_RXDIDS;
+
+ return iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_SUPPORTED_RXDIDS,
+ NULL, 0);
+}
+
+/**
+ * iavf_send_vf_ptp_caps_msg - Send request for PTP capabilities
+ * @adapter: private adapter structure
+ *
+ * Send the VIRTCHNL_OP_1588_PTP_GET_CAPS command to the PF to request the PTP
+ * capabilities available to this device. This includes the following
+ * potential access:
+ *
+ * * READ_PHC - access to read the PTP hardware clock time
+ * * RX_TSTAMP - access to request Rx timestamps on all received packets
+ *
+ * The PF will reply with the same opcode a filled out copy of the
+ * virtchnl_ptp_caps structure which defines the specifics of which features
+ * are accessible to this device.
+ *
+ * Return: 0 if success, error code otherwise.
+ */
+int iavf_send_vf_ptp_caps_msg(struct iavf_adapter *adapter)
+{
+ struct virtchnl_ptp_caps hw_caps = {
+ .caps = VIRTCHNL_1588_PTP_CAP_READ_PHC |
+ VIRTCHNL_1588_PTP_CAP_RX_TSTAMP
+ };
+
+ adapter->aq_required &= ~IAVF_FLAG_AQ_GET_PTP_CAPS;
+
+ if (!IAVF_PTP_ALLOWED(adapter))
+ return -EOPNOTSUPP;
+
+ adapter->current_op = VIRTCHNL_OP_1588_PTP_GET_CAPS;
+
+ return iavf_send_pf_msg(adapter, VIRTCHNL_OP_1588_PTP_GET_CAPS,
+ (u8 *)&hw_caps, sizeof(hw_caps));
+}
+
/**
* iavf_validate_num_queues
* @adapter: adapter structure
@@ -263,6 +314,40 @@ int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter)
return err;
}
+int iavf_get_vf_supported_rxdids(struct iavf_adapter *adapter)
+{
+ struct iavf_arq_event_info event;
+ u64 rxdids;
+ int err;
+
+ event.msg_buf = (u8 *)&rxdids;
+ event.buf_len = sizeof(rxdids);
+
+ err = iavf_poll_virtchnl_msg(&adapter->hw, &event,
+ VIRTCHNL_OP_GET_SUPPORTED_RXDIDS);
+ if (!err)
+ adapter->supp_rxdids = rxdids;
+
+ return err;
+}
+
+int iavf_get_vf_ptp_caps(struct iavf_adapter *adapter)
+{
+ struct virtchnl_ptp_caps caps = {};
+ struct iavf_arq_event_info event;
+ int err;
+
+ event.msg_buf = (u8 *)&caps;
+ event.buf_len = sizeof(caps);
+
+ err = iavf_poll_virtchnl_msg(&adapter->hw, &event,
+ VIRTCHNL_OP_1588_PTP_GET_CAPS);
+ if (!err)
+ adapter->ptp.hw_caps = caps;
+
+ return err;
+}
+
/**
* iavf_configure_queues
* @adapter: adapter structure
@@ -275,6 +360,7 @@ void iavf_configure_queues(struct iavf_adapter *adapter)
int pairs = adapter->num_active_queues;
struct virtchnl_queue_pair_info *vqpi;
u32 i, max_frame;
+ u8 rx_flags = 0;
size_t len;
max_frame = LIBIE_MAX_RX_FRM_LEN(adapter->rx_rings->pp->p.offset);
@@ -292,6 +378,9 @@ void iavf_configure_queues(struct iavf_adapter *adapter)
if (!vqci)
return;
+ if (iavf_ptp_cap_supported(adapter, VIRTCHNL_1588_PTP_CAP_RX_TSTAMP))
+ rx_flags |= VIRTCHNL_PTP_RX_TSTAMP;
+
vqci->vsi_id = adapter->vsi_res->vsi_id;
vqci->num_queue_pairs = pairs;
vqpi = vqci->qpair;
@@ -309,9 +398,12 @@ void iavf_configure_queues(struct iavf_adapter *adapter)
vqpi->rxq.dma_ring_addr = adapter->rx_rings[i].dma;
vqpi->rxq.max_pkt_size = max_frame;
vqpi->rxq.databuffer_size = adapter->rx_rings[i].rx_buf_len;
+ if (IAVF_RXDID_ALLOWED(adapter))
+ vqpi->rxq.rxdid = adapter->rxdid;
if (CRC_OFFLOAD_ALLOWED(adapter))
vqpi->rxq.crc_disable = !!(adapter->netdev->features &
NETIF_F_RXFCS);
+ vqpi->rxq.flags = rx_flags;
vqpi++;
}
@@ -1402,6 +1494,67 @@ void iavf_disable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid)
VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2);
}
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+/**
+ * iavf_virtchnl_send_ptp_cmd - Send one queued PTP command
+ * @adapter: adapter private structure
+ *
+ * De-queue one PTP command request and send the command message to the PF.
+ * Clear IAVF_FLAG_AQ_SEND_PTP_CMD if no more messages are left to send.
+ */
+void iavf_virtchnl_send_ptp_cmd(struct iavf_adapter *adapter)
+{
+ struct iavf_ptp_aq_cmd *cmd;
+ int err;
+
+ if (!adapter->ptp.clock) {
+ /* This shouldn't be possible to hit, since no messages should
+ * be queued if PTP is not initialized.
+ */
+ pci_err(adapter->pdev, "PTP is not initialized\n");
+ adapter->aq_required &= ~IAVF_FLAG_AQ_SEND_PTP_CMD;
+ return;
+ }
+
+ mutex_lock(&adapter->ptp.aq_cmd_lock);
+ cmd = list_first_entry_or_null(&adapter->ptp.aq_cmds,
+ struct iavf_ptp_aq_cmd, list);
+ if (!cmd) {
+ /* no further PTP messages to send */
+ adapter->aq_required &= ~IAVF_FLAG_AQ_SEND_PTP_CMD;
+ goto out_unlock;
+ }
+
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ pci_err(adapter->pdev,
+ "Cannot send PTP command %d, command %d pending\n",
+ cmd->v_opcode, adapter->current_op);
+ goto out_unlock;
+ }
+
+ err = iavf_send_pf_msg(adapter, cmd->v_opcode, cmd->msg, cmd->msglen);
+ if (!err) {
+ /* Command was sent without errors, so we can remove it from
+ * the list and discard it.
+ */
+ list_del(&cmd->list);
+ kfree(cmd);
+ } else {
+ /* We failed to send the command, try again next cycle */
+ pci_err(adapter->pdev, "Failed to send PTP command %d\n",
+ cmd->v_opcode);
+ }
+
+ if (list_empty(&adapter->ptp.aq_cmds))
+ /* no further PTP messages to send */
+ adapter->aq_required &= ~IAVF_FLAG_AQ_SEND_PTP_CMD;
+
+out_unlock:
+ mutex_unlock(&adapter->ptp.aq_cmd_lock);
+}
+#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
+
/**
* iavf_print_link_message - print link up or down
* @adapter: adapter structure
@@ -2098,6 +2251,37 @@ static void iavf_activate_fdir_filters(struct iavf_adapter *adapter)
}
/**
+ * iavf_virtchnl_ptp_get_time - Respond to VIRTCHNL_OP_1588_PTP_GET_TIME
+ * @adapter: private adapter structure
+ * @data: the message from the PF
+ * @len: length of the message from the PF
+ *
+ * Handle the VIRTCHNL_OP_1588_PTP_GET_TIME message from the PF. This message
+ * is sent by the PF in response to the same op as a request from the VF.
+ * Extract the 64bit nanoseconds time from the message and store it in
+ * cached_phc_time. Then, notify any thread that is waiting for the update via
+ * the wait queue.
+ */
+static void iavf_virtchnl_ptp_get_time(struct iavf_adapter *adapter,
+ void *data, u16 len)
+{
+ struct virtchnl_phc_time *msg = data;
+
+ if (len != sizeof(*msg)) {
+ dev_err_once(&adapter->pdev->dev,
+ "Invalid VIRTCHNL_OP_1588_PTP_GET_TIME from PF. Got size %u, expected %zu\n",
+ len, sizeof(*msg));
+ return;
+ }
+
+ adapter->ptp.cached_phc_time = msg->time;
+ adapter->ptp.cached_phc_updated = jiffies;
+ adapter->ptp.phc_time_ready = true;
+
+ wake_up(&adapter->ptp.phc_time_waitqueue);
+}
+
+/**
* iavf_virtchnl_completion
* @adapter: adapter structure
* @v_opcode: opcode sent by PF
@@ -2509,6 +2693,25 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
aq_required;
}
break;
+ case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS:
+ if (msglen != sizeof(u64))
+ return;
+
+ adapter->supp_rxdids = *(u64 *)msg;
+
+ break;
+ case VIRTCHNL_OP_1588_PTP_GET_CAPS:
+ if (msglen != sizeof(adapter->ptp.hw_caps))
+ return;
+
+ adapter->ptp.hw_caps = *(struct virtchnl_ptp_caps *)msg;
+
+ /* process any state change needed due to new capabilities */
+ iavf_ptp_process_caps(adapter);
+ break;
+ case VIRTCHNL_OP_1588_PTP_GET_TIME:
+ iavf_virtchnl_ptp_get_time(adapter, msg, msglen);
+ break;
case VIRTCHNL_OP_ENABLE_QUEUES:
/* enable transmits */
iavf_irq_enable(adapter, true);
diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c
index dbdb83567364..fcb199efbea5 100644
--- a/drivers/net/ethernet/intel/ice/devlink/devlink.c
+++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c
@@ -1205,6 +1205,25 @@ static int ice_devlink_set_parent(struct devlink_rate *devlink_rate,
return status;
}
+static void ice_set_min_max_msix(struct ice_pf *pf)
+{
+ struct devlink *devlink = priv_to_devlink(pf);
+ union devlink_param_value val;
+ int err;
+
+ err = devl_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
+ &val);
+ if (!err)
+ pf->msix.min = val.vu32;
+
+ err = devl_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
+ &val);
+ if (!err)
+ pf->msix.max = val.vu32;
+}
+
/**
* ice_devlink_reinit_up - do reinit of the given PF
* @pf: pointer to the PF struct
@@ -1220,6 +1239,9 @@ static int ice_devlink_reinit_up(struct ice_pf *pf)
return err;
}
+ /* load MSI-X values */
+ ice_set_min_max_msix(pf);
+
err = ice_init_dev(pf);
if (err)
goto unroll_hw_init;
@@ -1533,6 +1555,43 @@ static int ice_devlink_local_fwd_validate(struct devlink *devlink, u32 id,
return 0;
}
+static int
+ice_devlink_msix_max_pf_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+
+ if (val.vu32 > pf->hw.func_caps.common_cap.num_msix_vectors)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int
+ice_devlink_msix_min_pf_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ if (val.vu32 < ICE_MIN_MSIX)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ice_devlink_enable_rdma_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+ bool new_state = val.vbool;
+
+ if (new_state && !test_bit(ICE_FLAG_RDMA_ENA, pf->flags))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
enum ice_param_id {
ICE_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
ICE_DEVLINK_PARAM_ID_TX_SCHED_LAYERS,
@@ -1548,6 +1607,17 @@ static const struct devlink_param ice_dvl_rdma_params[] = {
ice_devlink_enable_iw_get,
ice_devlink_enable_iw_set,
ice_devlink_enable_iw_validate),
+ DEVLINK_PARAM_GENERIC(ENABLE_RDMA, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, ice_devlink_enable_rdma_validate),
+};
+
+static const struct devlink_param ice_dvl_msix_params[] = {
+ DEVLINK_PARAM_GENERIC(MSIX_VEC_PER_PF_MAX,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, ice_devlink_msix_max_pf_validate),
+ DEVLINK_PARAM_GENERIC(MSIX_VEC_PER_PF_MIN,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, ice_devlink_msix_min_pf_validate),
};
static const struct devlink_param ice_dvl_sched_params[] = {
@@ -1651,6 +1721,7 @@ void ice_devlink_unregister(struct ice_pf *pf)
int ice_devlink_register_params(struct ice_pf *pf)
{
struct devlink *devlink = priv_to_devlink(pf);
+ union devlink_param_value value;
struct ice_hw *hw = &pf->hw;
int status;
@@ -1659,10 +1730,39 @@ int ice_devlink_register_params(struct ice_pf *pf)
if (status)
return status;
+ status = devl_params_register(devlink, ice_dvl_msix_params,
+ ARRAY_SIZE(ice_dvl_msix_params));
+ if (status)
+ goto unregister_rdma_params;
+
if (hw->func_caps.common_cap.tx_sched_topo_comp_mode_en)
status = devl_params_register(devlink, ice_dvl_sched_params,
ARRAY_SIZE(ice_dvl_sched_params));
+ if (status)
+ goto unregister_msix_params;
+
+ value.vu32 = pf->msix.max;
+ devl_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
+ value);
+ value.vu32 = pf->msix.min;
+ devl_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
+ value);
+
+ value.vbool = test_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+ devl_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+ value);
+
+ return 0;
+unregister_msix_params:
+ devl_params_unregister(devlink, ice_dvl_msix_params,
+ ARRAY_SIZE(ice_dvl_msix_params));
+unregister_rdma_params:
+ devl_params_unregister(devlink, ice_dvl_rdma_params,
+ ARRAY_SIZE(ice_dvl_rdma_params));
return status;
}
@@ -1673,6 +1773,8 @@ void ice_devlink_unregister_params(struct ice_pf *pf)
devl_params_unregister(devlink, ice_dvl_rdma_params,
ARRAY_SIZE(ice_dvl_rdma_params));
+ devl_params_unregister(devlink, ice_dvl_msix_params,
+ ARRAY_SIZE(ice_dvl_msix_params));
if (hw->func_caps.common_cap.tx_sched_topo_comp_mode_en)
devl_params_unregister(devlink, ice_dvl_sched_params,
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 71e05d30f0fd..c9104b13e1d2 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -97,9 +97,6 @@
#define ICE_MIN_LAN_OICR_MSIX 1
#define ICE_MIN_MSIX (ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX)
#define ICE_FDIR_MSIX 2
-#define ICE_RDMA_NUM_AEQ_MSIX 4
-#define ICE_MIN_RDMA_MSIX 2
-#define ICE_ESWITCH_MSIX 1
#define ICE_NO_VSI 0xffff
#define ICE_VSI_MAP_CONTIG 0
#define ICE_VSI_MAP_SCATTER 1
@@ -542,6 +539,14 @@ struct ice_agg_node {
u8 valid;
};
+struct ice_pf_msix {
+ u32 cur;
+ u32 min;
+ u32 max;
+ u32 total;
+ u32 rest;
+};
+
struct ice_pf {
struct pci_dev *pdev;
struct ice_adapter *adapter;
@@ -556,13 +561,7 @@ struct ice_pf {
/* OS reserved IRQ details */
struct msix_entry *msix_entries;
struct ice_irq_tracker irq_tracker;
- /* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the
- * number of MSIX vectors needed for all SR-IOV VFs from the number of
- * MSIX vectors allowed on this PF.
- */
- u16 sriov_base_vector;
- unsigned long *sriov_irq_bm; /* bitmap to track irq usage */
- u16 sriov_irq_size; /* size of the irq_bm bitmap */
+ struct ice_virt_irq_tracker virt_irq_tracker;
u16 ctrl_vsi_idx; /* control VSI index in pf->vsi array */
@@ -612,7 +611,7 @@ struct ice_pf {
struct msi_map ll_ts_irq; /* LL_TS interrupt MSIX vector */
u16 max_pf_txqs; /* Total Tx queues PF wide */
u16 max_pf_rxqs; /* Total Rx queues PF wide */
- u16 num_lan_msix; /* Total MSIX vectors for base driver */
+ struct ice_pf_msix msix;
u16 num_lan_tx; /* num LAN Tx queues setup */
u16 num_lan_rx; /* num LAN Rx queues setup */
u16 next_vsi; /* Next free slot in pf->vsi[] - 0-based! */
@@ -1047,10 +1046,5 @@ static inline void ice_clear_rdma_cap(struct ice_pf *pf)
clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
}
-static inline enum ice_phy_model ice_get_phy_model(const struct ice_hw *hw)
-{
- return hw->ptp.phy_model;
-}
-
extern const struct xdp_metadata_ops ice_xdp_md_ops;
#endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index b2af8e3586f7..b3234a55a253 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -473,9 +473,6 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
*/
if (vsi->type != ICE_VSI_VF)
ice_write_qrxflxp_cntxt(hw, pf_q, rxdid, 0x3, true);
- else
- ice_write_qrxflxp_cntxt(hw, pf_q, ICE_RXDID_LEGACY_1, 0x3,
- false);
/* Absolute queue number out of 2K needs to be passed */
err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
@@ -801,13 +798,11 @@ int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
return 0;
err_out:
- while (v_idx--)
- ice_free_q_vector(vsi, v_idx);
- dev_err(dev, "Failed to allocate %d q_vector for VSI %d, ret=%d\n",
- vsi->num_q_vectors, vsi->vsi_num, err);
- vsi->num_q_vectors = 0;
- return err;
+ dev_info(dev, "Failed to allocate %d q_vectors for VSI %d, new value %d",
+ vsi->num_q_vectors, vsi->vsi_num, v_idx);
+ vsi->num_q_vectors = v_idx;
+ return v_idx ? 0 : err;
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 7a2a2e8da8fa..aaa592ffd2d8 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -186,7 +186,7 @@ static int ice_set_mac_type(struct ice_hw *hw)
* ice_is_generic_mac - check if device's mac_type is generic
* @hw: pointer to the hardware structure
*
- * Return: true if mac_type is generic (with SBQ support), false if not
+ * Return: true if mac_type is ICE_MAC_GENERIC*, false otherwise.
*/
bool ice_is_generic_mac(struct ice_hw *hw)
{
@@ -195,120 +195,6 @@ bool ice_is_generic_mac(struct ice_hw *hw)
}
/**
- * ice_is_e810
- * @hw: pointer to the hardware structure
- *
- * returns true if the device is E810 based, false if not.
- */
-bool ice_is_e810(struct ice_hw *hw)
-{
- return hw->mac_type == ICE_MAC_E810;
-}
-
-/**
- * ice_is_e810t
- * @hw: pointer to the hardware structure
- *
- * returns true if the device is E810T based, false if not.
- */
-bool ice_is_e810t(struct ice_hw *hw)
-{
- switch (hw->device_id) {
- case ICE_DEV_ID_E810C_SFP:
- switch (hw->subsystem_device_id) {
- case ICE_SUBDEV_ID_E810T:
- case ICE_SUBDEV_ID_E810T2:
- case ICE_SUBDEV_ID_E810T3:
- case ICE_SUBDEV_ID_E810T4:
- case ICE_SUBDEV_ID_E810T6:
- case ICE_SUBDEV_ID_E810T7:
- return true;
- }
- break;
- case ICE_DEV_ID_E810C_QSFP:
- switch (hw->subsystem_device_id) {
- case ICE_SUBDEV_ID_E810T2:
- case ICE_SUBDEV_ID_E810T3:
- case ICE_SUBDEV_ID_E810T5:
- return true;
- }
- break;
- default:
- break;
- }
-
- return false;
-}
-
-/**
- * ice_is_e822 - Check if a device is E822 family device
- * @hw: pointer to the hardware structure
- *
- * Return: true if the device is E822 based, false if not.
- */
-bool ice_is_e822(struct ice_hw *hw)
-{
- switch (hw->device_id) {
- case ICE_DEV_ID_E822C_BACKPLANE:
- case ICE_DEV_ID_E822C_QSFP:
- case ICE_DEV_ID_E822C_SFP:
- case ICE_DEV_ID_E822C_10G_BASE_T:
- case ICE_DEV_ID_E822C_SGMII:
- case ICE_DEV_ID_E822L_BACKPLANE:
- case ICE_DEV_ID_E822L_SFP:
- case ICE_DEV_ID_E822L_10G_BASE_T:
- case ICE_DEV_ID_E822L_SGMII:
- return true;
- default:
- return false;
- }
-}
-
-/**
- * ice_is_e823
- * @hw: pointer to the hardware structure
- *
- * returns true if the device is E823-L or E823-C based, false if not.
- */
-bool ice_is_e823(struct ice_hw *hw)
-{
- switch (hw->device_id) {
- case ICE_DEV_ID_E823L_BACKPLANE:
- case ICE_DEV_ID_E823L_SFP:
- case ICE_DEV_ID_E823L_10G_BASE_T:
- case ICE_DEV_ID_E823L_1GBE:
- case ICE_DEV_ID_E823L_QSFP:
- case ICE_DEV_ID_E823C_BACKPLANE:
- case ICE_DEV_ID_E823C_QSFP:
- case ICE_DEV_ID_E823C_SFP:
- case ICE_DEV_ID_E823C_10G_BASE_T:
- case ICE_DEV_ID_E823C_SGMII:
- return true;
- default:
- return false;
- }
-}
-
-/**
- * ice_is_e825c - Check if a device is E825C family device
- * @hw: pointer to the hardware structure
- *
- * Return: true if the device is E825-C based, false if not.
- */
-bool ice_is_e825c(struct ice_hw *hw)
-{
- switch (hw->device_id) {
- case ICE_DEV_ID_E825C_BACKPLANE:
- case ICE_DEV_ID_E825C_QSFP:
- case ICE_DEV_ID_E825C_SFP:
- case ICE_DEV_ID_E825C_SGMII:
- return true;
- default:
- return false;
- }
-}
-
-/**
* ice_is_pf_c827 - check if pf contains c827 phy
* @hw: pointer to the hw struct
*
@@ -2408,7 +2294,7 @@ ice_parse_1588_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
info->tmr_index_owned = ((number & ICE_TS_TMR_IDX_OWND_M) != 0);
info->tmr_index_assoc = ((number & ICE_TS_TMR_IDX_ASSOC_M) != 0);
- if (!ice_is_e825c(hw)) {
+ if (hw->mac_type != ICE_MAC_GENERIC_3K_E825) {
info->clk_freq = FIELD_GET(ICE_TS_CLK_FREQ_M, number);
info->clk_src = ((number & ICE_TS_CLK_SRC_M) != 0);
} else {
@@ -5765,6 +5651,96 @@ ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr,
}
/**
+ * ice_get_pca9575_handle - find and return the PCA9575 controller
+ * @hw: pointer to the hw struct
+ * @pca9575_handle: GPIO controller's handle
+ *
+ * Find and return the GPIO controller's handle in the netlist.
+ * When found - the value will be cached in the hw structure and following calls
+ * will return cached value.
+ *
+ * Return: 0 on success, -ENXIO when there's no PCA9575 present.
+ */
+int ice_get_pca9575_handle(struct ice_hw *hw, u16 *pca9575_handle)
+{
+ struct ice_aqc_get_link_topo *cmd;
+ struct ice_aq_desc desc;
+ int err;
+ u8 idx;
+
+ /* If handle was read previously return cached value */
+ if (hw->io_expander_handle) {
+ *pca9575_handle = hw->io_expander_handle;
+ return 0;
+ }
+
+#define SW_PCA9575_SFP_TOPO_IDX 2
+#define SW_PCA9575_QSFP_TOPO_IDX 1
+
+ /* Check if the SW IO expander controlling SMA exists in the netlist. */
+ if (hw->device_id == ICE_DEV_ID_E810C_SFP)
+ idx = SW_PCA9575_SFP_TOPO_IDX;
+ else if (hw->device_id == ICE_DEV_ID_E810C_QSFP)
+ idx = SW_PCA9575_QSFP_TOPO_IDX;
+ else
+ return -ENXIO;
+
+ /* If handle was not detected read it from the netlist */
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo);
+ cmd = &desc.params.get_link_topo;
+ cmd->addr.topo_params.node_type_ctx =
+ ICE_AQC_LINK_TOPO_NODE_TYPE_GPIO_CTRL;
+ cmd->addr.topo_params.index = idx;
+
+ err = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+ if (err)
+ return -ENXIO;
+
+ /* Verify if we found the right IO expander type */
+ if (desc.params.get_link_topo.node_part_num !=
+ ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575)
+ return -ENXIO;
+
+ /* If present save the handle and return it */
+ hw->io_expander_handle =
+ le16_to_cpu(desc.params.get_link_topo.addr.handle);
+ *pca9575_handle = hw->io_expander_handle;
+
+ return 0;
+}
+
+/**
+ * ice_read_pca9575_reg - read the register from the PCA9575 controller
+ * @hw: pointer to the hw struct
+ * @offset: GPIO controller register offset
+ * @data: pointer to data to be read from the GPIO controller
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int ice_read_pca9575_reg(struct ice_hw *hw, u8 offset, u8 *data)
+{
+ struct ice_aqc_link_topo_addr link_topo;
+ __le16 addr;
+ u16 handle;
+ int err;
+
+ memset(&link_topo, 0, sizeof(link_topo));
+
+ err = ice_get_pca9575_handle(hw, &handle);
+ if (err)
+ return err;
+
+ link_topo.handle = cpu_to_le16(handle);
+ link_topo.topo_params.node_type_ctx =
+ FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M,
+ ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED);
+
+ addr = cpu_to_le16((u16)offset);
+
+ return ice_aq_read_i2c(hw, link_topo, 0, addr, 1, data, NULL);
+}
+
+/**
* ice_aq_set_gpio
* @hw: pointer to the hw struct
* @gpio_ctrl_handle: GPIO controller node handle
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 15ba38543738..9b00aa0ddf10 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -131,7 +131,6 @@ int
ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags,
struct ice_sq_cd *cd);
bool ice_is_generic_mac(struct ice_hw *hw);
-bool ice_is_e810(struct ice_hw *hw);
int ice_clear_pf_cfg(struct ice_hw *hw);
int
ice_aq_set_phy_cfg(struct ice_hw *hw, struct ice_port_info *pi,
@@ -276,10 +275,6 @@ ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
void
ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
u64 *prev_stat, u64 *cur_stat);
-bool ice_is_e810t(struct ice_hw *hw);
-bool ice_is_e822(struct ice_hw *hw);
-bool ice_is_e823(struct ice_hw *hw);
-bool ice_is_e825c(struct ice_hw *hw);
int
ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
struct ice_aqc_txsched_elem_data *buf);
@@ -306,5 +301,7 @@ int
ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr,
u16 bus_addr, __le16 addr, u8 params, const u8 *data,
struct ice_sq_cd *cd);
+int ice_get_pca9575_handle(struct ice_hw *hw, u16 *pca9575_handle);
+int ice_read_pca9575_reg(struct ice_hw *hw, u8 offset, u8 *data);
bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw);
#endif /* _ICE_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c
index 03988be03729..69d5b1a28491 100644
--- a/drivers/net/ethernet/intel/ice/ice_ddp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ddp.c
@@ -2345,14 +2345,14 @@ ice_get_set_tx_topo(struct ice_hw *hw, u8 *buf, u16 buf_size,
cmd->set_flags |= ICE_AQC_TX_TOPO_FLAGS_SRC_RAM |
ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW;
- if (ice_is_e825c(hw))
+ if (hw->mac_type == ICE_MAC_GENERIC_3K_E825)
desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
} else {
ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_tx_topo);
cmd->get_flags = ICE_AQC_TX_TOPO_GET_RAM;
}
- if (!ice_is_e825c(hw))
+ if (hw->mac_type != ICE_MAC_GENERIC_3K_E825)
desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index f241493a6ac8..b0805704834d 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3788,8 +3788,7 @@ ice_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info)
*/
static int ice_get_max_txq(struct ice_pf *pf)
{
- return min3(pf->num_lan_msix, (u16)num_online_cpus(),
- (u16)pf->hw.func_caps.common_cap.num_txq);
+ return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_txq);
}
/**
@@ -3798,8 +3797,7 @@ static int ice_get_max_txq(struct ice_pf *pf)
*/
static int ice_get_max_rxq(struct ice_pf *pf)
{
- return min3(pf->num_lan_msix, (u16)num_online_cpus(),
- (u16)pf->hw.func_caps.common_cap.num_rxq);
+ return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_rxq);
}
/**
@@ -3817,8 +3815,7 @@ static u32 ice_get_combined_cnt(struct ice_vsi *vsi)
ice_for_each_q_vector(vsi, q_idx) {
struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
- if (q_vector->rx.rx_ring && q_vector->tx.tx_ring)
- combined++;
+ combined += min(q_vector->num_ring_tx, q_vector->num_ring_rx);
}
return combined;
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
index ee9862ddfe15..1d118171de37 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -1605,22 +1605,19 @@ void ice_fdir_replay_fltrs(struct ice_pf *pf)
*/
int ice_fdir_create_dflt_rules(struct ice_pf *pf)
{
+ const enum ice_fltr_ptype dflt_rules[] = {
+ ICE_FLTR_PTYPE_NONF_IPV4_TCP, ICE_FLTR_PTYPE_NONF_IPV4_UDP,
+ ICE_FLTR_PTYPE_NONF_IPV6_TCP, ICE_FLTR_PTYPE_NONF_IPV6_UDP,
+ };
int err;
/* Create perfect TCP and UDP rules in hardware. */
- err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_TCP);
- if (err)
- return err;
-
- err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_UDP);
- if (err)
- return err;
+ for (int i = 0; i < ARRAY_SIZE(dflt_rules); i++) {
+ err = ice_create_init_fdir_rule(pf, dflt_rules[i]);
- err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV6_TCP);
- if (err)
- return err;
-
- err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV6_UDP);
+ if (err)
+ break;
+ }
return err;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c
index b2148dbe49b2..6b26290452d4 100644
--- a/drivers/net/ethernet/intel/ice/ice_gnss.c
+++ b/drivers/net/ethernet/intel/ice/ice_gnss.c
@@ -381,32 +381,23 @@ void ice_gnss_exit(struct ice_pf *pf)
}
/**
- * ice_gnss_is_gps_present - Check if GPS HW is present
+ * ice_gnss_is_module_present - Check if GNSS HW is present
* @hw: pointer to HW struct
+ *
+ * Return: true when GNSS is present, false otherwise.
*/
-bool ice_gnss_is_gps_present(struct ice_hw *hw)
+bool ice_gnss_is_module_present(struct ice_hw *hw)
{
- if (!hw->func_caps.ts_func_info.src_tmr_owned)
- return false;
+ int err;
+ u8 data;
- if (!ice_is_gps_in_netlist(hw))
+ if (!hw->func_caps.ts_func_info.src_tmr_owned ||
+ !ice_is_gps_in_netlist(hw))
return false;
-#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
- if (ice_is_e810t(hw)) {
- int err;
- u8 data;
-
- err = ice_read_pca9575_reg(hw, ICE_PCA9575_P0_IN, &data);
- if (err || !!(data & ICE_P0_GNSS_PRSNT_N))
- return false;
- } else {
- return false;
- }
-#else
- if (!ice_is_e810t(hw))
+ err = ice_read_pca9575_reg(hw, ICE_PCA9575_P0_IN, &data);
+ if (err || !!(data & ICE_P0_GNSS_PRSNT_N))
return false;
-#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
return true;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.h b/drivers/net/ethernet/intel/ice/ice_gnss.h
index 75e567ad7059..15daf603ed7b 100644
--- a/drivers/net/ethernet/intel/ice/ice_gnss.h
+++ b/drivers/net/ethernet/intel/ice/ice_gnss.h
@@ -37,11 +37,11 @@ struct gnss_serial {
#if IS_ENABLED(CONFIG_GNSS)
void ice_gnss_init(struct ice_pf *pf);
void ice_gnss_exit(struct ice_pf *pf);
-bool ice_gnss_is_gps_present(struct ice_hw *hw);
+bool ice_gnss_is_module_present(struct ice_hw *hw);
#else
static inline void ice_gnss_init(struct ice_pf *pf) { }
static inline void ice_gnss_exit(struct ice_pf *pf) { }
-static inline bool ice_gnss_is_gps_present(struct ice_hw *hw)
+static inline bool ice_gnss_is_module_present(struct ice_hw *hw)
{
return false;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index dc88aea9f473..aa4bfbcf85d2 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -541,10 +541,22 @@
#define PFPM_WUS_MAG_M BIT(1)
#define PFPM_WUS_MNG_M BIT(3)
#define PFPM_WUS_FW_RST_WK_M BIT(31)
+#define E830_PRTMAC_TS_TX_MEM_VALID_H 0x001E2020
+#define E830_PRTMAC_TS_TX_MEM_VALID_L 0x001E2000
#define E830_PRTMAC_CL01_PS_QNT 0x001E32A0
#define E830_PRTMAC_CL01_PS_QNT_CL0_M GENMASK(15, 0)
#define E830_PRTMAC_CL01_QNT_THR 0x001E3320
#define E830_PRTMAC_CL01_QNT_THR_CL0_M GENMASK(15, 0)
+#define E830_PRTTSYN_TXTIME_H(_i) (0x001E5800 + ((_i) * 32))
+#define E830_PRTTSYN_TXTIME_L(_i) (0x001E5000 + ((_i) * 32))
+#define E830_GLPTM_ART_CTL 0x00088B50
+#define E830_GLPTM_ART_CTL_ACTIVE_M BIT(0)
+#define E830_GLPTM_ART_TIME_H 0x00088B54
+#define E830_GLPTM_ART_TIME_L 0x00088B58
+#define E830_GLTSYN_PTMTIME_H(_i) (0x00088B48 + ((_i) * 4))
+#define E830_GLTSYN_PTMTIME_L(_i) (0x00088B40 + ((_i) * 4))
+#define E830_PFPTM_SEM 0x00088B00
+#define E830_PFPTM_SEM_BUSY_M BIT(0)
#define VFINT_DYN_CTLN(_i) (0x00003800 + ((_i) * 4))
#define VFINT_DYN_CTLN_CLEARPBA_M BIT(1)
#define E830_MBX_PF_IN_FLIGHT_VF_MSGS_THRESH 0x00234000
diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c
index 145b27f2a4ce..bab3e81cad5d 100644
--- a/drivers/net/ethernet/intel/ice/ice_idc.c
+++ b/drivers/net/ethernet/intel/ice/ice_idc.c
@@ -228,61 +228,34 @@ void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos)
}
EXPORT_SYMBOL_GPL(ice_get_qos_params);
-/**
- * ice_alloc_rdma_qvectors - Allocate vector resources for RDMA driver
- * @pf: board private structure to initialize
- */
-static int ice_alloc_rdma_qvectors(struct ice_pf *pf)
+int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry)
{
- if (ice_is_rdma_ena(pf)) {
- int i;
-
- pf->msix_entries = kcalloc(pf->num_rdma_msix,
- sizeof(*pf->msix_entries),
- GFP_KERNEL);
- if (!pf->msix_entries)
- return -ENOMEM;
+ struct msi_map map = ice_alloc_irq(pf, true);
- /* RDMA is the only user of pf->msix_entries array */
- pf->rdma_base_vector = 0;
-
- for (i = 0; i < pf->num_rdma_msix; i++) {
- struct msix_entry *entry = &pf->msix_entries[i];
- struct msi_map map;
+ if (map.index < 0)
+ return -ENOMEM;
- map = ice_alloc_irq(pf, false);
- if (map.index < 0)
- break;
+ entry->entry = map.index;
+ entry->vector = map.virq;
- entry->entry = map.index;
- entry->vector = map.virq;
- }
- }
return 0;
}
+EXPORT_SYMBOL_GPL(ice_alloc_rdma_qvector);
/**
* ice_free_rdma_qvector - free vector resources reserved for RDMA driver
* @pf: board private structure to initialize
+ * @entry: MSI-X entry to be removed
*/
-static void ice_free_rdma_qvector(struct ice_pf *pf)
+void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry)
{
- int i;
-
- if (!pf->msix_entries)
- return;
-
- for (i = 0; i < pf->num_rdma_msix; i++) {
- struct msi_map map;
+ struct msi_map map;
- map.index = pf->msix_entries[i].entry;
- map.virq = pf->msix_entries[i].vector;
- ice_free_irq(pf, map);
- }
-
- kfree(pf->msix_entries);
- pf->msix_entries = NULL;
+ map.index = entry->entry;
+ map.virq = entry->vector;
+ ice_free_irq(pf, map);
}
+EXPORT_SYMBOL_GPL(ice_free_rdma_qvector);
/**
* ice_adev_release - function to be mapped to AUX dev's release op
@@ -382,12 +355,6 @@ int ice_init_rdma(struct ice_pf *pf)
return -ENOMEM;
}
- /* Reserve vector resources */
- ret = ice_alloc_rdma_qvectors(pf);
- if (ret < 0) {
- dev_err(dev, "failed to reserve vectors for RDMA\n");
- goto err_reserve_rdma_qvector;
- }
pf->rdma_mode |= IIDC_RDMA_PROTOCOL_ROCEV2;
ret = ice_plug_aux_dev(pf);
if (ret)
@@ -395,8 +362,6 @@ int ice_init_rdma(struct ice_pf *pf)
return 0;
err_plug_aux_dev:
- ice_free_rdma_qvector(pf);
-err_reserve_rdma_qvector:
pf->adev = NULL;
xa_erase(&ice_aux_id, pf->aux_idx);
return ret;
@@ -412,6 +377,5 @@ void ice_deinit_rdma(struct ice_pf *pf)
return;
ice_unplug_aux_dev(pf);
- ice_free_rdma_qvector(pf);
xa_erase(&ice_aux_id, pf->aux_idx);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_irq.c b/drivers/net/ethernet/intel/ice/ice_irq.c
index ad82ff7d1995..30801fd375f0 100644
--- a/drivers/net/ethernet/intel/ice/ice_irq.c
+++ b/drivers/net/ethernet/intel/ice/ice_irq.c
@@ -20,6 +20,19 @@ ice_init_irq_tracker(struct ice_pf *pf, unsigned int max_vectors,
xa_init_flags(&pf->irq_tracker.entries, XA_FLAGS_ALLOC);
}
+static int
+ice_init_virt_irq_tracker(struct ice_pf *pf, u32 base, u32 num_entries)
+{
+ pf->virt_irq_tracker.bm = bitmap_zalloc(num_entries, GFP_KERNEL);
+ if (!pf->virt_irq_tracker.bm)
+ return -ENOMEM;
+
+ pf->virt_irq_tracker.num_entries = num_entries;
+ pf->virt_irq_tracker.base = base;
+
+ return 0;
+}
+
/**
* ice_deinit_irq_tracker - free xarray tracker
* @pf: board private structure
@@ -29,6 +42,11 @@ static void ice_deinit_irq_tracker(struct ice_pf *pf)
xa_destroy(&pf->irq_tracker.entries);
}
+static void ice_deinit_virt_irq_tracker(struct ice_pf *pf)
+{
+ bitmap_free(pf->virt_irq_tracker.bm);
+}
+
/**
* ice_free_irq_res - free a block of resources
* @pf: board private structure
@@ -45,7 +63,7 @@ static void ice_free_irq_res(struct ice_pf *pf, u16 index)
/**
* ice_get_irq_res - get an interrupt resource
* @pf: board private structure
- * @dyn_only: force entry to be dynamically allocated
+ * @dyn_allowed: allow entry to be dynamically allocated
*
* Allocate new irq entry in the free slot of the tracker. Since xarray
* is used, always allocate new entry at the lowest possible index. Set
@@ -53,11 +71,12 @@ static void ice_free_irq_res(struct ice_pf *pf, u16 index)
*
* Returns allocated irq entry or NULL on failure.
*/
-static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf, bool dyn_only)
+static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf,
+ bool dyn_allowed)
{
- struct xa_limit limit = { .max = pf->irq_tracker.num_entries,
+ struct xa_limit limit = { .max = pf->irq_tracker.num_entries - 1,
.min = 0 };
- unsigned int num_static = pf->irq_tracker.num_static;
+ unsigned int num_static = pf->irq_tracker.num_static - 1;
struct ice_irq_entry *entry;
unsigned int index;
int ret;
@@ -66,9 +85,9 @@ static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf, bool dyn_only)
if (!entry)
return NULL;
- /* skip preallocated entries if the caller says so */
- if (dyn_only)
- limit.min = num_static;
+ /* only already allocated if the caller says so */
+ if (!dyn_allowed)
+ limit.max = num_static;
ret = xa_alloc(&pf->irq_tracker.entries, &index, entry, limit,
GFP_KERNEL);
@@ -78,161 +97,18 @@ static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf, bool dyn_only)
entry = NULL;
} else {
entry->index = index;
- entry->dynamic = index >= num_static;
+ entry->dynamic = index > num_static;
}
return entry;
}
-/**
- * ice_reduce_msix_usage - Reduce usage of MSI-X vectors
- * @pf: board private structure
- * @v_remain: number of remaining MSI-X vectors to be distributed
- *
- * Reduce the usage of MSI-X vectors when entire request cannot be fulfilled.
- * pf->num_lan_msix and pf->num_rdma_msix values are set based on number of
- * remaining vectors.
- */
-static void ice_reduce_msix_usage(struct ice_pf *pf, int v_remain)
-{
- int v_rdma;
-
- if (!ice_is_rdma_ena(pf)) {
- pf->num_lan_msix = v_remain;
- return;
- }
-
- /* RDMA needs at least 1 interrupt in addition to AEQ MSIX */
- v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1;
-
- if (v_remain < ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_RDMA_MSIX) {
- dev_warn(ice_pf_to_dev(pf), "Not enough MSI-X vectors to support RDMA.\n");
- clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
-
- pf->num_rdma_msix = 0;
- pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX;
- } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) ||
- (v_remain - v_rdma < v_rdma)) {
- /* Support minimum RDMA and give remaining vectors to LAN MSIX
- */
- pf->num_rdma_msix = ICE_MIN_RDMA_MSIX;
- pf->num_lan_msix = v_remain - ICE_MIN_RDMA_MSIX;
- } else {
- /* Split remaining MSIX with RDMA after accounting for AEQ MSIX
- */
- pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 +
- ICE_RDMA_NUM_AEQ_MSIX;
- pf->num_lan_msix = v_remain - pf->num_rdma_msix;
- }
-}
-
-/**
- * ice_ena_msix_range - Request a range of MSIX vectors from the OS
- * @pf: board private structure
- *
- * Compute the number of MSIX vectors wanted and request from the OS. Adjust
- * device usage if there are not enough vectors. Return the number of vectors
- * reserved or negative on failure.
- */
-static int ice_ena_msix_range(struct ice_pf *pf)
+#define ICE_RDMA_AEQ_MSIX 1
+static int ice_get_default_msix_amount(struct ice_pf *pf)
{
- int num_cpus, hw_num_msix, v_other, v_wanted, v_actual;
- struct device *dev = ice_pf_to_dev(pf);
- int err;
-
- hw_num_msix = pf->hw.func_caps.common_cap.num_msix_vectors;
- num_cpus = num_online_cpus();
-
- /* LAN miscellaneous handler */
- v_other = ICE_MIN_LAN_OICR_MSIX;
-
- /* Flow Director */
- if (test_bit(ICE_FLAG_FD_ENA, pf->flags))
- v_other += ICE_FDIR_MSIX;
-
- /* switchdev */
- v_other += ICE_ESWITCH_MSIX;
-
- v_wanted = v_other;
-
- /* LAN traffic */
- pf->num_lan_msix = num_cpus;
- v_wanted += pf->num_lan_msix;
-
- /* RDMA auxiliary driver */
- if (ice_is_rdma_ena(pf)) {
- pf->num_rdma_msix = num_cpus + ICE_RDMA_NUM_AEQ_MSIX;
- v_wanted += pf->num_rdma_msix;
- }
-
- if (v_wanted > hw_num_msix) {
- int v_remain;
-
- dev_warn(dev, "not enough device MSI-X vectors. wanted = %d, available = %d\n",
- v_wanted, hw_num_msix);
-
- if (hw_num_msix < ICE_MIN_MSIX) {
- err = -ERANGE;
- goto exit_err;
- }
-
- v_remain = hw_num_msix - v_other;
- if (v_remain < ICE_MIN_LAN_TXRX_MSIX) {
- v_other = ICE_MIN_MSIX - ICE_MIN_LAN_TXRX_MSIX;
- v_remain = ICE_MIN_LAN_TXRX_MSIX;
- }
-
- ice_reduce_msix_usage(pf, v_remain);
- v_wanted = pf->num_lan_msix + pf->num_rdma_msix + v_other;
-
- dev_notice(dev, "Reducing request to %d MSI-X vectors for LAN traffic.\n",
- pf->num_lan_msix);
- if (ice_is_rdma_ena(pf))
- dev_notice(dev, "Reducing request to %d MSI-X vectors for RDMA.\n",
- pf->num_rdma_msix);
- }
-
- /* actually reserve the vectors */
- v_actual = pci_alloc_irq_vectors(pf->pdev, ICE_MIN_MSIX, v_wanted,
- PCI_IRQ_MSIX);
- if (v_actual < 0) {
- dev_err(dev, "unable to reserve MSI-X vectors\n");
- err = v_actual;
- goto exit_err;
- }
-
- if (v_actual < v_wanted) {
- dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n",
- v_wanted, v_actual);
-
- if (v_actual < ICE_MIN_MSIX) {
- /* error if we can't get minimum vectors */
- pci_free_irq_vectors(pf->pdev);
- err = -ERANGE;
- goto exit_err;
- } else {
- int v_remain = v_actual - v_other;
-
- if (v_remain < ICE_MIN_LAN_TXRX_MSIX)
- v_remain = ICE_MIN_LAN_TXRX_MSIX;
-
- ice_reduce_msix_usage(pf, v_remain);
-
- dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n",
- pf->num_lan_msix);
-
- if (ice_is_rdma_ena(pf))
- dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n",
- pf->num_rdma_msix);
- }
- }
-
- return v_actual;
-
-exit_err:
- pf->num_rdma_msix = 0;
- pf->num_lan_msix = 0;
- return err;
+ return ICE_MIN_LAN_OICR_MSIX + num_online_cpus() +
+ (test_bit(ICE_FLAG_FD_ENA, pf->flags) ? ICE_FDIR_MSIX : 0) +
+ (ice_is_rdma_ena(pf) ? num_online_cpus() + ICE_RDMA_AEQ_MSIX : 0);
}
/**
@@ -243,6 +119,7 @@ void ice_clear_interrupt_scheme(struct ice_pf *pf)
{
pci_free_irq_vectors(pf->pdev);
ice_deinit_irq_tracker(pf);
+ ice_deinit_virt_irq_tracker(pf);
}
/**
@@ -252,27 +129,38 @@ void ice_clear_interrupt_scheme(struct ice_pf *pf)
int ice_init_interrupt_scheme(struct ice_pf *pf)
{
int total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors;
- int vectors, max_vectors;
+ int vectors;
- vectors = ice_ena_msix_range(pf);
+ /* load default PF MSI-X range */
+ if (!pf->msix.min)
+ pf->msix.min = ICE_MIN_MSIX;
- if (vectors < 0)
- return -ENOMEM;
+ if (!pf->msix.max)
+ pf->msix.max = min(total_vectors,
+ ice_get_default_msix_amount(pf));
+
+ pf->msix.total = total_vectors;
+ pf->msix.rest = total_vectors - pf->msix.max;
if (pci_msix_can_alloc_dyn(pf->pdev))
- max_vectors = total_vectors;
+ vectors = pf->msix.min;
else
- max_vectors = vectors;
+ vectors = pf->msix.max;
+
+ vectors = pci_alloc_irq_vectors(pf->pdev, pf->msix.min, vectors,
+ PCI_IRQ_MSIX);
+ if (vectors < 0)
+ return vectors;
- ice_init_irq_tracker(pf, max_vectors, vectors);
+ ice_init_irq_tracker(pf, pf->msix.max, vectors);
- return 0;
+ return ice_init_virt_irq_tracker(pf, pf->msix.max, pf->msix.rest);
}
/**
* ice_alloc_irq - Allocate new interrupt vector
* @pf: board private structure
- * @dyn_only: force dynamic allocation of the interrupt
+ * @dyn_allowed: allow dynamic allocation of the interrupt
*
* Allocate new interrupt vector for a given owner id.
* return struct msi_map with interrupt details and track
@@ -285,27 +173,22 @@ int ice_init_interrupt_scheme(struct ice_pf *pf)
* interrupt will be allocated with pci_msix_alloc_irq_at.
*
* Some callers may only support dynamically allocated interrupts.
- * This is indicated with dyn_only flag.
+ * This is indicated with dyn_allowed flag.
*
* On failure, return map with negative .index. The caller
* is expected to check returned map index.
*
*/
-struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_only)
+struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_allowed)
{
- int sriov_base_vector = pf->sriov_base_vector;
struct msi_map map = { .index = -ENOENT };
struct device *dev = ice_pf_to_dev(pf);
struct ice_irq_entry *entry;
- entry = ice_get_irq_res(pf, dyn_only);
+ entry = ice_get_irq_res(pf, dyn_allowed);
if (!entry)
return map;
- /* fail if we're about to violate SRIOV vectors space */
- if (sriov_base_vector && entry->index >= sriov_base_vector)
- goto exit_free_res;
-
if (pci_msix_can_alloc_dyn(pf->pdev) && entry->dynamic) {
map = pci_msix_alloc_irq_at(pf->pdev, entry->index, NULL);
if (map.index < 0)
@@ -353,26 +236,40 @@ void ice_free_irq(struct ice_pf *pf, struct msi_map map)
}
/**
- * ice_get_max_used_msix_vector - Get the max used interrupt vector
- * @pf: board private structure
+ * ice_virt_get_irqs - get irqs for SR-IOV usacase
+ * @pf: pointer to PF structure
+ * @needed: number of irqs to get
*
- * Return index of maximum used interrupt vectors with respect to the
- * beginning of the MSIX table. Take into account that some interrupts
- * may have been dynamically allocated after MSIX was initially enabled.
+ * This returns the first MSI-X vector index in PF space that is used by this
+ * VF. This index is used when accessing PF relative registers such as
+ * GLINT_VECT2FUNC and GLINT_DYN_CTL.
+ * This will always be the OICR index in the AVF driver so any functionality
+ * using vf->first_vector_idx for queue configuration_id: id of VF which will
+ * use this irqs
*/
-int ice_get_max_used_msix_vector(struct ice_pf *pf)
+int ice_virt_get_irqs(struct ice_pf *pf, u32 needed)
{
- unsigned long start, index, max_idx;
- void *entry;
+ int res = bitmap_find_next_zero_area(pf->virt_irq_tracker.bm,
+ pf->virt_irq_tracker.num_entries,
+ 0, needed, 0);
- /* Treat all preallocated interrupts as used */
- start = pf->irq_tracker.num_static;
- max_idx = start - 1;
+ if (res >= pf->virt_irq_tracker.num_entries)
+ return -ENOENT;
- xa_for_each_start(&pf->irq_tracker.entries, index, entry, start) {
- if (index > max_idx)
- max_idx = index;
- }
+ bitmap_set(pf->virt_irq_tracker.bm, res, needed);
+
+ /* conversion from number in bitmap to global irq index */
+ return res + pf->virt_irq_tracker.base;
+}
- return max_idx;
+/**
+ * ice_virt_free_irqs - free irqs used by the VF
+ * @pf: pointer to PF structure
+ * @index: first index to be free
+ * @irqs: number of irqs to free
+ */
+void ice_virt_free_irqs(struct ice_pf *pf, u32 index, u32 irqs)
+{
+ bitmap_clear(pf->virt_irq_tracker.bm, index - pf->virt_irq_tracker.base,
+ irqs);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_irq.h b/drivers/net/ethernet/intel/ice/ice_irq.h
index f35efc08575e..b2f9dbafd57e 100644
--- a/drivers/net/ethernet/intel/ice/ice_irq.h
+++ b/drivers/net/ethernet/intel/ice/ice_irq.h
@@ -15,11 +15,22 @@ struct ice_irq_tracker {
u16 num_static; /* preallocated entries */
};
+struct ice_virt_irq_tracker {
+ unsigned long *bm; /* bitmap to track irq usage */
+ u32 num_entries;
+ /* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the
+ * number of MSIX vectors needed for all SR-IOV VFs from the number of
+ * MSIX vectors allowed on this PF.
+ */
+ u32 base;
+};
+
int ice_init_interrupt_scheme(struct ice_pf *pf);
void ice_clear_interrupt_scheme(struct ice_pf *pf);
struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_only);
void ice_free_irq(struct ice_pf *pf, struct msi_map map);
-int ice_get_max_used_msix_vector(struct ice_pf *pf);
+int ice_virt_get_irqs(struct ice_pf *pf, u32 needed);
+void ice_virt_free_irqs(struct ice_pf *pf, u32 index, u32 irqs);
#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 38a1c8372180..7f5b229cab05 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -157,6 +157,16 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi)
}
}
+static u16 ice_get_rxq_count(struct ice_pf *pf)
+{
+ return min(ice_get_avail_rxq_count(pf), num_online_cpus());
+}
+
+static u16 ice_get_txq_count(struct ice_pf *pf)
+{
+ return min(ice_get_avail_txq_count(pf), num_online_cpus());
+}
+
/**
* ice_vsi_set_num_qs - Set number of queues, descriptors and vectors for a VSI
* @vsi: the VSI being configured
@@ -178,9 +188,7 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi)
vsi->alloc_txq = vsi->req_txq;
vsi->num_txq = vsi->req_txq;
} else {
- vsi->alloc_txq = min3(pf->num_lan_msix,
- ice_get_avail_txq_count(pf),
- (u16)num_online_cpus());
+ vsi->alloc_txq = ice_get_txq_count(pf);
}
pf->num_lan_tx = vsi->alloc_txq;
@@ -193,17 +201,13 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi)
vsi->alloc_rxq = vsi->req_rxq;
vsi->num_rxq = vsi->req_rxq;
} else {
- vsi->alloc_rxq = min3(pf->num_lan_msix,
- ice_get_avail_rxq_count(pf),
- (u16)num_online_cpus());
+ vsi->alloc_rxq = ice_get_rxq_count(pf);
}
}
pf->num_lan_rx = vsi->alloc_rxq;
- vsi->num_q_vectors = min_t(int, pf->num_lan_msix,
- max_t(int, vsi->alloc_rxq,
- vsi->alloc_txq));
+ vsi->num_q_vectors = max(vsi->alloc_rxq, vsi->alloc_txq);
break;
case ICE_VSI_SF:
vsi->alloc_txq = 1;
@@ -567,6 +571,8 @@ ice_vsi_alloc_def(struct ice_vsi *vsi, struct ice_channel *ch)
return -ENOMEM;
}
+ vsi->irq_dyn_alloc = pci_msix_can_alloc_dyn(vsi->back->pdev);
+
switch (vsi->type) {
case ICE_VSI_PF:
case ICE_VSI_SF:
@@ -827,7 +833,13 @@ bool ice_is_safe_mode(struct ice_pf *pf)
*/
bool ice_is_rdma_ena(struct ice_pf *pf)
{
- return test_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+ union devlink_param_value value;
+ int err;
+
+ err = devl_param_driverinit_value_get(priv_to_devlink(pf),
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+ &value);
+ return err ? test_bit(ICE_FLAG_RDMA_ENA, pf->flags) : value.vbool;
}
/**
@@ -1173,12 +1185,11 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
static void
ice_chnl_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
{
- struct ice_pf *pf = vsi->back;
u16 qcount, qmap;
u8 offset = 0;
int pow;
- qcount = min_t(int, vsi->num_rxq, pf->num_lan_msix);
+ qcount = vsi->num_rxq;
pow = order_base_2(qcount);
qmap = FIELD_PREP(ICE_AQ_VSI_TC_Q_OFFSET_M, offset);
@@ -1764,9 +1775,8 @@ void ice_update_eth_stats(struct ice_vsi *vsi)
* @prio: priority for the RXDID for this queue
* @ena_ts: true to enable timestamp and false to disable timestamp
*/
-void
-ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio,
- bool ena_ts)
+void ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio,
+ bool ena_ts)
{
int regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
@@ -3882,7 +3892,7 @@ void ice_init_feature_support(struct ice_pf *pf)
ice_set_feature_support(pf, ICE_F_CGU);
if (ice_is_clock_mux_in_netlist(&pf->hw))
ice_set_feature_support(pf, ICE_F_SMA_CTRL);
- if (ice_gnss_is_gps_present(&pf->hw))
+ if (ice_gnss_is_module_present(&pf->hw))
ice_set_feature_support(pf, ICE_F_GNSS);
break;
default:
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index c3a0fb97c5ee..b084839eb811 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3304,22 +3304,8 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
if (oicr & PFINT_OICR_TSYN_TX_M) {
ena_mask &= ~PFINT_OICR_TSYN_TX_M;
- if (ice_pf_state_is_nominal(pf) &&
- pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) {
- struct ice_ptp_tx *tx = &pf->ptp.port.tx;
- unsigned long flags;
- u8 idx;
-
- spin_lock_irqsave(&tx->lock, flags);
- idx = find_next_bit_wrap(tx->in_use, tx->len,
- tx->last_ll_ts_idx_read + 1);
- if (idx != tx->len)
- ice_ptp_req_tx_single_tstamp(tx, idx);
- spin_unlock_irqrestore(&tx->lock, flags);
- } else if (ice_ptp_pf_handles_tx_interrupt(pf)) {
- set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread);
- ret = IRQ_WAKE_THREAD;
- }
+
+ ret = ice_ptp_ts_irq(pf);
}
if (oicr & PFINT_OICR_TSYN_EVNT_M) {
@@ -4066,8 +4052,7 @@ static void ice_set_pf_caps(struct ice_pf *pf)
}
clear_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags);
- if (func_caps->common_cap.ieee_1588 &&
- !(pf->hw.mac_type == ICE_MAC_E830))
+ if (func_caps->common_cap.ieee_1588)
set_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags);
pf->max_pf_txqs = func_caps->common_cap.num_txq;
@@ -5087,6 +5072,12 @@ static int ice_init(struct ice_pf *pf)
if (err)
return err;
+ if (pf->hw.mac_type == ICE_MAC_E830) {
+ err = pci_enable_ptm(pf->pdev, NULL);
+ if (err)
+ dev_dbg(ice_pf_to_dev(pf), "PCIe PTM not supported by PCIe bus/controller\n");
+ }
+
err = ice_alloc_vsis(pf);
if (err)
goto err_alloc_vsis;
@@ -5186,11 +5177,12 @@ int ice_load(struct ice_pf *pf)
ice_napi_add(vsi);
+ ice_init_features(pf);
+
err = ice_init_rdma(pf);
if (err)
goto err_init_rdma;
- ice_init_features(pf);
ice_service_task_restart(pf);
clear_bit(ICE_DOWN, pf->state);
@@ -5198,6 +5190,7 @@ int ice_load(struct ice_pf *pf)
return 0;
err_init_rdma:
+ ice_deinit_features(pf);
ice_tc_indir_block_unregister(vsi);
err_tc_indir_block_register:
ice_unregister_netdev(vsi);
@@ -5221,8 +5214,8 @@ void ice_unload(struct ice_pf *pf)
devl_assert_locked(priv_to_devlink(pf));
- ice_deinit_features(pf);
ice_deinit_rdma(pf);
+ ice_deinit_features(pf);
ice_tc_indir_block_unregister(vsi);
ice_unregister_netdev(vsi);
ice_devlink_destroy_pf_port(pf);
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index e26320ce52ca..1bb0033347c7 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -298,8 +298,8 @@ void ice_ptp_restore_timestamp_mode(struct ice_pf *pf)
* @sts: Optional parameter for holding a pair of system timestamps from
* the system clock. Will be ignored if NULL is given.
*/
-static u64
-ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
+u64 ice_ptp_read_src_clk_reg(struct ice_pf *pf,
+ struct ptp_system_timestamp *sts)
{
struct ice_hw *hw = &pf->hw;
u32 hi, lo, lo2;
@@ -310,6 +310,15 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
/* Read the system timestamp pre PHC read */
ptp_read_system_prets(sts);
+ if (hw->mac_type == ICE_MAC_E830) {
+ u64 clk_time = rd64(hw, E830_GLTSYN_TIME_L(tmr_idx));
+
+ /* Read the system timestamp post PHC read */
+ ptp_read_system_postts(sts);
+
+ return clk_time;
+ }
+
lo = rd32(hw, GLTSYN_TIME_L(tmr_idx));
/* Read the system timestamp post PHC read */
@@ -972,28 +981,6 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
}
/**
- * ice_ptp_init_tx_eth56g - Initialize tracking for Tx timestamps
- * @pf: Board private structure
- * @tx: the Tx tracking structure to initialize
- * @port: the port this structure tracks
- *
- * Initialize the Tx timestamp tracker for this port. ETH56G PHYs
- * have independent memory blocks for all ports.
- *
- * Return: 0 for success, -ENOMEM when failed to allocate Tx tracker
- */
-static int ice_ptp_init_tx_eth56g(struct ice_pf *pf, struct ice_ptp_tx *tx,
- u8 port)
-{
- tx->block = port;
- tx->offset = 0;
- tx->len = INDEX_PER_PORT_ETH56G;
- tx->has_ready_bitmap = 1;
-
- return ice_ptp_alloc_tx_tracker(tx);
-}
-
-/**
* ice_ptp_init_tx_e82x - Initialize tracking for Tx timestamps
* @pf: Board private structure
* @tx: the Tx tracking structure to initialize
@@ -1003,9 +990,11 @@ static int ice_ptp_init_tx_eth56g(struct ice_pf *pf, struct ice_ptp_tx *tx,
* the timestamp block is shared for all ports in the same quad. To avoid
* ports using the same timestamp index, logically break the block of
* registers into chunks based on the port number.
+ *
+ * Return: 0 on success, -ENOMEM when out of memory
*/
-static int
-ice_ptp_init_tx_e82x(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
+static int ice_ptp_init_tx_e82x(struct ice_pf *pf, struct ice_ptp_tx *tx,
+ u8 port)
{
tx->block = ICE_GET_QUAD_NUM(port);
tx->offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT_E82X;
@@ -1016,24 +1005,27 @@ ice_ptp_init_tx_e82x(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
}
/**
- * ice_ptp_init_tx_e810 - Initialize tracking for Tx timestamps
+ * ice_ptp_init_tx - Initialize tracking for Tx timestamps
* @pf: Board private structure
* @tx: the Tx tracking structure to initialize
+ * @port: the port this structure tracks
+ *
+ * Initialize the Tx timestamp tracker for this PF. For all PHYs except E82X,
+ * each port has its own block of timestamps, independent of the other ports.
*
- * Initialize the Tx timestamp tracker for this PF. For E810 devices, each
- * port has its own block of timestamps, independent of the other ports.
+ * Return: 0 on success, -ENOMEM when out of memory
*/
-static int
-ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
+static int ice_ptp_init_tx(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
{
- tx->block = pf->hw.port_info->lport;
+ tx->block = port;
tx->offset = 0;
- tx->len = INDEX_PER_PORT_E810;
+ tx->len = INDEX_PER_PORT;
+
/* The E810 PHY does not provide a timestamp ready bitmap. Instead,
* verify new timestamps against cached copy of the last read
* timestamp.
*/
- tx->has_ready_bitmap = 0;
+ tx->has_ready_bitmap = pf->hw.mac_type != ICE_MAC_E810;
return ice_ptp_alloc_tx_tracker(tx);
}
@@ -1318,20 +1310,21 @@ ice_ptp_port_phy_stop(struct ice_ptp_port *ptp_port)
struct ice_hw *hw = &pf->hw;
int err;
- if (ice_is_e810(hw))
- return 0;
-
mutex_lock(&ptp_port->ps_lock);
- switch (ice_get_phy_model(hw)) {
- case ICE_PHY_ETH56G:
- err = ice_stop_phy_timer_eth56g(hw, port, true);
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
+ case ICE_MAC_E830:
+ err = 0;
break;
- case ICE_PHY_E82X:
+ case ICE_MAC_GENERIC:
kthread_cancel_delayed_work_sync(&ptp_port->ov_work);
err = ice_stop_phy_timer_e82x(hw, port, true);
break;
+ case ICE_MAC_GENERIC_3K_E825:
+ err = ice_stop_phy_timer_eth56g(hw, port, true);
+ break;
default:
err = -ENODEV;
}
@@ -1361,19 +1354,17 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port)
unsigned long flags;
int err;
- if (ice_is_e810(hw))
- return 0;
-
if (!ptp_port->link_up)
return ice_ptp_port_phy_stop(ptp_port);
mutex_lock(&ptp_port->ps_lock);
- switch (ice_get_phy_model(hw)) {
- case ICE_PHY_ETH56G:
- err = ice_start_phy_timer_eth56g(hw, port);
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
+ case ICE_MAC_E830:
+ err = 0;
break;
- case ICE_PHY_E82X:
+ case ICE_MAC_GENERIC:
/* Start the PHY timer in Vernier mode */
kthread_cancel_delayed_work_sync(&ptp_port->ov_work);
@@ -1398,6 +1389,9 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port)
kthread_queue_delayed_work(pf->ptp.kworker, &ptp_port->ov_work,
0);
break;
+ case ICE_MAC_GENERIC_3K_E825:
+ err = ice_start_phy_timer_eth56g(hw, port);
+ break;
default:
err = -ENODEV;
}
@@ -1432,12 +1426,14 @@ void ice_ptp_link_change(struct ice_pf *pf, bool linkup)
/* Skip HW writes if reset is in progress */
if (pf->hw.reset_ongoing)
return;
- switch (ice_get_phy_model(hw)) {
- case ICE_PHY_E810:
- /* Do not reconfigure E810 PHY */
+
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
+ case ICE_MAC_E830:
+ /* Do not reconfigure E810 or E830 PHY */
return;
- case ICE_PHY_ETH56G:
- case ICE_PHY_E82X:
+ case ICE_MAC_GENERIC:
+ case ICE_MAC_GENERIC_3K_E825:
ice_ptp_port_phy_restart(ptp_port);
return;
default:
@@ -1465,46 +1461,45 @@ static int ice_ptp_cfg_phy_interrupt(struct ice_pf *pf, bool ena, u32 threshold)
ice_ptp_reset_ts_memory(hw);
- switch (ice_get_phy_model(hw)) {
- case ICE_PHY_ETH56G: {
- int port;
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
+ case ICE_MAC_E830:
+ return 0;
+ case ICE_MAC_GENERIC: {
+ int quad;
- for (port = 0; port < hw->ptp.num_lports; port++) {
+ for (quad = 0; quad < ICE_GET_QUAD_NUM(hw->ptp.num_lports);
+ quad++) {
int err;
- err = ice_phy_cfg_intr_eth56g(hw, port, ena, threshold);
+ err = ice_phy_cfg_intr_e82x(hw, quad, ena, threshold);
if (err) {
- dev_err(dev, "Failed to configure PHY interrupt for port %d, err %d\n",
- port, err);
+ dev_err(dev, "Failed to configure PHY interrupt for quad %d, err %d\n",
+ quad, err);
return err;
}
}
return 0;
}
- case ICE_PHY_E82X: {
- int quad;
+ case ICE_MAC_GENERIC_3K_E825: {
+ int port;
- for (quad = 0; quad < ICE_GET_QUAD_NUM(hw->ptp.num_lports);
- quad++) {
+ for (port = 0; port < hw->ptp.num_lports; port++) {
int err;
- err = ice_phy_cfg_intr_e82x(hw, quad, ena, threshold);
+ err = ice_phy_cfg_intr_eth56g(hw, port, ena, threshold);
if (err) {
- dev_err(dev, "Failed to configure PHY interrupt for quad %d, err %d\n",
- quad, err);
+ dev_err(dev, "Failed to configure PHY interrupt for port %d, err %d\n",
+ port, err);
return err;
}
}
return 0;
}
- case ICE_PHY_E810:
- return 0;
- case ICE_PHY_UNSUP:
+ case ICE_MAC_UNKNOWN:
default:
- dev_warn(dev, "%s: Unexpected PHY model %d\n", __func__,
- ice_get_phy_model(hw));
return -EOPNOTSUPP;
}
}
@@ -1740,7 +1735,7 @@ static int ice_ptp_write_perout(struct ice_hw *hw, unsigned int chan,
/* 0. Reset mode & out_en in AUX_OUT */
wr32(hw, GLTSYN_AUX_OUT(chan, tmr_idx), 0);
- if (ice_is_e825c(hw)) {
+ if (hw->mac_type == ICE_MAC_GENERIC_3K_E825) {
int err;
/* Enable/disable CGU 1PPS output for E825C */
@@ -1824,7 +1819,7 @@ static int ice_ptp_cfg_perout(struct ice_pf *pf, struct ptp_perout_request *rq,
return ice_ptp_write_perout(hw, rq->index, gpio_pin, 0, 0);
if (strncmp(pf->ptp.pin_desc[pin_desc_idx].name, "1PPS", 64) == 0 &&
- period != NSEC_PER_SEC && hw->ptp.phy_model == ICE_PHY_E82X) {
+ period != NSEC_PER_SEC && hw->mac_type == ICE_MAC_GENERIC) {
dev_err(ice_pf_to_dev(pf), "1PPS pin supports only 1 s period\n");
return -EOPNOTSUPP;
}
@@ -2078,7 +2073,7 @@ ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts)
/* For Vernier mode on E82X, we need to recalibrate after new settime.
* Start with marking timestamps as invalid.
*/
- if (ice_get_phy_model(hw) == ICE_PHY_E82X) {
+ if (hw->mac_type == ICE_MAC_GENERIC) {
err = ice_ptp_clear_phy_offset_ready_e82x(hw);
if (err)
dev_warn(ice_pf_to_dev(pf), "Failed to mark timestamps as invalid before settime\n");
@@ -2102,7 +2097,7 @@ ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts)
ice_ptp_enable_all_perout(pf);
/* Recalibrate and re-enable timestamp blocks for E822/E823 */
- if (ice_get_phy_model(hw) == ICE_PHY_E82X)
+ if (hw->mac_type == ICE_MAC_GENERIC)
ice_ptp_restart_all_phy(pf);
exit:
if (err) {
@@ -2180,93 +2175,157 @@ static int ice_ptp_adjtime(struct ptp_clock_info *info, s64 delta)
return 0;
}
+/**
+ * struct ice_crosststamp_cfg - Device cross timestamp configuration
+ * @lock_reg: The hardware semaphore lock to use
+ * @lock_busy: Bit in the semaphore lock indicating the lock is busy
+ * @ctl_reg: The hardware register to request cross timestamp
+ * @ctl_active: Bit in the control register to request cross timestamp
+ * @art_time_l: Lower 32-bits of ART system time
+ * @art_time_h: Upper 32-bits of ART system time
+ * @dev_time_l: Lower 32-bits of device time (per timer index)
+ * @dev_time_h: Upper 32-bits of device time (per timer index)
+ */
+struct ice_crosststamp_cfg {
+ /* HW semaphore lock register */
+ u32 lock_reg;
+ u32 lock_busy;
+
+ /* Capture control register */
+ u32 ctl_reg;
+ u32 ctl_active;
+
+ /* Time storage */
+ u32 art_time_l;
+ u32 art_time_h;
+ u32 dev_time_l[2];
+ u32 dev_time_h[2];
+};
+
+static const struct ice_crosststamp_cfg ice_crosststamp_cfg_e82x = {
+ .lock_reg = PFHH_SEM,
+ .lock_busy = PFHH_SEM_BUSY_M,
+ .ctl_reg = GLHH_ART_CTL,
+ .ctl_active = GLHH_ART_CTL_ACTIVE_M,
+ .art_time_l = GLHH_ART_TIME_L,
+ .art_time_h = GLHH_ART_TIME_H,
+ .dev_time_l[0] = GLTSYN_HHTIME_L(0),
+ .dev_time_h[0] = GLTSYN_HHTIME_H(0),
+ .dev_time_l[1] = GLTSYN_HHTIME_L(1),
+ .dev_time_h[1] = GLTSYN_HHTIME_H(1),
+};
+
#ifdef CONFIG_ICE_HWTS
+static const struct ice_crosststamp_cfg ice_crosststamp_cfg_e830 = {
+ .lock_reg = E830_PFPTM_SEM,
+ .lock_busy = E830_PFPTM_SEM_BUSY_M,
+ .ctl_reg = E830_GLPTM_ART_CTL,
+ .ctl_active = E830_GLPTM_ART_CTL_ACTIVE_M,
+ .art_time_l = E830_GLPTM_ART_TIME_L,
+ .art_time_h = E830_GLPTM_ART_TIME_H,
+ .dev_time_l[0] = E830_GLTSYN_PTMTIME_L(0),
+ .dev_time_h[0] = E830_GLTSYN_PTMTIME_H(0),
+ .dev_time_l[1] = E830_GLTSYN_PTMTIME_L(1),
+ .dev_time_h[1] = E830_GLTSYN_PTMTIME_H(1),
+};
+
+#endif /* CONFIG_ICE_HWTS */
+/**
+ * struct ice_crosststamp_ctx - Device cross timestamp context
+ * @snapshot: snapshot of system clocks for historic interpolation
+ * @pf: pointer to the PF private structure
+ * @cfg: pointer to hardware configuration for cross timestamp
+ */
+struct ice_crosststamp_ctx {
+ struct system_time_snapshot snapshot;
+ struct ice_pf *pf;
+ const struct ice_crosststamp_cfg *cfg;
+};
+
/**
- * ice_ptp_get_syncdevicetime - Get the cross time stamp info
+ * ice_capture_crosststamp - Capture a device/system cross timestamp
* @device: Current device time
* @system: System counter value read synchronously with device time
- * @ctx: Context provided by timekeeping code
+ * @__ctx: Context passed from ice_ptp_getcrosststamp
*
* Read device and system (ART) clock simultaneously and return the corrected
* clock values in ns.
+ *
+ * Return: zero on success, or a negative error code on failure.
*/
-static int
-ice_ptp_get_syncdevicetime(ktime_t *device,
- struct system_counterval_t *system,
- void *ctx)
+static int ice_capture_crosststamp(ktime_t *device,
+ struct system_counterval_t *system,
+ void *__ctx)
{
- struct ice_pf *pf = (struct ice_pf *)ctx;
- struct ice_hw *hw = &pf->hw;
- u32 hh_lock, hh_art_ctl;
- int i;
+ struct ice_crosststamp_ctx *ctx = __ctx;
+ const struct ice_crosststamp_cfg *cfg;
+ u32 lock, ctl, ts_lo, ts_hi, tmr_idx;
+ struct ice_pf *pf;
+ struct ice_hw *hw;
+ int err;
+ u64 ts;
-#define MAX_HH_HW_LOCK_TRIES 5
-#define MAX_HH_CTL_LOCK_TRIES 100
+ cfg = ctx->cfg;
+ pf = ctx->pf;
+ hw = &pf->hw;
- for (i = 0; i < MAX_HH_HW_LOCK_TRIES; i++) {
- /* Get the HW lock */
- hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id));
- if (hh_lock & PFHH_SEM_BUSY_M) {
- usleep_range(10000, 15000);
- continue;
- }
- break;
- }
- if (hh_lock & PFHH_SEM_BUSY_M) {
- dev_err(ice_pf_to_dev(pf), "PTP failed to get hh lock\n");
+ tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc;
+ if (tmr_idx > 1)
+ return -EINVAL;
+
+ /* Poll until we obtain the cross-timestamp hardware semaphore */
+ err = rd32_poll_timeout(hw, cfg->lock_reg, lock,
+ !(lock & cfg->lock_busy),
+ 10 * USEC_PER_MSEC, 50 * USEC_PER_MSEC);
+ if (err) {
+ dev_err(ice_pf_to_dev(pf), "PTP failed to get cross timestamp lock\n");
return -EBUSY;
}
+ /* Snapshot system time for historic interpolation */
+ ktime_get_snapshot(&ctx->snapshot);
+
/* Program cmd to master timer */
ice_ptp_src_cmd(hw, ICE_PTP_READ_TIME);
/* Start the ART and device clock sync sequence */
- hh_art_ctl = rd32(hw, GLHH_ART_CTL);
- hh_art_ctl = hh_art_ctl | GLHH_ART_CTL_ACTIVE_M;
- wr32(hw, GLHH_ART_CTL, hh_art_ctl);
-
- for (i = 0; i < MAX_HH_CTL_LOCK_TRIES; i++) {
- /* Wait for sync to complete */
- hh_art_ctl = rd32(hw, GLHH_ART_CTL);
- if (hh_art_ctl & GLHH_ART_CTL_ACTIVE_M) {
- udelay(1);
- continue;
- } else {
- u32 hh_ts_lo, hh_ts_hi, tmr_idx;
- u64 hh_ts;
-
- tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc;
- /* Read ART time */
- hh_ts_lo = rd32(hw, GLHH_ART_TIME_L);
- hh_ts_hi = rd32(hw, GLHH_ART_TIME_H);
- hh_ts = ((u64)hh_ts_hi << 32) | hh_ts_lo;
- system->cycles = hh_ts;
- system->cs_id = CSID_X86_ART;
- /* Read Device source clock time */
- hh_ts_lo = rd32(hw, GLTSYN_HHTIME_L(tmr_idx));
- hh_ts_hi = rd32(hw, GLTSYN_HHTIME_H(tmr_idx));
- hh_ts = ((u64)hh_ts_hi << 32) | hh_ts_lo;
- *device = ns_to_ktime(hh_ts);
- break;
- }
- }
+ ctl = rd32(hw, cfg->ctl_reg);
+ ctl |= cfg->ctl_active;
+ wr32(hw, cfg->ctl_reg, ctl);
+ /* Poll until hardware completes the capture */
+ err = rd32_poll_timeout(hw, cfg->ctl_reg, ctl, !(ctl & cfg->ctl_active),
+ 5, 20 * USEC_PER_MSEC);
+ if (err)
+ goto err_timeout;
+
+ /* Read ART system time */
+ ts_lo = rd32(hw, cfg->art_time_l);
+ ts_hi = rd32(hw, cfg->art_time_h);
+ ts = ((u64)ts_hi << 32) | ts_lo;
+ system->cycles = ts;
+ system->cs_id = CSID_X86_ART;
+
+ /* Read Device source clock time */
+ ts_lo = rd32(hw, cfg->dev_time_l[tmr_idx]);
+ ts_hi = rd32(hw, cfg->dev_time_h[tmr_idx]);
+ ts = ((u64)ts_hi << 32) | ts_lo;
+ *device = ns_to_ktime(ts);
+
+err_timeout:
/* Clear the master timer */
ice_ptp_src_cmd(hw, ICE_PTP_NOP);
/* Release HW lock */
- hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id));
- hh_lock = hh_lock & ~PFHH_SEM_BUSY_M;
- wr32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), hh_lock);
-
- if (i == MAX_HH_CTL_LOCK_TRIES)
- return -ETIMEDOUT;
+ lock = rd32(hw, cfg->lock_reg);
+ lock &= ~cfg->lock_busy;
+ wr32(hw, cfg->lock_reg, lock);
- return 0;
+ return err;
}
/**
- * ice_ptp_getcrosststamp_e82x - Capture a device cross timestamp
+ * ice_ptp_getcrosststamp - Capture a device cross timestamp
* @info: the driver's PTP info structure
* @cts: The memory to fill the cross timestamp info
*
@@ -2274,22 +2333,36 @@ ice_ptp_get_syncdevicetime(ktime_t *device,
* clock. Fill the cross timestamp information and report it back to the
* caller.
*
- * This is only valid for E822 and E823 devices which have support for
- * generating the cross timestamp via PCIe PTM.
- *
* In order to correctly correlate the ART timestamp back to the TSC time, the
* CPU must have X86_FEATURE_TSC_KNOWN_FREQ.
+ *
+ * Return: zero on success, or a negative error code on failure.
*/
-static int
-ice_ptp_getcrosststamp_e82x(struct ptp_clock_info *info,
- struct system_device_crosststamp *cts)
+static int ice_ptp_getcrosststamp(struct ptp_clock_info *info,
+ struct system_device_crosststamp *cts)
{
struct ice_pf *pf = ptp_info_to_pf(info);
+ struct ice_crosststamp_ctx ctx = {
+ .pf = pf,
+ };
+
+ switch (pf->hw.mac_type) {
+ case ICE_MAC_GENERIC:
+ case ICE_MAC_GENERIC_3K_E825:
+ ctx.cfg = &ice_crosststamp_cfg_e82x;
+ break;
+#ifdef CONFIG_ICE_HWTS
+ case ICE_MAC_E830:
+ ctx.cfg = &ice_crosststamp_cfg_e830;
+ break;
+#endif /* CONFIG_ICE_HWTS */
+ default:
+ return -EOPNOTSUPP;
+ }
- return get_device_system_crosststamp(ice_ptp_get_syncdevicetime,
- pf, NULL, cts);
+ return get_device_system_crosststamp(ice_capture_crosststamp, &ctx,
+ &ctx.snapshot, cts);
}
-#endif /* CONFIG_ICE_HWTS */
/**
* ice_ptp_get_ts_config - ioctl interface to read the timestamping config
@@ -2550,13 +2623,9 @@ static int ice_ptp_parse_sdp_entries(struct ice_pf *pf, __le16 *entries,
*/
static void ice_ptp_set_funcs_e82x(struct ice_pf *pf)
{
-#ifdef CONFIG_ICE_HWTS
- if (boot_cpu_has(X86_FEATURE_ART) &&
- boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ))
- pf->ptp.info.getcrosststamp = ice_ptp_getcrosststamp_e82x;
+ pf->ptp.info.getcrosststamp = ice_ptp_getcrosststamp;
-#endif /* CONFIG_ICE_HWTS */
- if (ice_is_e825c(&pf->hw)) {
+ if (pf->hw.mac_type == ICE_MAC_GENERIC_3K_E825) {
pf->ptp.ice_pin_desc = ice_pin_desc_e825c;
pf->ptp.info.n_pins = ICE_PIN_DESC_ARR_LEN(ice_pin_desc_e825c);
} else {
@@ -2623,6 +2692,28 @@ err:
}
/**
+ * ice_ptp_set_funcs_e830 - Set specialized functions for E830 support
+ * @pf: Board private structure
+ *
+ * Assign functions to the PTP capabiltiies structure for E830 devices.
+ * Functions which operate across all device families should be set directly
+ * in ice_ptp_set_caps. Only add functions here which are distinct for E830
+ * devices.
+ */
+static void ice_ptp_set_funcs_e830(struct ice_pf *pf)
+{
+#ifdef CONFIG_ICE_HWTS
+ if (pcie_ptm_enabled(pf->pdev) && boot_cpu_has(X86_FEATURE_ART))
+ pf->ptp.info.getcrosststamp = ice_ptp_getcrosststamp;
+
+#endif /* CONFIG_ICE_HWTS */
+ /* Rest of the config is the same as base E810 */
+ pf->ptp.ice_pin_desc = ice_pin_desc_e810;
+ pf->ptp.info.n_pins = ICE_PIN_DESC_ARR_LEN(ice_pin_desc_e810);
+ ice_ptp_setup_pin_cfg(pf);
+}
+
+/**
* ice_ptp_set_caps - Set PTP capabilities
* @pf: Board private structure
*/
@@ -2644,10 +2735,20 @@ static void ice_ptp_set_caps(struct ice_pf *pf)
info->enable = ice_ptp_gpio_enable;
info->verify = ice_verify_pin;
- if (ice_is_e810(&pf->hw))
+ switch (pf->hw.mac_type) {
+ case ICE_MAC_E810:
ice_ptp_set_funcs_e810(pf);
- else
+ return;
+ case ICE_MAC_E830:
+ ice_ptp_set_funcs_e830(pf);
+ return;
+ case ICE_MAC_GENERIC:
+ case ICE_MAC_GENERIC_3K_E825:
ice_ptp_set_funcs_e82x(pf);
+ return;
+ default:
+ return;
+ }
}
/**
@@ -2758,6 +2859,65 @@ enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf)
}
/**
+ * ice_ptp_ts_irq - Process the PTP Tx timestamps in IRQ context
+ * @pf: Board private structure
+ *
+ * Return: IRQ_WAKE_THREAD if Tx timestamp read has to be handled in the bottom
+ * half of the interrupt and IRQ_HANDLED otherwise.
+ */
+irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
+ /* E810 capable of low latency timestamping with interrupt can
+ * request a single timestamp in the top half and wait for
+ * a second LL TS interrupt from the FW when it's ready.
+ */
+ if (hw->dev_caps.ts_dev_info.ts_ll_int_read) {
+ struct ice_ptp_tx *tx = &pf->ptp.port.tx;
+ u8 idx;
+
+ if (!ice_pf_state_is_nominal(pf))
+ return IRQ_HANDLED;
+
+ spin_lock(&tx->lock);
+ idx = find_next_bit_wrap(tx->in_use, tx->len,
+ tx->last_ll_ts_idx_read + 1);
+ if (idx != tx->len)
+ ice_ptp_req_tx_single_tstamp(tx, idx);
+ spin_unlock(&tx->lock);
+
+ return IRQ_HANDLED;
+ }
+ fallthrough; /* non-LL_TS E810 */
+ case ICE_MAC_GENERIC:
+ case ICE_MAC_GENERIC_3K_E825:
+ /* All other devices process timestamps in the bottom half due
+ * to sleeping or polling.
+ */
+ if (!ice_ptp_pf_handles_tx_interrupt(pf))
+ return IRQ_HANDLED;
+
+ set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread);
+ return IRQ_WAKE_THREAD;
+ case ICE_MAC_E830:
+ /* E830 can read timestamps in the top half using rd32() */
+ if (ice_ptp_process_ts(pf) == ICE_TX_TSTAMP_WORK_PENDING) {
+ /* Process outstanding Tx timestamps. If there
+ * is more work, re-arm the interrupt to trigger again.
+ */
+ wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M);
+ ice_flush(hw);
+ }
+ return IRQ_HANDLED;
+ default:
+ return IRQ_HANDLED;
+ }
+}
+
+/**
* ice_ptp_maybe_trigger_tx_interrupt - Trigger Tx timstamp interrupt
* @pf: Board private structure
*
@@ -2777,7 +2937,7 @@ static void ice_ptp_maybe_trigger_tx_interrupt(struct ice_pf *pf)
bool trigger_oicr = false;
unsigned int i;
- if (ice_is_e810(hw))
+ if (!pf->ptp.port.tx.has_ready_bitmap)
return;
if (!ice_pf_src_tmr_owned(pf))
@@ -2912,14 +3072,12 @@ static int ice_ptp_rebuild_owner(struct ice_pf *pf)
*/
ice_ptp_flush_all_tx_tracker(pf);
- if (!ice_is_e810(hw)) {
- /* Enable quad interrupts */
- err = ice_ptp_cfg_phy_interrupt(pf, true, 1);
- if (err)
- return err;
+ /* Enable quad interrupts */
+ err = ice_ptp_cfg_phy_interrupt(pf, true, 1);
+ if (err)
+ return err;
- ice_ptp_restart_all_phy(pf);
- }
+ ice_ptp_restart_all_phy(pf);
/* Re-enable all periodic outputs and external timestamp events */
ice_ptp_enable_all_perout(pf);
@@ -2971,8 +3129,9 @@ err:
static bool ice_is_primary(struct ice_hw *hw)
{
- return ice_is_e825c(hw) && ice_is_dual(hw) ?
- !!(hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_PRIMARY_M) : true;
+ return hw->mac_type == ICE_MAC_GENERIC_3K_E825 && ice_is_dual(hw) ?
+ !!(hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_PRIMARY_M) :
+ true;
}
static int ice_ptp_setup_adapter(struct ice_pf *pf)
@@ -2990,7 +3149,7 @@ static int ice_ptp_setup_pf(struct ice_pf *pf)
struct ice_ptp *ctrl_ptp = ice_get_ctrl_ptp(pf);
struct ice_ptp *ptp = &pf->ptp;
- if (WARN_ON(!ctrl_ptp) || ice_get_phy_model(&pf->hw) == ICE_PHY_UNSUP)
+ if (WARN_ON(!ctrl_ptp) || pf->hw.mac_type == ICE_MAC_UNKNOWN)
return -ENODEV;
INIT_LIST_HEAD(&ptp->port.list_node);
@@ -3007,7 +3166,7 @@ static void ice_ptp_cleanup_pf(struct ice_pf *pf)
{
struct ice_ptp *ptp = &pf->ptp;
- if (ice_get_phy_model(&pf->hw) != ICE_PHY_UNSUP) {
+ if (pf->hw.mac_type != ICE_MAC_UNKNOWN) {
mutex_lock(&pf->adapter->ports.lock);
list_del(&ptp->port.list_node);
mutex_unlock(&pf->adapter->ports.lock);
@@ -3127,6 +3286,8 @@ static int ice_ptp_init_work(struct ice_pf *pf, struct ice_ptp *ptp)
* ice_ptp_init_port - Initialize PTP port structure
* @pf: Board private structure
* @ptp_port: PTP port structure
+ *
+ * Return: 0 on success, -ENODEV on invalid MAC type, -ENOMEM on failed alloc.
*/
static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port)
{
@@ -3134,16 +3295,14 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port)
mutex_init(&ptp_port->ps_lock);
- switch (ice_get_phy_model(hw)) {
- case ICE_PHY_ETH56G:
- return ice_ptp_init_tx_eth56g(pf, &ptp_port->tx,
- ptp_port->port_num);
- case ICE_PHY_E810:
- return ice_ptp_init_tx_e810(pf, &ptp_port->tx);
- case ICE_PHY_E82X:
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
+ case ICE_MAC_E830:
+ case ICE_MAC_GENERIC_3K_E825:
+ return ice_ptp_init_tx(pf, &ptp_port->tx, ptp_port->port_num);
+ case ICE_MAC_GENERIC:
kthread_init_delayed_work(&ptp_port->ov_work,
ice_ptp_wait_for_offsets);
-
return ice_ptp_init_tx_e82x(pf, &ptp_port->tx,
ptp_port->port_num);
default:
@@ -3162,8 +3321,8 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port)
*/
static void ice_ptp_init_tx_interrupt_mode(struct ice_pf *pf)
{
- switch (ice_get_phy_model(&pf->hw)) {
- case ICE_PHY_E82X:
+ switch (pf->hw.mac_type) {
+ case ICE_MAC_GENERIC:
/* E822 based PHY has the clock owner process the interrupt
* for all ports.
*/
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index a1d0e988c084..3b769a0cad00 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -128,8 +128,7 @@ struct ice_ptp_tx {
/* Quad and port information for initializing timestamp blocks */
#define INDEX_PER_QUAD 64
#define INDEX_PER_PORT_E82X 16
-#define INDEX_PER_PORT_E810 64
-#define INDEX_PER_PORT_ETH56G 64
+#define INDEX_PER_PORT 64
/**
* struct ice_ptp_port - data used to initialize an external port for PTP
@@ -304,6 +303,9 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb);
void ice_ptp_req_tx_single_tstamp(struct ice_ptp_tx *tx, u8 idx);
void ice_ptp_complete_tx_single_tstamp(struct ice_ptp_tx *tx);
enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf);
+irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf);
+u64 ice_ptp_read_src_clk_reg(struct ice_pf *pf,
+ struct ptp_system_timestamp *sts);
u64 ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc,
const struct ice_pkt_ctx *pkt_ctx);
@@ -342,6 +344,17 @@ static inline bool ice_ptp_process_ts(struct ice_pf *pf)
return true;
}
+static inline irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf)
+{
+ return IRQ_HANDLED;
+}
+
+static inline u64 ice_ptp_read_src_clk_reg(struct ice_pf *pf,
+ struct ptp_system_timestamp *sts)
+{
+ return 0;
+}
+
static inline u64
ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc,
const struct ice_pkt_ctx *pkt_ctx)
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
index ec91822e9280..3e824f7b30c0 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
@@ -746,7 +746,7 @@ static int ice_init_cgu_e82x(struct ice_hw *hw)
int err;
/* Disable sticky lock detection so lock err reported is accurate */
- if (ice_is_e825c(hw))
+ if (hw->mac_type == ICE_MAC_GENERIC_3K_E825)
err = ice_cfg_cgu_pll_dis_sticky_bits_e825c(hw);
else
err = ice_cfg_cgu_pll_dis_sticky_bits_e82x(hw);
@@ -756,7 +756,7 @@ static int ice_init_cgu_e82x(struct ice_hw *hw)
/* Configure the CGU PLL using the parameters from the function
* capabilities.
*/
- if (ice_is_e825c(hw))
+ if (hw->mac_type == ICE_MAC_GENERIC_3K_E825)
err = ice_cfg_cgu_pll_e825c(hw, ts_info->time_ref,
(enum ice_clk_src)ts_info->clk_src);
else
@@ -827,8 +827,9 @@ static u32 ice_ptp_tmr_cmd_to_port_reg(struct ice_hw *hw,
/* Certain hardware families share the same register values for the
* port register and source timer register.
*/
- switch (ice_get_phy_model(hw)) {
- case ICE_PHY_E810:
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
+ case ICE_MAC_E830:
return ice_ptp_tmr_cmd_to_src_reg(hw, cmd) & TS_CMD_MASK_E810;
default:
break;
@@ -895,6 +896,17 @@ static void ice_ptp_exec_tmr_cmd(struct ice_hw *hw)
ice_flush(hw);
}
+/**
+ * ice_ptp_cfg_sync_delay - Configure PHC to PHY synchronization delay
+ * @hw: pointer to HW struct
+ * @delay: delay between PHC and PHY SYNC command execution in nanoseconds
+ */
+static void ice_ptp_cfg_sync_delay(const struct ice_hw *hw, u32 delay)
+{
+ wr32(hw, GLTSYN_SYNC_DLAY, delay);
+ ice_flush(hw);
+}
+
/* 56G PHY device functions
*
* The following functions operate on devices with the ETH 56G PHY.
@@ -1576,9 +1588,8 @@ static int ice_read_ptp_tstamp_eth56g(struct ice_hw *hw, u8 port, u8 idx,
* lower 8 bits in the low register, and the upper 32 bits in the high
* register.
*/
- *tstamp = FIELD_PREP(TS_PHY_HIGH_M, hi) |
- FIELD_PREP(TS_PHY_LOW_M, lo);
-
+ *tstamp = FIELD_PREP(PHY_40B_HIGH_M, hi) |
+ FIELD_PREP(PHY_40B_LOW_M, lo);
return 0;
}
@@ -2729,10 +2740,7 @@ static void ice_ptp_init_phy_e825(struct ice_hw *hw)
{
struct ice_ptp_hw *ptp = &hw->ptp;
struct ice_eth56g_params *params;
- u32 phy_rev;
- int err;
- ptp->phy_model = ICE_PHY_ETH56G;
params = &ptp->phy.eth56g;
params->onestep_ena = false;
params->peer_delay = 0;
@@ -2742,9 +2750,6 @@ static void ice_ptp_init_phy_e825(struct ice_hw *hw)
ptp->num_lports = params->num_phys * ptp->ports_per_phy;
ice_sb_access_ena_eth56g(hw, true);
- err = ice_read_phy_eth56g(hw, hw->pf_id, PHY_REG_REVISION, &phy_rev);
- if (err || phy_rev != PHY_REVISION_ETH56G)
- ptp->phy_model = ICE_PHY_UNSUP;
}
/* E822 family functions
@@ -3219,7 +3224,8 @@ ice_read_phy_tstamp_e82x(struct ice_hw *hw, u8 quad, u8 idx, u64 *tstamp)
* lower 8 bits in the low register, and the upper 32 bits in the high
* register.
*/
- *tstamp = FIELD_PREP(TS_PHY_HIGH_M, hi) | FIELD_PREP(TS_PHY_LOW_M, lo);
+ *tstamp = FIELD_PREP(PHY_40B_HIGH_M, hi) |
+ FIELD_PREP(PHY_40B_LOW_M, lo);
return 0;
}
@@ -4792,7 +4798,6 @@ int ice_phy_cfg_intr_e82x(struct ice_hw *hw, u8 quad, bool ena, u8 threshold)
*/
static void ice_ptp_init_phy_e82x(struct ice_ptp_hw *ptp)
{
- ptp->phy_model = ICE_PHY_E82X;
ptp->num_lports = 8;
ptp->ports_per_phy = 8;
}
@@ -4986,7 +4991,8 @@ ice_read_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx, u64 *tstamp)
/* For E810 devices, the timestamp is reported with the lower 32 bits
* in the low register, and the upper 8 bits in the high register.
*/
- *tstamp = ((u64)hi) << TS_HIGH_S | ((u64)lo & TS_LOW_M);
+ *tstamp = FIELD_PREP(PHY_EXT_40B_HIGH_M, hi) |
+ FIELD_PREP(PHY_EXT_40B_LOW_M, lo);
return 0;
}
@@ -5049,8 +5055,7 @@ static int ice_ptp_init_phc_e810(struct ice_hw *hw)
u8 tmr_idx;
int err;
- /* Ensure synchronization delay is zero */
- wr32(hw, GLTSYN_SYNC_DLAY, 0);
+ ice_ptp_cfg_sync_delay(hw, ICE_E810_E830_SYNC_DELAY);
tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_ENA(tmr_idx),
@@ -5316,68 +5321,6 @@ ice_get_phy_tx_tstamp_ready_e810(struct ice_hw *hw, u8 port, u64 *tstamp_ready)
*/
/**
- * ice_get_pca9575_handle
- * @hw: pointer to the hw struct
- * @pca9575_handle: GPIO controller's handle
- *
- * Find and return the GPIO controller's handle in the netlist.
- * When found - the value will be cached in the hw structure and following calls
- * will return cached value
- */
-static int
-ice_get_pca9575_handle(struct ice_hw *hw, u16 *pca9575_handle)
-{
- struct ice_aqc_get_link_topo *cmd;
- struct ice_aq_desc desc;
- int status;
- u8 idx;
-
- /* If handle was read previously return cached value */
- if (hw->io_expander_handle) {
- *pca9575_handle = hw->io_expander_handle;
- return 0;
- }
-
- /* If handle was not detected read it from the netlist */
- cmd = &desc.params.get_link_topo;
- ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo);
-
- /* Set node type to GPIO controller */
- cmd->addr.topo_params.node_type_ctx =
- (ICE_AQC_LINK_TOPO_NODE_TYPE_M &
- ICE_AQC_LINK_TOPO_NODE_TYPE_GPIO_CTRL);
-
-#define SW_PCA9575_SFP_TOPO_IDX 2
-#define SW_PCA9575_QSFP_TOPO_IDX 1
-
- /* Check if the SW IO expander controlling SMA exists in the netlist. */
- if (hw->device_id == ICE_DEV_ID_E810C_SFP)
- idx = SW_PCA9575_SFP_TOPO_IDX;
- else if (hw->device_id == ICE_DEV_ID_E810C_QSFP)
- idx = SW_PCA9575_QSFP_TOPO_IDX;
- else
- return -EOPNOTSUPP;
-
- cmd->addr.topo_params.index = idx;
-
- status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
- if (status)
- return -EOPNOTSUPP;
-
- /* Verify if we found the right IO expander type */
- if (desc.params.get_link_topo.node_part_num !=
- ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575)
- return -EOPNOTSUPP;
-
- /* If present save the handle and return it */
- hw->io_expander_handle =
- le16_to_cpu(desc.params.get_link_topo.addr.handle);
- *pca9575_handle = hw->io_expander_handle;
-
- return 0;
-}
-
-/**
* ice_read_sma_ctrl
* @hw: pointer to the hw struct
* @data: pointer to data to be read from the GPIO controller
@@ -5442,37 +5385,6 @@ int ice_write_sma_ctrl(struct ice_hw *hw, u8 data)
}
/**
- * ice_read_pca9575_reg
- * @hw: pointer to the hw struct
- * @offset: GPIO controller register offset
- * @data: pointer to data to be read from the GPIO controller
- *
- * Read the register from the GPIO controller
- */
-int ice_read_pca9575_reg(struct ice_hw *hw, u8 offset, u8 *data)
-{
- struct ice_aqc_link_topo_addr link_topo;
- __le16 addr;
- u16 handle;
- int err;
-
- memset(&link_topo, 0, sizeof(link_topo));
-
- err = ice_get_pca9575_handle(hw, &handle);
- if (err)
- return err;
-
- link_topo.handle = cpu_to_le16(handle);
- link_topo.topo_params.node_type_ctx =
- FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M,
- ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED);
-
- addr = cpu_to_le16((u16)offset);
-
- return ice_aq_read_i2c(hw, link_topo, 0, addr, 1, data, NULL);
-}
-
-/**
* ice_ptp_read_sdp_ac - read SDP available connections section from NVM
* @hw: pointer to the HW struct
* @entries: returns the SDP available connections section from NVM
@@ -5538,18 +5450,138 @@ exit:
*/
static void ice_ptp_init_phy_e810(struct ice_ptp_hw *ptp)
{
- ptp->phy_model = ICE_PHY_E810;
ptp->num_lports = 8;
ptp->ports_per_phy = 4;
init_waitqueue_head(&ptp->phy.e810.atqbal_wq);
}
+/* E830 functions
+ *
+ * The following functions operate on the E830 series devices.
+ *
+ */
+
+/**
+ * ice_ptp_init_phc_e830 - Perform E830 specific PHC initialization
+ * @hw: pointer to HW struct
+ *
+ * Perform E830-specific PTP hardware clock initialization steps.
+ */
+static void ice_ptp_init_phc_e830(const struct ice_hw *hw)
+{
+ ice_ptp_cfg_sync_delay(hw, ICE_E810_E830_SYNC_DELAY);
+}
+
+/**
+ * ice_ptp_write_direct_incval_e830 - Prep PHY port increment value change
+ * @hw: pointer to HW struct
+ * @incval: The new 40bit increment value to prepare
+ *
+ * Prepare the PHY port for a new increment value by programming the PHC
+ * GLTSYN_INCVAL_L and GLTSYN_INCVAL_H registers. The actual change is
+ * completed by FW automatically.
+ */
+static void ice_ptp_write_direct_incval_e830(const struct ice_hw *hw,
+ u64 incval)
+{
+ u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+ wr32(hw, GLTSYN_INCVAL_L(tmr_idx), lower_32_bits(incval));
+ wr32(hw, GLTSYN_INCVAL_H(tmr_idx), upper_32_bits(incval));
+}
+
+/**
+ * ice_ptp_write_direct_phc_time_e830 - Prepare PHY port with initial time
+ * @hw: Board private structure
+ * @time: Time to initialize the PHY port clock to
+ *
+ * Program the PHY port ETH_GLTSYN_SHTIME registers in preparation setting the
+ * initial clock time. The time will not actually be programmed until the
+ * driver issues an ICE_PTP_INIT_TIME command.
+ *
+ * The time value is the upper 32 bits of the PHY timer, usually in units of
+ * nominal nanoseconds.
+ */
+static void ice_ptp_write_direct_phc_time_e830(const struct ice_hw *hw,
+ u64 time)
+{
+ u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+ wr32(hw, GLTSYN_TIME_0(tmr_idx), 0);
+ wr32(hw, GLTSYN_TIME_L(tmr_idx), lower_32_bits(time));
+ wr32(hw, GLTSYN_TIME_H(tmr_idx), upper_32_bits(time));
+}
+
+/**
+ * ice_ptp_port_cmd_e830 - Prepare all external PHYs for a timer command
+ * @hw: pointer to HW struct
+ * @cmd: Command to be sent to the port
+ *
+ * Prepare the external PHYs connected to this device for a timer sync
+ * command.
+ *
+ * Return: 0 on success, negative error code when PHY write failed
+ */
+static int ice_ptp_port_cmd_e830(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
+{
+ u32 val = ice_ptp_tmr_cmd_to_port_reg(hw, cmd);
+
+ return ice_write_phy_reg_e810(hw, E830_ETH_GLTSYN_CMD, val);
+}
+
+/**
+ * ice_read_phy_tstamp_e830 - Read a PHY timestamp out of the external PHY
+ * @hw: pointer to the HW struct
+ * @idx: the timestamp index to read
+ * @tstamp: on return, the 40bit timestamp value
+ *
+ * Read a 40bit timestamp value out of the timestamp block of the external PHY
+ * on the E830 device.
+ */
+static void ice_read_phy_tstamp_e830(const struct ice_hw *hw, u8 idx,
+ u64 *tstamp)
+{
+ u32 hi, lo;
+
+ hi = rd32(hw, E830_PRTTSYN_TXTIME_H(idx));
+ lo = rd32(hw, E830_PRTTSYN_TXTIME_L(idx));
+
+ /* For E830 devices, the timestamp is reported with the lower 32 bits
+ * in the low register, and the upper 8 bits in the high register.
+ */
+ *tstamp = FIELD_PREP(PHY_EXT_40B_HIGH_M, hi) |
+ FIELD_PREP(PHY_EXT_40B_LOW_M, lo);
+}
+
+/**
+ * ice_get_phy_tx_tstamp_ready_e830 - Read Tx memory status register
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to read
+ * @tstamp_ready: contents of the Tx memory status register
+ */
+static void ice_get_phy_tx_tstamp_ready_e830(const struct ice_hw *hw, u8 port,
+ u64 *tstamp_ready)
+{
+ *tstamp_ready = rd32(hw, E830_PRTMAC_TS_TX_MEM_VALID_H);
+ *tstamp_ready <<= 32;
+ *tstamp_ready |= rd32(hw, E830_PRTMAC_TS_TX_MEM_VALID_L);
+}
+
+/**
+ * ice_ptp_init_phy_e830 - initialize PHY parameters
+ * @ptp: pointer to the PTP HW struct
+ */
+static void ice_ptp_init_phy_e830(struct ice_ptp_hw *ptp)
+{
+ ptp->num_lports = 8;
+ ptp->ports_per_phy = 4;
+}
+
/* Device agnostic functions
*
- * The following functions implement shared behavior common to both E822 and
- * E810 devices, possibly calling a device specific implementation where
- * necessary.
+ * The following functions implement shared behavior common to all devices,
+ * possibly calling a device specific implementation where necessary.
*/
/**
@@ -5612,14 +5644,22 @@ void ice_ptp_init_hw(struct ice_hw *hw)
{
struct ice_ptp_hw *ptp = &hw->ptp;
- if (ice_is_e822(hw) || ice_is_e823(hw))
- ice_ptp_init_phy_e82x(ptp);
- else if (ice_is_e810(hw))
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
ice_ptp_init_phy_e810(ptp);
- else if (ice_is_e825c(hw))
+ break;
+ case ICE_MAC_E830:
+ ice_ptp_init_phy_e830(ptp);
+ break;
+ case ICE_MAC_GENERIC:
+ ice_ptp_init_phy_e82x(ptp);
+ break;
+ case ICE_MAC_GENERIC_3K_E825:
ice_ptp_init_phy_e825(hw);
- else
- ptp->phy_model = ICE_PHY_UNSUP;
+ break;
+ default:
+ return;
+ }
}
/**
@@ -5640,11 +5680,11 @@ void ice_ptp_init_hw(struct ice_hw *hw)
static int ice_ptp_write_port_cmd(struct ice_hw *hw, u8 port,
enum ice_ptp_tmr_cmd cmd)
{
- switch (ice_get_phy_model(hw)) {
- case ICE_PHY_ETH56G:
- return ice_ptp_write_port_cmd_eth56g(hw, port, cmd);
- case ICE_PHY_E82X:
+ switch (hw->mac_type) {
+ case ICE_MAC_GENERIC:
return ice_ptp_write_port_cmd_e82x(hw, port, cmd);
+ case ICE_MAC_GENERIC_3K_E825:
+ return ice_ptp_write_port_cmd_eth56g(hw, port, cmd);
default:
return -EOPNOTSUPP;
}
@@ -5705,9 +5745,11 @@ static int ice_ptp_port_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
u32 port;
/* PHY models which can program all ports simultaneously */
- switch (ice_get_phy_model(hw)) {
- case ICE_PHY_E810:
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
return ice_ptp_port_cmd_e810(hw, cmd);
+ case ICE_MAC_E830:
+ return ice_ptp_port_cmd_e830(hw, cmd);
default:
break;
}
@@ -5778,23 +5820,29 @@ int ice_ptp_init_time(struct ice_hw *hw, u64 time)
tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
/* Source timers */
+ /* For E830 we don't need to use shadow registers, its automatic */
+ if (hw->mac_type == ICE_MAC_E830) {
+ ice_ptp_write_direct_phc_time_e830(hw, time);
+ return 0;
+ }
+
wr32(hw, GLTSYN_SHTIME_L(tmr_idx), lower_32_bits(time));
wr32(hw, GLTSYN_SHTIME_H(tmr_idx), upper_32_bits(time));
wr32(hw, GLTSYN_SHTIME_0(tmr_idx), 0);
/* PHY timers */
/* Fill Rx and Tx ports and send msg to PHY */
- switch (ice_get_phy_model(hw)) {
- case ICE_PHY_ETH56G:
- err = ice_ptp_prep_phy_time_eth56g(hw,
- (u32)(time & 0xFFFFFFFF));
- break;
- case ICE_PHY_E810:
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
err = ice_ptp_prep_phy_time_e810(hw, time & 0xFFFFFFFF);
break;
- case ICE_PHY_E82X:
+ case ICE_MAC_GENERIC:
err = ice_ptp_prep_phy_time_e82x(hw, time & 0xFFFFFFFF);
break;
+ case ICE_MAC_GENERIC_3K_E825:
+ err = ice_ptp_prep_phy_time_eth56g(hw,
+ (u32)(time & 0xFFFFFFFF));
+ break;
default:
err = -EOPNOTSUPP;
}
@@ -5826,20 +5874,26 @@ int ice_ptp_write_incval(struct ice_hw *hw, u64 incval)
tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+ /* For E830 we don't need to use shadow registers, its automatic */
+ if (hw->mac_type == ICE_MAC_E830) {
+ ice_ptp_write_direct_incval_e830(hw, incval);
+ return 0;
+ }
+
/* Shadow Adjust */
wr32(hw, GLTSYN_SHADJ_L(tmr_idx), lower_32_bits(incval));
wr32(hw, GLTSYN_SHADJ_H(tmr_idx), upper_32_bits(incval));
- switch (ice_get_phy_model(hw)) {
- case ICE_PHY_ETH56G:
- err = ice_ptp_prep_phy_incval_eth56g(hw, incval);
- break;
- case ICE_PHY_E810:
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
err = ice_ptp_prep_phy_incval_e810(hw, incval);
break;
- case ICE_PHY_E82X:
+ case ICE_MAC_GENERIC:
err = ice_ptp_prep_phy_incval_e82x(hw, incval);
break;
+ case ICE_MAC_GENERIC_3K_E825:
+ err = ice_ptp_prep_phy_incval_eth56g(hw, incval);
+ break;
default:
err = -EOPNOTSUPP;
}
@@ -5899,16 +5953,19 @@ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj)
wr32(hw, GLTSYN_SHADJ_L(tmr_idx), 0);
wr32(hw, GLTSYN_SHADJ_H(tmr_idx), adj);
- switch (ice_get_phy_model(hw)) {
- case ICE_PHY_ETH56G:
- err = ice_ptp_prep_phy_adj_eth56g(hw, adj);
- break;
- case ICE_PHY_E810:
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
err = ice_ptp_prep_phy_adj_e810(hw, adj);
break;
- case ICE_PHY_E82X:
+ case ICE_MAC_E830:
+ /* E830 sync PHYs automatically after setting GLTSYN_SHADJ */
+ return 0;
+ case ICE_MAC_GENERIC:
err = ice_ptp_prep_phy_adj_e82x(hw, adj);
break;
+ case ICE_MAC_GENERIC_3K_E825:
+ err = ice_ptp_prep_phy_adj_eth56g(hw, adj);
+ break;
default:
err = -EOPNOTSUPP;
}
@@ -5932,13 +5989,16 @@ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj)
*/
int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp)
{
- switch (ice_get_phy_model(hw)) {
- case ICE_PHY_ETH56G:
- return ice_read_ptp_tstamp_eth56g(hw, block, idx, tstamp);
- case ICE_PHY_E810:
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
return ice_read_phy_tstamp_e810(hw, block, idx, tstamp);
- case ICE_PHY_E82X:
+ case ICE_MAC_E830:
+ ice_read_phy_tstamp_e830(hw, idx, tstamp);
+ return 0;
+ case ICE_MAC_GENERIC:
return ice_read_phy_tstamp_e82x(hw, block, idx, tstamp);
+ case ICE_MAC_GENERIC_3K_E825:
+ return ice_read_ptp_tstamp_eth56g(hw, block, idx, tstamp);
default:
return -EOPNOTSUPP;
}
@@ -5962,13 +6022,13 @@ int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp)
*/
int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx)
{
- switch (ice_get_phy_model(hw)) {
- case ICE_PHY_ETH56G:
- return ice_clear_ptp_tstamp_eth56g(hw, block, idx);
- case ICE_PHY_E810:
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
return ice_clear_phy_tstamp_e810(hw, block, idx);
- case ICE_PHY_E82X:
+ case ICE_MAC_GENERIC:
return ice_clear_phy_tstamp_e82x(hw, block, idx);
+ case ICE_MAC_GENERIC_3K_E825:
+ return ice_clear_ptp_tstamp_eth56g(hw, block, idx);
default:
return -EOPNOTSUPP;
}
@@ -6025,14 +6085,14 @@ static int ice_get_pf_c827_idx(struct ice_hw *hw, u8 *idx)
*/
void ice_ptp_reset_ts_memory(struct ice_hw *hw)
{
- switch (ice_get_phy_model(hw)) {
- case ICE_PHY_ETH56G:
- ice_ptp_reset_ts_memory_eth56g(hw);
- break;
- case ICE_PHY_E82X:
+ switch (hw->mac_type) {
+ case ICE_MAC_GENERIC:
ice_ptp_reset_ts_memory_e82x(hw);
break;
- case ICE_PHY_E810:
+ case ICE_MAC_GENERIC_3K_E825:
+ ice_ptp_reset_ts_memory_eth56g(hw);
+ break;
+ case ICE_MAC_E810:
default:
return;
}
@@ -6054,13 +6114,16 @@ int ice_ptp_init_phc(struct ice_hw *hw)
/* Clear event err indications for auxiliary pins */
(void)rd32(hw, GLTSYN_STAT(src_idx));
- switch (ice_get_phy_model(hw)) {
- case ICE_PHY_ETH56G:
- return ice_ptp_init_phc_eth56g(hw);
- case ICE_PHY_E810:
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
return ice_ptp_init_phc_e810(hw);
- case ICE_PHY_E82X:
+ case ICE_MAC_E830:
+ ice_ptp_init_phc_e830(hw);
+ return 0;
+ case ICE_MAC_GENERIC:
return ice_ptp_init_phc_e82x(hw);
+ case ICE_MAC_GENERIC_3K_E825:
+ return ice_ptp_init_phc_eth56g(hw);
default:
return -EOPNOTSUPP;
}
@@ -6079,17 +6142,19 @@ int ice_ptp_init_phc(struct ice_hw *hw)
*/
int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready)
{
- switch (ice_get_phy_model(hw)) {
- case ICE_PHY_ETH56G:
- return ice_get_phy_tx_tstamp_ready_eth56g(hw, block,
- tstamp_ready);
- case ICE_PHY_E810:
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
return ice_get_phy_tx_tstamp_ready_e810(hw, block,
tstamp_ready);
- case ICE_PHY_E82X:
+ case ICE_MAC_E830:
+ ice_get_phy_tx_tstamp_ready_e830(hw, block, tstamp_ready);
+ return 0;
+ case ICE_MAC_GENERIC:
return ice_get_phy_tx_tstamp_ready_e82x(hw, block,
tstamp_ready);
- break;
+ case ICE_MAC_GENERIC_3K_E825:
+ return ice_get_phy_tx_tstamp_ready_eth56g(hw, block,
+ tstamp_ready);
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
index 6779ce120515..8442d1d60351 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
@@ -324,6 +324,7 @@ extern const struct ice_vernier_info_e82x e822_vernier[NUM_ICE_PTP_LNK_SPD];
*/
#define ICE_E810_PLL_FREQ 812500000
#define ICE_PTP_NOMINAL_INCVAL_E810 0x13b13b13bULL
+#define ICE_E810_E830_SYNC_DELAY 0
/* Device agnostic functions */
u8 ice_get_ptp_src_clock_index(struct ice_hw *hw);
@@ -395,7 +396,6 @@ int ice_phy_cfg_intr_e82x(struct ice_hw *hw, u8 quad, bool ena, u8 threshold);
/* E810 family functions */
int ice_read_sma_ctrl(struct ice_hw *hw, u8 *data);
int ice_write_sma_ctrl(struct ice_hw *hw, u8 data);
-int ice_read_pca9575_reg(struct ice_hw *hw, u8 offset, u8 *data);
int ice_ptp_read_sdp_ac(struct ice_hw *hw, __le16 *entries, uint *num_entries);
int ice_cgu_get_num_pins(struct ice_hw *hw, bool input);
enum dpll_pin_type ice_cgu_get_pin_type(struct ice_hw *hw, u8 pin, bool input);
@@ -431,13 +431,14 @@ int ice_phy_cfg_ptp_1step_eth56g(struct ice_hw *hw, u8 port);
*/
static inline u64 ice_get_base_incval(struct ice_hw *hw)
{
- switch (hw->ptp.phy_model) {
- case ICE_PHY_ETH56G:
- return ICE_ETH56G_NOMINAL_INCVAL;
- case ICE_PHY_E810:
+ switch (hw->mac_type) {
+ case ICE_MAC_E810:
+ case ICE_MAC_E830:
return ICE_PTP_NOMINAL_INCVAL_E810;
- case ICE_PHY_E82X:
+ case ICE_MAC_GENERIC:
return ice_e82x_nominal_incval(ice_e82x_time_ref(hw));
+ case ICE_MAC_GENERIC_3K_E825:
+ return ICE_ETH56G_NOMINAL_INCVAL;
default:
return 0;
}
@@ -650,18 +651,25 @@ static inline bool ice_is_dual(struct ice_hw *hw)
/* E810 timer command register */
#define E810_ETH_GLTSYN_CMD 0x03000344
+/* E830 timer command register */
+#define E830_ETH_GLTSYN_CMD 0x00088814
+
+/* E810 PHC time register */
+#define E830_GLTSYN_TIME_L(_tmr_idx) (0x0008A000 + 0x1000 * (_tmr_idx))
+
/* Source timer incval macros */
#define INCVAL_HIGH_M 0xFF
-/* Timestamp block macros */
+/* PHY 40b registers macros */
+#define PHY_EXT_40B_LOW_M GENMASK(31, 0)
+#define PHY_EXT_40B_HIGH_M GENMASK_ULL(39, 32)
+#define PHY_40B_LOW_M GENMASK(7, 0)
+#define PHY_40B_HIGH_M GENMASK_ULL(39, 8)
#define TS_VALID BIT(0)
#define TS_LOW_M 0xFFFFFFFF
#define TS_HIGH_M 0xFF
#define TS_HIGH_S 32
-#define TS_PHY_LOW_M GENMASK(7, 0)
-#define TS_PHY_HIGH_M GENMASK_ULL(39, 8)
-
#define BYTES_PER_IDX_ADDR_L_U 8
#define BYTES_PER_IDX_ADDR_L 4
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index b83f99c01d91..33eac29b6a50 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -123,27 +123,6 @@ static void ice_dis_vf_mappings(struct ice_vf *vf)
}
/**
- * ice_sriov_free_msix_res - Reset/free any used MSIX resources
- * @pf: pointer to the PF structure
- *
- * Since no MSIX entries are taken from the pf->irq_tracker then just clear
- * the pf->sriov_base_vector.
- *
- * Returns 0 on success, and -EINVAL on error.
- */
-static int ice_sriov_free_msix_res(struct ice_pf *pf)
-{
- if (!pf)
- return -EINVAL;
-
- bitmap_free(pf->sriov_irq_bm);
- pf->sriov_irq_size = 0;
- pf->sriov_base_vector = 0;
-
- return 0;
-}
-
-/**
* ice_free_vfs - Free all VFs
* @pf: pointer to the PF structure
*/
@@ -177,6 +156,7 @@ void ice_free_vfs(struct ice_pf *pf)
ice_eswitch_detach_vf(pf, vf);
ice_dis_vf_qs(vf);
+ ice_virt_free_irqs(pf, vf->first_vector_idx, vf->num_msix);
if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
/* disable VF qp mappings and set VF disable state */
@@ -200,9 +180,6 @@ void ice_free_vfs(struct ice_pf *pf)
mutex_unlock(&vf->cfg_lock);
}
- if (ice_sriov_free_msix_res(pf))
- dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n");
-
vfs->num_qps_per = 0;
ice_free_vf_entries(pf);
@@ -372,40 +349,6 @@ void ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector)
}
/**
- * ice_sriov_set_msix_res - Set any used MSIX resources
- * @pf: pointer to PF structure
- * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs
- *
- * This function allows SR-IOV resources to be taken from the end of the PF's
- * allowed HW MSIX vectors so that the irq_tracker will not be affected. We
- * just set the pf->sriov_base_vector and return success.
- *
- * If there are not enough resources available, return an error. This should
- * always be caught by ice_set_per_vf_res().
- *
- * Return 0 on success, and -EINVAL when there are not enough MSIX vectors
- * in the PF's space available for SR-IOV.
- */
-static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed)
-{
- u16 total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors;
- int vectors_used = ice_get_max_used_msix_vector(pf);
- int sriov_base_vector;
-
- sriov_base_vector = total_vectors - num_msix_needed;
-
- /* make sure we only grab irq_tracker entries from the list end and
- * that we have enough available MSIX vectors
- */
- if (sriov_base_vector < vectors_used)
- return -EINVAL;
-
- pf->sriov_base_vector = sriov_base_vector;
-
- return 0;
-}
-
-/**
* ice_set_per_vf_res - check if vectors and queues are available
* @pf: pointer to the PF structure
* @num_vfs: the number of SR-IOV VFs being configured
@@ -429,11 +372,9 @@ static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed)
*/
static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs)
{
- int vectors_used = ice_get_max_used_msix_vector(pf);
u16 num_msix_per_vf, num_txq, num_rxq, avail_qs;
int msix_avail_per_vf, msix_avail_for_sriov;
struct device *dev = ice_pf_to_dev(pf);
- int err;
lockdep_assert_held(&pf->vfs.table_lock);
@@ -441,8 +382,7 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs)
return -EINVAL;
/* determine MSI-X resources per VF */
- msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors -
- vectors_used;
+ msix_avail_for_sriov = pf->virt_irq_tracker.num_entries;
msix_avail_per_vf = msix_avail_for_sriov / num_vfs;
if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MED) {
num_msix_per_vf = ICE_NUM_VF_MSIX_MED;
@@ -481,13 +421,6 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs)
return -ENOSPC;
}
- err = ice_sriov_set_msix_res(pf, num_msix_per_vf * num_vfs);
- if (err) {
- dev_err(dev, "Unable to set MSI-X resources for %d VFs, err %d\n",
- num_vfs, err);
- return err;
- }
-
/* only allow equal Tx/Rx queue count (i.e. queue pairs) */
pf->vfs.num_qps_per = min_t(int, num_txq, num_rxq);
pf->vfs.num_msix_per = num_msix_per_vf;
@@ -498,52 +431,6 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs)
}
/**
- * ice_sriov_get_irqs - get irqs for SR-IOV usacase
- * @pf: pointer to PF structure
- * @needed: number of irqs to get
- *
- * This returns the first MSI-X vector index in PF space that is used by this
- * VF. This index is used when accessing PF relative registers such as
- * GLINT_VECT2FUNC and GLINT_DYN_CTL.
- * This will always be the OICR index in the AVF driver so any functionality
- * using vf->first_vector_idx for queue configuration_id: id of VF which will
- * use this irqs
- *
- * Only SRIOV specific vectors are tracked in sriov_irq_bm. SRIOV vectors are
- * allocated from the end of global irq index. First bit in sriov_irq_bm means
- * last irq index etc. It simplifies extension of SRIOV vectors.
- * They will be always located from sriov_base_vector to the last irq
- * index. While increasing/decreasing sriov_base_vector can be moved.
- */
-static int ice_sriov_get_irqs(struct ice_pf *pf, u16 needed)
-{
- int res = bitmap_find_next_zero_area(pf->sriov_irq_bm,
- pf->sriov_irq_size, 0, needed, 0);
- /* conversion from number in bitmap to global irq index */
- int index = pf->sriov_irq_size - res - needed;
-
- if (res >= pf->sriov_irq_size || index < pf->sriov_base_vector)
- return -ENOENT;
-
- bitmap_set(pf->sriov_irq_bm, res, needed);
- return index;
-}
-
-/**
- * ice_sriov_free_irqs - free irqs used by the VF
- * @pf: pointer to PF structure
- * @vf: pointer to VF structure
- */
-static void ice_sriov_free_irqs(struct ice_pf *pf, struct ice_vf *vf)
-{
- /* Move back from first vector index to first index in bitmap */
- int bm_i = pf->sriov_irq_size - vf->first_vector_idx - vf->num_msix;
-
- bitmap_clear(pf->sriov_irq_bm, bm_i, vf->num_msix);
- vf->first_vector_idx = 0;
-}
-
-/**
* ice_init_vf_vsi_res - initialize/setup VF VSI resources
* @vf: VF to initialize/setup the VSI for
*
@@ -556,7 +443,7 @@ static int ice_init_vf_vsi_res(struct ice_vf *vf)
struct ice_vsi *vsi;
int err;
- vf->first_vector_idx = ice_sriov_get_irqs(pf, vf->num_msix);
+ vf->first_vector_idx = ice_virt_get_irqs(pf, vf->num_msix);
if (vf->first_vector_idx < 0)
return -ENOMEM;
@@ -856,16 +743,10 @@ err_free_entries:
*/
static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
{
- int total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors;
struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
int ret;
- pf->sriov_irq_bm = bitmap_zalloc(total_vectors, GFP_KERNEL);
- if (!pf->sriov_irq_bm)
- return -ENOMEM;
- pf->sriov_irq_size = total_vectors;
-
/* Disable global interrupt 0 so we don't try to handle the VFLR. */
wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index),
ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S);
@@ -918,7 +799,6 @@ err_unroll_intr:
/* rearm interrupts here */
ice_irq_dynamic_ena(hw, NULL, NULL);
clear_bit(ICE_OICR_INTR_DIS, pf->state);
- bitmap_free(pf->sriov_irq_bm);
return ret;
}
@@ -992,16 +872,7 @@ u32 ice_sriov_get_vf_total_msix(struct pci_dev *pdev)
{
struct ice_pf *pf = pci_get_drvdata(pdev);
- return pf->sriov_irq_size - ice_get_max_used_msix_vector(pf);
-}
-
-static int ice_sriov_move_base_vector(struct ice_pf *pf, int move)
-{
- if (pf->sriov_base_vector - move < ice_get_max_used_msix_vector(pf))
- return -ENOMEM;
-
- pf->sriov_base_vector -= move;
- return 0;
+ return pf->virt_irq_tracker.num_entries;
}
static void ice_sriov_remap_vectors(struct ice_pf *pf, u16 restricted_id)
@@ -1020,7 +891,8 @@ static void ice_sriov_remap_vectors(struct ice_pf *pf, u16 restricted_id)
continue;
ice_dis_vf_mappings(tmp_vf);
- ice_sriov_free_irqs(pf, tmp_vf);
+ ice_virt_free_irqs(pf, tmp_vf->first_vector_idx,
+ tmp_vf->num_msix);
vf_ids[to_remap] = tmp_vf->vf_id;
to_remap += 1;
@@ -1032,7 +904,7 @@ static void ice_sriov_remap_vectors(struct ice_pf *pf, u16 restricted_id)
continue;
tmp_vf->first_vector_idx =
- ice_sriov_get_irqs(pf, tmp_vf->num_msix);
+ ice_virt_get_irqs(pf, tmp_vf->num_msix);
/* there is no need to rebuild VSI as we are only changing the
* vector indexes not amount of MSI-X or queues
*/
@@ -1105,20 +977,15 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count)
prev_msix = vf->num_msix;
prev_queues = vf->num_vf_qs;
- if (ice_sriov_move_base_vector(pf, msix_vec_count - prev_msix)) {
- ice_put_vf(vf);
- return -ENOSPC;
- }
-
ice_dis_vf_mappings(vf);
- ice_sriov_free_irqs(pf, vf);
+ ice_virt_free_irqs(pf, vf->first_vector_idx, vf->num_msix);
/* Remap all VFs beside the one is now configured */
ice_sriov_remap_vectors(pf, vf->vf_id);
vf->num_msix = msix_vec_count;
vf->num_vf_qs = queues;
- vf->first_vector_idx = ice_sriov_get_irqs(pf, vf->num_msix);
+ vf->first_vector_idx = ice_virt_get_irqs(pf, vf->num_msix);
if (vf->first_vector_idx < 0)
goto unroll;
@@ -1147,7 +1014,8 @@ unroll:
vf->num_msix = prev_msix;
vf->num_vf_qs = prev_queues;
- vf->first_vector_idx = ice_sriov_get_irqs(pf, vf->num_msix);
+
+ vf->first_vector_idx = ice_virt_get_irqs(pf, vf->num_msix);
if (vf->first_vector_idx < 0) {
ice_put_vf(vf);
return -EINVAL;
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 33a1a5934c0d..0aab21113cc4 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -871,14 +871,6 @@ union ice_phy_params {
struct ice_eth56g_params eth56g;
};
-/* PHY model */
-enum ice_phy_model {
- ICE_PHY_UNSUP = -1,
- ICE_PHY_E810 = 1,
- ICE_PHY_E82X,
- ICE_PHY_ETH56G,
-};
-
/* Global Link Topology */
enum ice_global_link_topo {
ICE_LINK_TOPO_UP_TO_2_LINKS,
@@ -888,7 +880,6 @@ enum ice_global_link_topo {
};
struct ice_ptp_hw {
- enum ice_phy_model phy_model;
union ice_phy_params phy;
u8 num_lports;
u8 ports_per_phy;
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
index 4261fe1c2bcd..799b2c1f1184 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
@@ -124,6 +124,9 @@ struct ice_vf {
u8 spoofchk:1;
u8 link_forced:1;
u8 link_up:1; /* only valid if VF link is forced */
+
+ u32 ptp_caps;
+
unsigned int min_tx_rate; /* Minimum Tx bandwidth limit in Mbps */
unsigned int max_tx_rate; /* Maximum Tx bandwidth limit in Mbps */
DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
index ff4ad788d96a..674767781fe4 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -498,6 +498,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_QOS)
vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_QOS;
+ if (vf->driver_caps & VIRTCHNL_VF_CAP_PTP)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_CAP_PTP;
+
vfres->num_vsis = 1;
/* Tx and Rx queue are equal for VF */
vfres->num_queue_pairs = vsi->num_txq;
@@ -1975,6 +1978,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
struct ice_vsi *vsi;
u8 act_prt, pri_prt;
int i = -1, q_idx;
+ bool ena_ts;
lag = pf->lag;
mutex_lock(&pf->lag_mutex);
@@ -2104,9 +2108,14 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
rxdid = ICE_RXDID_LEGACY_1;
}
+ ena_ts = ((vf->driver_caps &
+ VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) &&
+ (vf->driver_caps & VIRTCHNL_VF_CAP_PTP) &&
+ (qpi->rxq.flags & VIRTCHNL_PTP_RX_TSTAMP));
+
ice_write_qrxflxp_cntxt(&vsi->back->hw,
- vsi->rxq_map[q_idx],
- rxdid, 0x03, false);
+ vsi->rxq_map[q_idx], rxdid,
+ ICE_RXDID_PRIO, ena_ts);
}
}
@@ -3031,8 +3040,8 @@ err:
static int ice_vc_query_rxdid(struct ice_vf *vf)
{
enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
- struct virtchnl_supported_rxdids rxdid = {};
struct ice_pf *pf = vf->pf;
+ u64 rxdid;
if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -3044,7 +3053,7 @@ static int ice_vc_query_rxdid(struct ice_vf *vf)
goto err;
}
- rxdid.supported_rxdids = pf->supported_rxdids;
+ rxdid = pf->supported_rxdids;
err:
return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_SUPPORTED_RXDIDS,
@@ -4092,6 +4101,59 @@ out:
v_ret, NULL, 0);
}
+static int ice_vc_get_ptp_cap(struct ice_vf *vf,
+ const struct virtchnl_ptp_caps *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ u32 caps = VIRTCHNL_1588_PTP_CAP_RX_TSTAMP |
+ VIRTCHNL_1588_PTP_CAP_READ_PHC;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+ goto err;
+
+ v_ret = VIRTCHNL_STATUS_SUCCESS;
+
+ if (msg->caps & caps)
+ vf->ptp_caps = caps;
+
+err:
+ /* send the response back to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_1588_PTP_GET_CAPS, v_ret,
+ (u8 *)&vf->ptp_caps,
+ sizeof(struct virtchnl_ptp_caps));
+}
+
+static int ice_vc_get_phc_time(struct ice_vf *vf)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ struct virtchnl_phc_time *phc_time = NULL;
+ struct ice_pf *pf = vf->pf;
+ u32 len = 0;
+ int ret;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+ goto err;
+
+ v_ret = VIRTCHNL_STATUS_SUCCESS;
+
+ phc_time = kzalloc(sizeof(*phc_time), GFP_KERNEL);
+ if (!phc_time) {
+ v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+ goto err;
+ }
+
+ len = sizeof(*phc_time);
+
+ phc_time->time = ice_ptp_read_src_clk_reg(pf, NULL);
+
+err:
+ /* send the response back to the VF */
+ ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_1588_PTP_GET_TIME, v_ret,
+ (u8 *)phc_time, len);
+ kfree(phc_time);
+ return ret;
+}
+
static const struct ice_virtchnl_ops ice_virtchnl_dflt_ops = {
.get_ver_msg = ice_vc_get_ver_msg,
.get_vf_res_msg = ice_vc_get_vf_res_msg,
@@ -4128,6 +4190,8 @@ static const struct ice_virtchnl_ops ice_virtchnl_dflt_ops = {
.get_qos_caps = ice_vc_get_qos_caps,
.cfg_q_bw = ice_vc_cfg_q_bw,
.cfg_q_quanta = ice_vc_cfg_q_quanta,
+ .get_ptp_cap = ice_vc_get_ptp_cap,
+ .get_phc_time = ice_vc_get_phc_time,
/* If you add a new op here please make sure to add it to
* ice_virtchnl_repr_ops as well.
*/
@@ -4264,6 +4328,8 @@ static const struct ice_virtchnl_ops ice_virtchnl_repr_ops = {
.get_qos_caps = ice_vc_get_qos_caps,
.cfg_q_bw = ice_vc_cfg_q_bw,
.cfg_q_quanta = ice_vc_cfg_q_quanta,
+ .get_ptp_cap = ice_vc_get_ptp_cap,
+ .get_phc_time = ice_vc_get_phc_time,
};
/**
@@ -4501,6 +4567,12 @@ error_handler:
case VIRTCHNL_OP_CONFIG_QUANTA:
err = ops->cfg_q_quanta(vf, msg);
break;
+ case VIRTCHNL_OP_1588_PTP_GET_CAPS:
+ err = ops->get_ptp_cap(vf, (const void *)msg);
+ break;
+ case VIRTCHNL_OP_1588_PTP_GET_TIME:
+ err = ops->get_phc_time(vf);
+ break;
case VIRTCHNL_OP_UNKNOWN:
default:
dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode,
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h
index 0c629aef9baf..222990f229d5 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h
@@ -26,6 +26,9 @@
#define ICE_MAX_MACADDR_PER_VF 18
#define ICE_FLEX_DESC_RXDID_MAX_NUM 64
+/* Priority to be compared against previous priority from the pipe */
+#define ICE_RXDID_PRIO 0x03
+
/* VFs only get a single VSI. For ice hardware, the VF does not need to know
* its VSI index. However, the virtchnl interface requires a VSI number,
* mainly due to legacy hardware.
@@ -72,6 +75,9 @@ struct ice_virtchnl_ops {
int (*cfg_q_tc_map)(struct ice_vf *vf, u8 *msg);
int (*cfg_q_bw)(struct ice_vf *vf, u8 *msg);
int (*cfg_q_quanta)(struct ice_vf *vf, u8 *msg);
+ int (*get_ptp_cap)(struct ice_vf *vf,
+ const struct virtchnl_ptp_caps *msg);
+ int (*get_phc_time)(struct ice_vf *vf);
};
#ifdef CONFIG_PCI_IOV
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
index c105a82ee136..a3d1579a619a 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
@@ -84,6 +84,12 @@ static const u32 fdir_pf_allowlist_opcodes[] = {
VIRTCHNL_OP_ADD_FDIR_FILTER, VIRTCHNL_OP_DEL_FDIR_FILTER,
};
+/* VIRTCHNL_VF_CAP_PTP */
+static const u32 ptp_allowlist_opcodes[] = {
+ VIRTCHNL_OP_1588_PTP_GET_CAPS,
+ VIRTCHNL_OP_1588_PTP_GET_TIME,
+};
+
static const u32 tc_allowlist_opcodes[] = {
VIRTCHNL_OP_GET_QOS_CAPS, VIRTCHNL_OP_CONFIG_QUEUE_BW,
VIRTCHNL_OP_CONFIG_QUANTA,
@@ -110,6 +116,7 @@ static const struct allowlist_opcode_info allowlist_opcodes[] = {
ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_FDIR_PF, fdir_pf_allowlist_opcodes),
ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN_V2, vlan_v2_allowlist_opcodes),
ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_QOS, tc_allowlist_opcodes),
+ ALLOW_ITEM(VIRTCHNL_VF_CAP_PTP, ptp_allowlist_opcodes),
};
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 8975d2971bc3..a3a4eaa17739 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2019, Intel Corporation. */
#include <linux/bpf_trace.h>
+#include <linux/unroll.h>
#include <net/xdp_sock_drv.h>
#include <net/xdp.h>
#include "ice.h"
@@ -989,7 +990,8 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring,
struct ice_tx_desc *tx_desc;
u32 i;
- loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
+ unrolled_count(PKTS_PER_BATCH)
+ for (i = 0; i < PKTS_PER_BATCH; i++) {
dma_addr_t dma;
dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr);
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
index 45adeb513253..8dc5d55e26c5 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.h
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -7,14 +7,6 @@
#define PKTS_PER_BATCH 8
-#ifdef __clang__
-#define loop_unrolled_for _Pragma("clang loop unroll_count(8)") for
-#elif __GNUC__ >= 8
-#define loop_unrolled_for _Pragma("GCC unroll 8") for
-#else
-#define loop_unrolled_for for
-#endif
-
struct ice_vsi;
#ifdef CONFIG_XDP_SOCKETS
diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
index dfd7cf1d9aa0..eae1b6f474e6 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
@@ -595,7 +595,7 @@ static bool idpf_rx_singleq_is_non_eop(const union virtchnl2_rx_desc *rx_desc)
*/
static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq,
struct sk_buff *skb,
- struct idpf_rx_csum_decoded csum_bits,
+ struct libeth_rx_csum csum_bits,
struct libeth_rx_pt decoded)
{
bool ipv4, ipv6;
@@ -661,10 +661,10 @@ checksum_fail:
*
* Return: parsed checksum status.
**/
-static struct idpf_rx_csum_decoded
+static struct libeth_rx_csum
idpf_rx_singleq_base_csum(const union virtchnl2_rx_desc *rx_desc)
{
- struct idpf_rx_csum_decoded csum_bits = { };
+ struct libeth_rx_csum csum_bits = { };
u32 rx_error, rx_status;
u64 qword;
@@ -696,10 +696,10 @@ idpf_rx_singleq_base_csum(const union virtchnl2_rx_desc *rx_desc)
*
* Return: parsed checksum status.
**/
-static struct idpf_rx_csum_decoded
+static struct libeth_rx_csum
idpf_rx_singleq_flex_csum(const union virtchnl2_rx_desc *rx_desc)
{
- struct idpf_rx_csum_decoded csum_bits = { };
+ struct libeth_rx_csum csum_bits = { };
u16 rx_status0, rx_status1;
rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0);
@@ -798,7 +798,7 @@ idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q,
u16 ptype)
{
struct libeth_rx_pt decoded = rx_q->rx_ptype_lkup[ptype];
- struct idpf_rx_csum_decoded csum_bits;
+ struct libeth_rx_csum csum_bits;
/* modifies the skb - consumes the enet header */
skb->protocol = eth_type_trans(skb, rx_q->netdev);
@@ -891,6 +891,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q,
* idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor
* @rx_desc: the descriptor to process
* @fields: storage for extracted values
+ * @ptype: pointer that will store packet type
*
* Decode the Rx descriptor and extract relevant information including the
* size and Rx packet type.
@@ -900,20 +901,21 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q,
*/
static void
idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc,
- struct idpf_rx_extracted *fields)
+ struct libeth_rqe_info *fields, u32 *ptype)
{
u64 qword;
qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);
- fields->size = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword);
- fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword);
+ fields->len = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword);
+ *ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword);
}
/**
* idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor
* @rx_desc: the descriptor to process
* @fields: storage for extracted values
+ * @ptype: pointer that will store packet type
*
* Decode the Rx descriptor and extract relevant information including the
* size and Rx packet type.
@@ -923,12 +925,12 @@ idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc,
*/
static void
idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc,
- struct idpf_rx_extracted *fields)
+ struct libeth_rqe_info *fields, u32 *ptype)
{
- fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M,
- le16_to_cpu(rx_desc->flex_nic_wb.pkt_len));
- fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M,
- le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0));
+ fields->len = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M,
+ le16_to_cpu(rx_desc->flex_nic_wb.pkt_len));
+ *ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M,
+ le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0));
}
/**
@@ -936,17 +938,18 @@ idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc,
* @rx_q: Rx descriptor queue
* @rx_desc: the descriptor to process
* @fields: storage for extracted values
+ * @ptype: pointer that will store packet type
*
*/
static void
idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q,
const union virtchnl2_rx_desc *rx_desc,
- struct idpf_rx_extracted *fields)
+ struct libeth_rqe_info *fields, u32 *ptype)
{
if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M)
- idpf_rx_singleq_extract_base_fields(rx_desc, fields);
+ idpf_rx_singleq_extract_base_fields(rx_desc, fields, ptype);
else
- idpf_rx_singleq_extract_flex_fields(rx_desc, fields);
+ idpf_rx_singleq_extract_flex_fields(rx_desc, fields, ptype);
}
/**
@@ -966,9 +969,10 @@ static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
/* Process Rx packets bounded by budget */
while (likely(total_rx_pkts < (unsigned int)budget)) {
- struct idpf_rx_extracted fields = { };
+ struct libeth_rqe_info fields = { };
union virtchnl2_rx_desc *rx_desc;
struct idpf_rx_buf *rx_buf;
+ u32 ptype;
/* get the Rx desc from Rx queue based on 'next_to_clean' */
rx_desc = &rx_q->rx[ntc];
@@ -989,16 +993,16 @@ static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
*/
dma_rmb();
- idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);
+ idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields, &ptype);
rx_buf = &rx_q->rx_buf[ntc];
- if (!libeth_rx_sync_for_cpu(rx_buf, fields.size))
+ if (!libeth_rx_sync_for_cpu(rx_buf, fields.len))
goto skip_data;
if (skb)
- idpf_rx_add_frag(rx_buf, skb, fields.size);
+ idpf_rx_add_frag(rx_buf, skb, fields.len);
else
- skb = idpf_rx_build_skb(rx_buf, fields.size);
+ skb = idpf_rx_build_skb(rx_buf, fields.len);
/* exit if we failed to retrieve a buffer */
if (!skb)
@@ -1033,8 +1037,7 @@ skip_data:
total_rx_bytes += skb->len;
/* protocol */
- idpf_rx_singleq_process_skb_fields(rx_q, skb,
- rx_desc, fields.rx_ptype);
+ idpf_rx_singleq_process_skb_fields(rx_q, skb, rx_desc, ptype);
/* send completed skb up the stack */
napi_gro_receive(rx_q->pp->p.napi, skb);
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 9be6a6b59c4e..2747dc69999a 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -2895,7 +2895,7 @@ idpf_rx_hash(const struct idpf_rx_queue *rxq, struct sk_buff *skb,
* skb->protocol must be set before this function is called
*/
static void idpf_rx_csum(struct idpf_rx_queue *rxq, struct sk_buff *skb,
- struct idpf_rx_csum_decoded csum_bits,
+ struct libeth_rx_csum csum_bits,
struct libeth_rx_pt decoded)
{
bool ipv4, ipv6;
@@ -2923,7 +2923,7 @@ static void idpf_rx_csum(struct idpf_rx_queue *rxq, struct sk_buff *skb,
if (unlikely(csum_bits.l4e))
goto checksum_fail;
- if (csum_bits.raw_csum_inv ||
+ if (!csum_bits.raw_csum_valid ||
decoded.inner_prot == LIBETH_RX_PT_INNER_SCTP) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
return;
@@ -2946,10 +2946,10 @@ checksum_fail:
*
* Return: parsed checksum status.
**/
-static struct idpf_rx_csum_decoded
+static struct libeth_rx_csum
idpf_rx_splitq_extract_csum_bits(const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc)
{
- struct idpf_rx_csum_decoded csum = { };
+ struct libeth_rx_csum csum = { };
u8 qword0, qword1;
qword0 = rx_desc->status_err0_qw0;
@@ -2965,9 +2965,9 @@ idpf_rx_splitq_extract_csum_bits(const struct virtchnl2_rx_flex_desc_adv_nic_3 *
qword1);
csum.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_IPV6EXADD_M,
qword0);
- csum.raw_csum_inv =
- le16_get_bits(rx_desc->ptype_err_fflags0,
- VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_M);
+ csum.raw_csum_valid =
+ !le16_get_bits(rx_desc->ptype_err_fflags0,
+ VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_M);
csum.raw_csum = le16_to_cpu(rx_desc->misc.raw_cs);
return csum;
@@ -3058,7 +3058,7 @@ static int
idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb,
const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc)
{
- struct idpf_rx_csum_decoded csum_bits;
+ struct libeth_rx_csum csum_bits;
struct libeth_rx_pt decoded;
u16 rx_ptype;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
index 0f71a6f5557b..cd9a20c9cc7f 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -213,25 +213,6 @@ enum idpf_tx_ctx_desc_eipt_offload {
IDPF_TX_CTX_EXT_IP_IPV4 = 0x3
};
-/* Checksum offload bits decoded from the receive descriptor. */
-struct idpf_rx_csum_decoded {
- u32 l3l4p : 1;
- u32 ipe : 1;
- u32 eipe : 1;
- u32 eudpe : 1;
- u32 ipv6exadd : 1;
- u32 l4e : 1;
- u32 pprs : 1;
- u32 nat : 1;
- u32 raw_csum_inv : 1;
- u32 raw_csum : 16;
-};
-
-struct idpf_rx_extracted {
- unsigned int size;
- u16 rx_ptype;
-};
-
#define IDPF_TX_COMPLQ_CLEAN_BUDGET 256
#define IDPF_TX_MIN_PKT_LEN 17
#define IDPF_TX_DESCS_FOR_SKB_DATA_PTR 1
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c
index 13bbd3346e01..c538e6b18aad 100644
--- a/drivers/net/ethernet/intel/igc/igc_xdp.c
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.c
@@ -14,6 +14,7 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
bool if_running = netif_running(dev);
struct bpf_prog *old_prog;
bool need_update;
+ unsigned int i;
if (dev->mtu > ETH_DATA_LEN) {
/* For now, the driver doesn't support XDP functionality with
@@ -24,8 +25,13 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
}
need_update = !!adapter->xdp_prog != !!prog;
- if (if_running && need_update)
- igc_close(dev);
+ if (if_running && need_update) {
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ igc_disable_rx_ring(adapter->rx_ring[i]);
+ igc_disable_tx_ring(adapter->tx_ring[i]);
+ napi_disable(&adapter->rx_ring[i]->q_vector->napi);
+ }
+ }
old_prog = xchg(&adapter->xdp_prog, prog);
if (old_prog)
@@ -36,8 +42,13 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
else
xdp_features_clear_redirect_target(dev);
- if (if_running && need_update)
- igc_open(dev);
+ if (if_running && need_update) {
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ napi_enable(&adapter->rx_ring[i]->q_vector->napi);
+ igc_enable_tx_ring(adapter->tx_ring[i]);
+ igc_enable_rx_ring(adapter->rx_ring[i]);
+ }
+ }
return 0;
}
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 4fe121b9f94b..44b18c573909 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -5557,7 +5557,6 @@ static int mvneta_probe(struct platform_device *pdev)
clk_prepare_enable(pp->clk_bus);
pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops;
- pp->phylink_pcs.neg_mode = true;
pp->phylink_config.dev = &dev->dev;
pp->phylink_config.type = PHYLINK_NETDEV;
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index dd76c1b7ed3a..f166dc4e6503 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -7024,9 +7024,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
dev->dev_port = port->id;
port->pcs_gmac.ops = &mvpp2_phylink_gmac_pcs_ops;
- port->pcs_gmac.neg_mode = true;
port->pcs_xlg.ops = &mvpp2_phylink_xlg_pcs_ops;
- port->pcs_xlg.neg_mode = true;
if (!mvpp2_use_acpi_compat_mode(port_fwnode)) {
port->phylink_config.dev = &dev->dev;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
index cb6513ab35e7..69e0778f9ac1 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_RVU_ESWITCH) += rvu_rep.o
rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \
otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \
- otx2_devlink.o qos_sq.o qos.o
+ otx2_devlink.o qos_sq.o qos.o otx2_xsk.o
rvu_nicvf-y := otx2_vf.o
rvu_rep-y := rep.o
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
index a15cc86635d6..c3b6e0f60a79 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
@@ -112,9 +112,12 @@ int cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
struct otx2_nic *pfvf = dev;
int cnt = cq->pool_ptrs;
u64 ptrs[NPA_MAX_BURST];
+ struct otx2_pool *pool;
dma_addr_t bufptr;
int num_ptrs = 1;
+ pool = &pfvf->qset.pool[cq->cq_idx];
+
/* Refill pool with new buffers */
while (cq->pool_ptrs) {
if (otx2_alloc_buffer(pfvf, cq, &bufptr)) {
@@ -124,7 +127,9 @@ int cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
break;
}
cq->pool_ptrs--;
- ptrs[num_ptrs] = (u64)bufptr + OTX2_HEAD_ROOM;
+ ptrs[num_ptrs] = pool->xsk_pool ?
+ (u64)bufptr : (u64)bufptr + OTX2_HEAD_ROOM;
+
num_ptrs++;
if (num_ptrs == NPA_MAX_BURST || cq->pool_ptrs == 0) {
__cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 2b49bfec7869..84cd029a85aa 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -17,6 +17,7 @@
#include "otx2_common.h"
#include "otx2_struct.h"
#include "cn10k.h"
+#include "otx2_xsk.h"
static bool otx2_is_pfc_enabled(struct otx2_nic *pfvf)
{
@@ -330,6 +331,10 @@ int otx2_set_rss_table(struct otx2_nic *pfvf, int ctx_id)
rss_ctx = rss->rss_ctx[ctx_id];
/* Get memory to put this msg */
for (idx = 0; idx < rss->rss_size; idx++) {
+ /* Ignore the queue if AF_XDP zero copy is enabled */
+ if (test_bit(rss_ctx->ind_tbl[idx], pfvf->af_xdp_zc_qidx))
+ continue;
+
aq = otx2_mbox_alloc_msg_nix_aq_enq(mbox);
if (!aq) {
/* The shared memory buffer can be full.
@@ -549,10 +554,13 @@ static int otx2_alloc_pool_buf(struct otx2_nic *pfvf, struct otx2_pool *pool,
}
static int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
- dma_addr_t *dma)
+ dma_addr_t *dma, int qidx, int idx)
{
u8 *buf;
+ if (pool->xsk_pool)
+ return otx2_xsk_pool_alloc_buf(pfvf, pool, dma, idx);
+
if (pool->page_pool)
return otx2_alloc_pool_buf(pfvf, pool, dma);
@@ -571,12 +579,12 @@ static int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
}
int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
- dma_addr_t *dma)
+ dma_addr_t *dma, int qidx, int idx)
{
int ret;
local_bh_disable();
- ret = __otx2_alloc_rbuf(pfvf, pool, dma);
+ ret = __otx2_alloc_rbuf(pfvf, pool, dma, qidx, idx);
local_bh_enable();
return ret;
}
@@ -584,7 +592,8 @@ int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq,
dma_addr_t *dma)
{
- if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, dma)))
+ if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, dma,
+ cq->cq_idx, cq->pool_ptrs - 1)))
return -ENOMEM;
return 0;
}
@@ -884,7 +893,7 @@ void otx2_sqb_flush(struct otx2_nic *pfvf)
#define RQ_PASS_LVL_AURA (255 - ((95 * 256) / 100)) /* RED when 95% is full */
#define RQ_DROP_LVL_AURA (255 - ((99 * 256) / 100)) /* Drop when 99% is full */
-static int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura)
+int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura)
{
struct otx2_qset *qset = &pfvf->qset;
struct nix_aq_enq_req *aq;
@@ -1028,6 +1037,10 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
sq->stats.bytes = 0;
sq->stats.pkts = 0;
+ /* Attach XSK_BUFF_POOL to XDP queue */
+ if (qidx > pfvf->hw.xdp_queues)
+ otx2_attach_xsk_buff(pfvf, sq, (qidx - pfvf->hw.xdp_queues));
+
chan_offset = qidx % pfvf->hw.tx_chan_cnt;
err = pfvf->hw_ops->sq_aq_init(pfvf, qidx, chan_offset, sqb_aura);
@@ -1041,12 +1054,13 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
}
-static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
+int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
{
struct otx2_qset *qset = &pfvf->qset;
int err, pool_id, non_xdp_queues;
struct nix_aq_enq_req *aq;
struct otx2_cq_queue *cq;
+ struct otx2_pool *pool;
cq = &qset->cq[qidx];
cq->cq_idx = qidx;
@@ -1055,8 +1069,20 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
cq->cq_type = CQ_RX;
cq->cint_idx = qidx;
cq->cqe_cnt = qset->rqe_cnt;
- if (pfvf->xdp_prog)
+ if (pfvf->xdp_prog) {
xdp_rxq_info_reg(&cq->xdp_rxq, pfvf->netdev, qidx, 0);
+ pool = &qset->pool[qidx];
+ if (pool->xsk_pool) {
+ xdp_rxq_info_reg_mem_model(&cq->xdp_rxq,
+ MEM_TYPE_XSK_BUFF_POOL,
+ NULL);
+ xsk_pool_set_rxq_info(pool->xsk_pool, &cq->xdp_rxq);
+ } else if (pool->page_pool) {
+ xdp_rxq_info_reg_mem_model(&cq->xdp_rxq,
+ MEM_TYPE_PAGE_POOL,
+ pool->page_pool);
+ }
+ }
} else if (qidx < non_xdp_queues) {
cq->cq_type = CQ_TX;
cq->cint_idx = qidx - pfvf->hw.rx_queues;
@@ -1275,9 +1301,10 @@ void otx2_free_bufs(struct otx2_nic *pfvf, struct otx2_pool *pool,
pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
page = virt_to_head_page(phys_to_virt(pa));
-
if (pool->page_pool) {
page_pool_put_full_page(pool->page_pool, page, true);
+ } else if (pool->xsk_pool) {
+ /* Note: No way of identifying xdp_buff */
} else {
dma_unmap_page_attrs(pfvf->dev, iova, size,
DMA_FROM_DEVICE,
@@ -1292,6 +1319,7 @@ void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type)
int pool_id, pool_start = 0, pool_end = 0, size = 0;
struct otx2_pool *pool;
u64 iova;
+ int idx;
if (type == AURA_NIX_SQ) {
pool_start = otx2_get_pool_idx(pfvf, type, 0);
@@ -1306,16 +1334,21 @@ void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type)
/* Free SQB and RQB pointers from the aura pool */
for (pool_id = pool_start; pool_id < pool_end; pool_id++) {
- iova = otx2_aura_allocptr(pfvf, pool_id);
pool = &pfvf->qset.pool[pool_id];
+ iova = otx2_aura_allocptr(pfvf, pool_id);
while (iova) {
if (type == AURA_NIX_RQ)
iova -= OTX2_HEAD_ROOM;
-
otx2_free_bufs(pfvf, pool, iova, size);
-
iova = otx2_aura_allocptr(pfvf, pool_id);
}
+
+ for (idx = 0 ; idx < pool->xdp_cnt; idx++) {
+ if (!pool->xdp[idx])
+ continue;
+
+ xsk_buff_free(pool->xdp[idx]);
+ }
}
}
@@ -1332,7 +1365,8 @@ void otx2_aura_pool_free(struct otx2_nic *pfvf)
qmem_free(pfvf->dev, pool->stack);
qmem_free(pfvf->dev, pool->fc_addr);
page_pool_destroy(pool->page_pool);
- pool->page_pool = NULL;
+ devm_kfree(pfvf->dev, pool->xdp);
+ pool->xsk_pool = NULL;
}
devm_kfree(pfvf->dev, pfvf->qset.pool);
pfvf->qset.pool = NULL;
@@ -1419,6 +1453,7 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
int stack_pages, int numptrs, int buf_size, int type)
{
struct page_pool_params pp_params = { 0 };
+ struct xsk_buff_pool *xsk_pool;
struct npa_aq_enq_req *aq;
struct otx2_pool *pool;
int err;
@@ -1462,21 +1497,35 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
aq->ctype = NPA_AQ_CTYPE_POOL;
aq->op = NPA_AQ_INSTOP_INIT;
- if (type != AURA_NIX_RQ) {
- pool->page_pool = NULL;
+ if (type != AURA_NIX_RQ)
+ return 0;
+
+ if (!test_bit(pool_id, pfvf->af_xdp_zc_qidx)) {
+ pp_params.order = get_order(buf_size);
+ pp_params.flags = PP_FLAG_DMA_MAP;
+ pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs);
+ pp_params.nid = NUMA_NO_NODE;
+ pp_params.dev = pfvf->dev;
+ pp_params.dma_dir = DMA_FROM_DEVICE;
+ pool->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(pool->page_pool)) {
+ netdev_err(pfvf->netdev, "Creation of page pool failed\n");
+ return PTR_ERR(pool->page_pool);
+ }
return 0;
}
- pp_params.order = get_order(buf_size);
- pp_params.flags = PP_FLAG_DMA_MAP;
- pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs);
- pp_params.nid = NUMA_NO_NODE;
- pp_params.dev = pfvf->dev;
- pp_params.dma_dir = DMA_FROM_DEVICE;
- pool->page_pool = page_pool_create(&pp_params);
- if (IS_ERR(pool->page_pool)) {
- netdev_err(pfvf->netdev, "Creation of page pool failed\n");
- return PTR_ERR(pool->page_pool);
+ /* Set XSK pool to support AF_XDP zero-copy */
+ xsk_pool = xsk_get_pool_from_qid(pfvf->netdev, pool_id);
+ if (xsk_pool) {
+ pool->xsk_pool = xsk_pool;
+ pool->xdp_cnt = numptrs;
+ pool->xdp = devm_kcalloc(pfvf->dev,
+ numptrs, sizeof(struct xdp_buff *), GFP_KERNEL);
+ if (IS_ERR(pool->xdp)) {
+ netdev_err(pfvf->netdev, "Creation of xsk pool failed\n");
+ return PTR_ERR(pool->xdp);
+ }
}
return 0;
@@ -1537,9 +1586,18 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
}
for (ptr = 0; ptr < num_sqbs; ptr++) {
- err = otx2_alloc_rbuf(pfvf, pool, &bufptr);
- if (err)
+ err = otx2_alloc_rbuf(pfvf, pool, &bufptr, pool_id, ptr);
+ if (err) {
+ if (pool->xsk_pool) {
+ ptr--;
+ while (ptr >= 0) {
+ xsk_buff_free(pool->xdp[ptr]);
+ ptr--;
+ }
+ }
goto err_mem;
+ }
+
pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr);
sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr;
}
@@ -1589,11 +1647,19 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf)
/* Allocate pointers and free them to aura/pool */
for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) {
pool = &pfvf->qset.pool[pool_id];
+
for (ptr = 0; ptr < num_ptrs; ptr++) {
- err = otx2_alloc_rbuf(pfvf, pool, &bufptr);
- if (err)
+ err = otx2_alloc_rbuf(pfvf, pool, &bufptr, pool_id, ptr);
+ if (err) {
+ if (pool->xsk_pool) {
+ while (ptr)
+ xsk_buff_free(pool->xdp[--ptr]);
+ }
return -ENOMEM;
+ }
+
pfvf->hw_ops->aura_freeptr(pfvf, pool_id,
+ pool->xsk_pool ? bufptr :
bufptr + OTX2_HEAD_ROOM);
}
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 65814e3dc93f..1e88422825be 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -21,6 +21,7 @@
#include <linux/time64.h>
#include <linux/dim.h>
#include <uapi/linux/if_macsec.h>
+#include <net/page_pool/helpers.h>
#include <mbox.h>
#include <npc.h>
@@ -128,6 +129,12 @@ enum otx2_errcodes_re {
ERRCODE_IL4_CSUM = 0x22,
};
+enum otx2_xdp_action {
+ OTX2_XDP_TX = BIT(0),
+ OTX2_XDP_REDIRECT = BIT(1),
+ OTX2_AF_XDP_FRAME = BIT(2),
+};
+
struct otx2_dev_stats {
u64 rx_bytes;
u64 rx_frames;
@@ -531,6 +538,8 @@ struct otx2_nic {
/* Inline ipsec */
struct cn10k_ipsec ipsec;
+ /* af_xdp zero-copy */
+ unsigned long *af_xdp_zc_qidx;
};
static inline bool is_otx2_lbkvf(struct pci_dev *pdev)
@@ -1002,7 +1011,7 @@ void otx2_txschq_free_one(struct otx2_nic *pfvf, u16 lvl, u16 schq);
void otx2_free_pending_sqe(struct otx2_nic *pfvf);
void otx2_sqb_flush(struct otx2_nic *pfvf);
int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
- dma_addr_t *dma);
+ dma_addr_t *dma, int qidx, int idx);
int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable);
void otx2_ctx_disable(struct mbox *mbox, int type, bool npa);
int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable);
@@ -1032,6 +1041,8 @@ void otx2_pfaf_mbox_destroy(struct otx2_nic *pf);
void otx2_disable_mbox_intr(struct otx2_nic *pf);
void otx2_disable_napi(struct otx2_nic *pf);
irqreturn_t otx2_cq_intr_handler(int irq, void *cq_irq);
+int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura);
+int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx);
/* RSS configuration APIs*/
int otx2_rss_init(struct otx2_nic *pfvf);
@@ -1094,7 +1105,8 @@ int otx2_del_macfilter(struct net_device *netdev, const u8 *mac);
int otx2_add_macfilter(struct net_device *netdev, const u8 *mac);
int otx2_enable_rxvlan(struct otx2_nic *pf, bool enable);
int otx2_install_rxvlan_offload_flow(struct otx2_nic *pfvf);
-bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, u64 iova, int len, u16 qidx);
+bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, struct xdp_frame *xdpf,
+ u64 iova, int len, u16 qidx, u16 flags);
u16 otx2_get_max_mtu(struct otx2_nic *pfvf);
int otx2_handle_ntuple_tc_features(struct net_device *netdev,
netdev_features_t features);
@@ -1175,4 +1187,5 @@ static inline int mcam_entry_cmp(const void *a, const void *b)
dma_addr_t otx2_dma_map_skb_frag(struct otx2_nic *pfvf,
struct sk_buff *skb, int seg, int *len);
void otx2_dma_unmap_skb_frags(struct otx2_nic *pfvf, struct sg_list *sg);
+int otx2_read_free_sqe(struct otx2_nic *pfvf, u16 qidx);
#endif /* OTX2_COMMON_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index 2d53dc77ef1e..010385b29988 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -910,8 +910,12 @@ static int otx2_get_rxfh(struct net_device *dev,
return -ENOENT;
if (indir) {
- for (idx = 0; idx < rss->rss_size; idx++)
+ for (idx = 0; idx < rss->rss_size; idx++) {
+ /* Ignore if the rx queue is AF_XDP zero copy enabled */
+ if (test_bit(rss_ctx->ind_tbl[idx], pfvf->af_xdp_zc_qidx))
+ continue;
indir[idx] = rss_ctx->ind_tbl[idx];
+ }
}
if (rxfh->key)
memcpy(rxfh->key, rss->key, sizeof(rss->key));
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index e1dde93e8af8..c7c562f0f5e5 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -27,6 +27,7 @@
#include "qos.h"
#include <rvu_trace.h>
#include "cn10k_ipsec.h"
+#include "otx2_xsk.h"
#define DRV_NAME "rvu_nicpf"
#define DRV_STRING "Marvell RVU NIC Physical Function Driver"
@@ -1662,9 +1663,7 @@ void otx2_free_hw_resources(struct otx2_nic *pf)
struct nix_lf_free_req *free_req;
struct mbox *mbox = &pf->mbox;
struct otx2_cq_queue *cq;
- struct otx2_pool *pool;
struct msg_req *req;
- int pool_id;
int qidx;
/* Ensure all SQE are processed */
@@ -1705,13 +1704,6 @@ void otx2_free_hw_resources(struct otx2_nic *pf)
/* Free RQ buffer pointers*/
otx2_free_aura_ptr(pf, AURA_NIX_RQ);
- for (qidx = 0; qidx < pf->hw.rx_queues; qidx++) {
- pool_id = otx2_get_pool_idx(pf, AURA_NIX_RQ, qidx);
- pool = &pf->qset.pool[pool_id];
- page_pool_destroy(pool->page_pool);
- pool->page_pool = NULL;
- }
-
otx2_free_cq_res(pf);
/* Free all ingress bandwidth profiles allocated */
@@ -2691,7 +2683,6 @@ static int otx2_get_vf_config(struct net_device *netdev, int vf,
static int otx2_xdp_xmit_tx(struct otx2_nic *pf, struct xdp_frame *xdpf,
int qidx)
{
- struct page *page;
u64 dma_addr;
int err = 0;
@@ -2701,11 +2692,11 @@ static int otx2_xdp_xmit_tx(struct otx2_nic *pf, struct xdp_frame *xdpf,
if (dma_mapping_error(pf->dev, dma_addr))
return -ENOMEM;
- err = otx2_xdp_sq_append_pkt(pf, dma_addr, xdpf->len, qidx);
+ err = otx2_xdp_sq_append_pkt(pf, xdpf, dma_addr, xdpf->len,
+ qidx, OTX2_XDP_REDIRECT);
if (!err) {
otx2_dma_unmap_page(pf, dma_addr, xdpf->len, DMA_TO_DEVICE);
- page = virt_to_page(xdpf->data);
- put_page(page);
+ xdp_return_frame(xdpf);
return -ENOMEM;
}
return 0;
@@ -2789,6 +2780,8 @@ static int otx2_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
switch (xdp->command) {
case XDP_SETUP_PROG:
return otx2_xdp_setup(pf, xdp->prog);
+ case XDP_SETUP_XSK_POOL:
+ return otx2_xsk_pool_setup(pf, xdp->xsk.pool, xdp->xsk.queue_id);
default:
return -EINVAL;
}
@@ -2866,6 +2859,7 @@ static const struct net_device_ops otx2_netdev_ops = {
.ndo_set_vf_vlan = otx2_set_vf_vlan,
.ndo_get_vf_config = otx2_get_vf_config,
.ndo_bpf = otx2_xdp,
+ .ndo_xsk_wakeup = otx2_xsk_wakeup,
.ndo_xdp_xmit = otx2_xdp_xmit,
.ndo_setup_tc = otx2_setup_tc,
.ndo_set_vf_trust = otx2_ndo_set_vf_trust,
@@ -3204,16 +3198,26 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
/* Enable link notifications */
otx2_cgx_config_linkevents(pf, true);
+ pf->af_xdp_zc_qidx = bitmap_zalloc(qcount, GFP_KERNEL);
+ if (!pf->af_xdp_zc_qidx) {
+ err = -ENOMEM;
+ goto err_sriov_cleannup;
+ }
+
#ifdef CONFIG_DCB
err = otx2_dcbnl_set_ops(netdev);
if (err)
- goto err_pf_sriov_init;
+ goto err_free_zc_bmap;
#endif
otx2_qos_init(pf, qos_txqs);
return 0;
+err_free_zc_bmap:
+ bitmap_free(pf->af_xdp_zc_qidx);
+err_sriov_cleannup:
+ otx2_sriov_vfcfg_cleanup(pf);
err_pf_sriov_init:
otx2_shutdown_tc(pf);
err_mcam_flow_del:
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index 224cef938927..6bc5ce5a9f61 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -12,6 +12,7 @@
#include <linux/bpf_trace.h>
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
+#include <net/xdp.h>
#include "otx2_reg.h"
#include "otx2_common.h"
@@ -19,6 +20,7 @@
#include "otx2_txrx.h"
#include "otx2_ptp.h"
#include "cn10k.h"
+#include "otx2_xsk.h"
#define CQE_ADDR(CQ, idx) ((CQ)->cqe_base + ((CQ)->cqe_size * (idx)))
#define PTP_PORT 0x13F
@@ -29,6 +31,12 @@
DEFINE_STATIC_KEY_FALSE(cn10k_ipsec_sa_enabled);
+static int otx2_get_free_sqe(struct otx2_snd_queue *sq)
+{
+ return (sq->cons_head - sq->head - 1 + sq->sqe_cnt)
+ & (sq->sqe_cnt - 1);
+}
+
static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf,
struct bpf_prog *prog,
struct nix_cqe_rx_s *cqe,
@@ -96,20 +104,22 @@ static unsigned int frag_num(unsigned int i)
static void otx2_xdp_snd_pkt_handler(struct otx2_nic *pfvf,
struct otx2_snd_queue *sq,
- struct nix_cqe_tx_s *cqe)
+ struct nix_cqe_tx_s *cqe,
+ int *xsk_frames)
{
struct nix_send_comp_s *snd_comp = &cqe->comp;
struct sg_list *sg;
- struct page *page;
- u64 pa;
sg = &sq->sg[snd_comp->sqe_id];
+ if (sg->flags & OTX2_AF_XDP_FRAME) {
+ (*xsk_frames)++;
+ return;
+ }
- pa = otx2_iova_to_phys(pfvf->iommu_domain, sg->dma_addr[0]);
- otx2_dma_unmap_page(pfvf, sg->dma_addr[0],
- sg->size[0], DMA_TO_DEVICE);
- page = virt_to_page(phys_to_virt(pa));
- put_page(page);
+ if (sg->flags & OTX2_XDP_REDIRECT)
+ otx2_dma_unmap_page(pfvf, sg->dma_addr[0], sg->size[0], DMA_TO_DEVICE);
+ xdp_return_frame((struct xdp_frame *)sg->skb);
+ sg->skb = (u64)NULL;
}
static void otx2_snd_pkt_handler(struct otx2_nic *pfvf,
@@ -431,6 +441,18 @@ int otx2_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
return cnt - cq->pool_ptrs;
}
+static void otx2_zc_submit_pkts(struct otx2_nic *pfvf, struct xsk_buff_pool *xsk_pool,
+ int *xsk_frames, int qidx, int budget)
+{
+ if (*xsk_frames)
+ xsk_tx_completed(xsk_pool, *xsk_frames);
+
+ if (xsk_uses_need_wakeup(xsk_pool))
+ xsk_set_tx_need_wakeup(xsk_pool);
+
+ otx2_zc_napi_handler(pfvf, xsk_pool, qidx, budget);
+}
+
static int otx2_tx_napi_handler(struct otx2_nic *pfvf,
struct otx2_cq_queue *cq, int budget)
{
@@ -439,16 +461,22 @@ static int otx2_tx_napi_handler(struct otx2_nic *pfvf,
struct nix_cqe_tx_s *cqe;
struct net_device *ndev;
int processed_cqe = 0;
+ int xsk_frames = 0;
+
+ qidx = cq->cq_idx - pfvf->hw.rx_queues;
+ sq = &pfvf->qset.sq[qidx];
if (cq->pend_cqe >= budget)
goto process_cqe;
- if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
+ if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe) {
+ if (sq->xsk_pool)
+ otx2_zc_submit_pkts(pfvf, sq->xsk_pool, &xsk_frames,
+ qidx, budget);
return 0;
+ }
process_cqe:
- qidx = cq->cq_idx - pfvf->hw.rx_queues;
- sq = &pfvf->qset.sq[qidx];
while (likely(processed_cqe < budget) && cq->pend_cqe) {
cqe = (struct nix_cqe_tx_s *)otx2_get_next_cqe(cq);
@@ -458,10 +486,8 @@ process_cqe:
break;
}
- qidx = cq->cq_idx - pfvf->hw.rx_queues;
-
if (cq->cq_type == CQ_XDP)
- otx2_xdp_snd_pkt_handler(pfvf, sq, cqe);
+ otx2_xdp_snd_pkt_handler(pfvf, sq, cqe, &xsk_frames);
else
otx2_snd_pkt_handler(pfvf, cq, &pfvf->qset.sq[qidx],
cqe, budget, &tx_pkts, &tx_bytes);
@@ -502,6 +528,10 @@ process_cqe:
netif_carrier_ok(ndev))
netif_tx_wake_queue(txq);
}
+
+ if (sq->xsk_pool)
+ otx2_zc_submit_pkts(pfvf, sq->xsk_pool, &xsk_frames, qidx, budget);
+
return 0;
}
@@ -527,9 +557,10 @@ static void otx2_adjust_adaptive_coalese(struct otx2_nic *pfvf, struct otx2_cq_p
int otx2_napi_handler(struct napi_struct *napi, int budget)
{
struct otx2_cq_queue *rx_cq = NULL;
+ struct otx2_cq_queue *cq = NULL;
+ struct otx2_pool *pool = NULL;
struct otx2_cq_poll *cq_poll;
int workdone = 0, cq_idx, i;
- struct otx2_cq_queue *cq;
struct otx2_qset *qset;
struct otx2_nic *pfvf;
int filled_cnt = -1;
@@ -554,6 +585,7 @@ int otx2_napi_handler(struct napi_struct *napi, int budget)
if (rx_cq && rx_cq->pool_ptrs)
filled_cnt = pfvf->hw_ops->refill_pool_ptrs(pfvf, rx_cq);
+
/* Clear the IRQ */
otx2_write64(pfvf, NIX_LF_CINTX_INT(cq_poll->cint_idx), BIT_ULL(0));
@@ -566,20 +598,31 @@ int otx2_napi_handler(struct napi_struct *napi, int budget)
if (pfvf->flags & OTX2_FLAG_ADPTV_INT_COAL_ENABLED)
otx2_adjust_adaptive_coalese(pfvf, cq_poll);
+ if (likely(cq))
+ pool = &pfvf->qset.pool[cq->cq_idx];
+
if (unlikely(!filled_cnt)) {
struct refill_work *work;
struct delayed_work *dwork;
- work = &pfvf->refill_wrk[cq->cq_idx];
- dwork = &work->pool_refill_work;
- /* Schedule a task if no other task is running */
- if (!cq->refill_task_sched) {
- work->napi = napi;
- cq->refill_task_sched = true;
- schedule_delayed_work(dwork,
- msecs_to_jiffies(100));
+ if (likely(cq)) {
+ work = &pfvf->refill_wrk[cq->cq_idx];
+ dwork = &work->pool_refill_work;
+ /* Schedule a task if no other task is running */
+ if (!cq->refill_task_sched) {
+ work->napi = napi;
+ cq->refill_task_sched = true;
+ schedule_delayed_work(dwork,
+ msecs_to_jiffies(100));
+ }
+ /* Call wake-up for not able to fill buffers */
+ if (pool->xsk_pool)
+ xsk_set_rx_need_wakeup(pool->xsk_pool);
}
} else {
+ /* Clear wake-up, since buffers are filled successfully */
+ if (pool && pool->xsk_pool)
+ xsk_clear_rx_need_wakeup(pool->xsk_pool);
/* Re-enable interrupts */
otx2_write64(pfvf,
NIX_LF_CINTX_ENA_W1S(cq_poll->cint_idx),
@@ -1147,7 +1190,7 @@ bool otx2_sq_append_skb(void *dev, struct netdev_queue *txq,
/* Check if there is enough room between producer
* and consumer index.
*/
- free_desc = (sq->cons_head - sq->head - 1 + sq->sqe_cnt) & (sq->sqe_cnt - 1);
+ free_desc = otx2_get_free_sqe(sq);
if (free_desc < sq->sqe_thresh)
return false;
@@ -1230,15 +1273,19 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int q
u16 pool_id;
u64 iova;
- if (pfvf->xdp_prog)
+ pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_RQ, qidx);
+ pool = &pfvf->qset.pool[pool_id];
+
+ if (pfvf->xdp_prog) {
+ if (pool->page_pool)
+ xdp_rxq_info_unreg_mem_model(&cq->xdp_rxq);
+
xdp_rxq_info_unreg(&cq->xdp_rxq);
+ }
if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
return;
- pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_RQ, qidx);
- pool = &pfvf->qset.pool[pool_id];
-
while (cq->pend_cqe) {
cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq);
processed_cqe++;
@@ -1359,8 +1406,9 @@ void otx2_free_pending_sqe(struct otx2_nic *pfvf)
}
}
-static void otx2_xdp_sqe_add_sg(struct otx2_snd_queue *sq, u64 dma_addr,
- int len, int *offset)
+static void otx2_xdp_sqe_add_sg(struct otx2_snd_queue *sq,
+ struct xdp_frame *xdpf,
+ u64 dma_addr, int len, int *offset, u16 flags)
{
struct nix_sqe_sg_s *sg = NULL;
u64 *iova = NULL;
@@ -1377,16 +1425,34 @@ static void otx2_xdp_sqe_add_sg(struct otx2_snd_queue *sq, u64 dma_addr,
sq->sg[sq->head].dma_addr[0] = dma_addr;
sq->sg[sq->head].size[0] = len;
sq->sg[sq->head].num_segs = 1;
+ sq->sg[sq->head].flags = flags;
+ sq->sg[sq->head].skb = (u64)xdpf;
}
-bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, u64 iova, int len, u16 qidx)
+int otx2_read_free_sqe(struct otx2_nic *pfvf, u16 qidx)
+{
+ struct otx2_snd_queue *sq;
+ int free_sqe;
+
+ sq = &pfvf->qset.sq[qidx];
+ free_sqe = otx2_get_free_sqe(sq);
+ if (free_sqe < sq->sqe_thresh) {
+ netdev_warn(pfvf->netdev, "No free sqe for Send queue%d\n", qidx);
+ return 0;
+ }
+
+ return free_sqe - sq->sqe_thresh;
+}
+
+bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, struct xdp_frame *xdpf,
+ u64 iova, int len, u16 qidx, u16 flags)
{
struct nix_sqe_hdr_s *sqe_hdr;
struct otx2_snd_queue *sq;
int offset, free_sqe;
sq = &pfvf->qset.sq[qidx];
- free_sqe = (sq->num_sqbs - *sq->aura_fc_addr) * sq->sqe_per_sqb;
+ free_sqe = otx2_get_free_sqe(sq);
if (free_sqe < sq->sqe_thresh)
return false;
@@ -1405,7 +1471,7 @@ bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, u64 iova, int len, u16 qidx)
offset = sizeof(*sqe_hdr);
- otx2_xdp_sqe_add_sg(sq, iova, len, &offset);
+ otx2_xdp_sqe_add_sg(sq, xdpf, iova, len, &offset, flags);
sqe_hdr->sizem1 = (offset / 16) - 1;
pfvf->hw_ops->sqe_flush(pfvf, sq, offset, qidx);
@@ -1418,14 +1484,28 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf,
struct otx2_cq_queue *cq,
bool *need_xdp_flush)
{
+ struct xdp_buff xdp, *xsk_buff = NULL;
unsigned char *hard_start;
+ struct otx2_pool *pool;
+ struct xdp_frame *xdpf;
int qidx = cq->cq_idx;
- struct xdp_buff xdp;
struct page *page;
u64 iova, pa;
u32 act;
int err;
+ pool = &pfvf->qset.pool[qidx];
+
+ if (pool->xsk_pool) {
+ xsk_buff = pool->xdp[--cq->rbpool->xdp_top];
+ if (!xsk_buff)
+ return false;
+
+ xsk_buff->data_end = xsk_buff->data + cqe->sg.seg_size;
+ act = bpf_prog_run_xdp(prog, xsk_buff);
+ goto handle_xdp_verdict;
+ }
+
iova = cqe->sg.seg_addr - OTX2_HEAD_ROOM;
pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
page = virt_to_page(phys_to_virt(pa));
@@ -1438,37 +1518,59 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf,
act = bpf_prog_run_xdp(prog, &xdp);
+handle_xdp_verdict:
switch (act) {
case XDP_PASS:
break;
case XDP_TX:
qidx += pfvf->hw.tx_queues;
cq->pool_ptrs++;
- return otx2_xdp_sq_append_pkt(pfvf, iova,
- cqe->sg.seg_size, qidx);
+ xdpf = xdp_convert_buff_to_frame(&xdp);
+ return otx2_xdp_sq_append_pkt(pfvf, xdpf,
+ cqe->sg.seg_addr,
+ cqe->sg.seg_size,
+ qidx, OTX2_XDP_TX);
case XDP_REDIRECT:
cq->pool_ptrs++;
- err = xdp_do_redirect(pfvf->netdev, &xdp, prog);
+ if (xsk_buff) {
+ err = xdp_do_redirect(pfvf->netdev, xsk_buff, prog);
+ if (!err) {
+ *need_xdp_flush = true;
+ return true;
+ }
+ return false;
+ }
- otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize,
- DMA_FROM_DEVICE);
+ err = xdp_do_redirect(pfvf->netdev, &xdp, prog);
if (!err) {
*need_xdp_flush = true;
return true;
}
- put_page(page);
+
+ otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize,
+ DMA_FROM_DEVICE);
+ xdpf = xdp_convert_buff_to_frame(&xdp);
+ xdp_return_frame(xdpf);
break;
default:
bpf_warn_invalid_xdp_action(pfvf->netdev, prog, act);
break;
case XDP_ABORTED:
+ if (xsk_buff)
+ xsk_buff_free(xsk_buff);
trace_xdp_exception(pfvf->netdev, prog, act);
break;
case XDP_DROP:
- otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize,
- DMA_FROM_DEVICE);
- put_page(page);
cq->pool_ptrs++;
+ if (xsk_buff) {
+ xsk_buff_free(xsk_buff);
+ } else if (page->pp) {
+ page_pool_recycle_direct(pool->page_pool, page);
+ } else {
+ otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize,
+ DMA_FROM_DEVICE);
+ put_page(page);
+ }
return true;
}
return false;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
index d23810963fdb..acf259d72008 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
@@ -12,6 +12,7 @@
#include <linux/iommu.h>
#include <linux/if_vlan.h>
#include <net/xdp.h>
+#include <net/xdp_sock_drv.h>
#define LBK_CHAN_BASE 0x000
#define SDP_CHAN_BASE 0x700
@@ -76,6 +77,7 @@ struct otx2_rcv_queue {
struct sg_list {
u16 num_segs;
+ u16 flags;
u64 skb;
u64 size[OTX2_MAX_FRAGS_IN_SQE];
u64 dma_addr[OTX2_MAX_FRAGS_IN_SQE];
@@ -104,6 +106,8 @@ struct otx2_snd_queue {
/* SQE ring and CPT response queue for Inline IPSEC */
struct qmem *sqe_ring;
struct qmem *cpt_resp;
+ /* Buffer pool for af_xdp zero-copy */
+ struct xsk_buff_pool *xsk_pool;
} ____cacheline_aligned_in_smp;
enum cq_type {
@@ -127,7 +131,11 @@ struct otx2_pool {
struct qmem *stack;
struct qmem *fc_addr;
struct page_pool *page_pool;
+ struct xsk_buff_pool *xsk_pool;
+ struct xdp_buff **xdp;
+ u16 xdp_cnt;
u16 rbsize;
+ u16 xdp_top;
};
struct otx2_cq_queue {
@@ -144,6 +152,7 @@ struct otx2_cq_queue {
void *cqe_base;
struct qmem *cqe;
struct otx2_pool *rbpool;
+ bool xsk_zc_en;
struct xdp_rxq_info xdp_rxq;
} ____cacheline_aligned_in_smp;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
index e926c6ce96cf..63ddd262d122 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
@@ -722,15 +722,25 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (err)
goto err_shutdown_tc;
+ vf->af_xdp_zc_qidx = bitmap_zalloc(qcount, GFP_KERNEL);
+ if (!vf->af_xdp_zc_qidx) {
+ err = -ENOMEM;
+ goto err_unreg_devlink;
+ }
+
#ifdef CONFIG_DCB
err = otx2_dcbnl_set_ops(netdev);
if (err)
- goto err_shutdown_tc;
+ goto err_free_zc_bmap;
#endif
otx2_qos_init(vf, qos_txqs);
return 0;
+err_free_zc_bmap:
+ bitmap_free(vf->af_xdp_zc_qidx);
+err_unreg_devlink:
+ otx2_unregister_dl(vf);
err_shutdown_tc:
otx2_shutdown_tc(vf);
err_unreg_netdev:
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c
new file mode 100644
index 000000000000..ce10caea8511
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell RVU Ethernet driver
+ *
+ * Copyright (C) 2024 Marvell.
+ *
+ */
+
+#include <linux/bpf_trace.h>
+#include <linux/stringify.h>
+#include <net/xdp_sock_drv.h>
+#include <net/xdp.h>
+
+#include "otx2_common.h"
+#include "otx2_xsk.h"
+
+int otx2_xsk_pool_alloc_buf(struct otx2_nic *pfvf, struct otx2_pool *pool,
+ dma_addr_t *dma, int idx)
+{
+ struct xdp_buff *xdp;
+ int delta;
+
+ xdp = xsk_buff_alloc(pool->xsk_pool);
+ if (!xdp)
+ return -ENOMEM;
+
+ pool->xdp[pool->xdp_top++] = xdp;
+ *dma = OTX2_DATA_ALIGN(xsk_buff_xdp_get_dma(xdp));
+ /* Adjust xdp->data for unaligned addresses */
+ delta = *dma - xsk_buff_xdp_get_dma(xdp);
+ xdp->data += delta;
+
+ return 0;
+}
+
+static int otx2_xsk_ctx_disable(struct otx2_nic *pfvf, u16 qidx, int aura_id)
+{
+ struct nix_cn10k_aq_enq_req *cn10k_rq_aq;
+ struct npa_aq_enq_req *aura_aq;
+ struct npa_aq_enq_req *pool_aq;
+ struct nix_aq_enq_req *rq_aq;
+
+ if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) {
+ cn10k_rq_aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox);
+ if (!cn10k_rq_aq)
+ return -ENOMEM;
+ cn10k_rq_aq->qidx = qidx;
+ cn10k_rq_aq->rq.ena = 0;
+ cn10k_rq_aq->rq_mask.ena = 1;
+ cn10k_rq_aq->ctype = NIX_AQ_CTYPE_RQ;
+ cn10k_rq_aq->op = NIX_AQ_INSTOP_WRITE;
+ } else {
+ rq_aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox);
+ if (!rq_aq)
+ return -ENOMEM;
+ rq_aq->qidx = qidx;
+ rq_aq->sq.ena = 0;
+ rq_aq->sq_mask.ena = 1;
+ rq_aq->ctype = NIX_AQ_CTYPE_RQ;
+ rq_aq->op = NIX_AQ_INSTOP_WRITE;
+ }
+
+ aura_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
+ if (!aura_aq)
+ goto fail;
+
+ aura_aq->aura_id = aura_id;
+ aura_aq->aura.ena = 0;
+ aura_aq->aura_mask.ena = 1;
+ aura_aq->ctype = NPA_AQ_CTYPE_AURA;
+ aura_aq->op = NPA_AQ_INSTOP_WRITE;
+
+ pool_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
+ if (!pool_aq)
+ goto fail;
+
+ pool_aq->aura_id = aura_id;
+ pool_aq->pool.ena = 0;
+ pool_aq->pool_mask.ena = 1;
+
+ pool_aq->ctype = NPA_AQ_CTYPE_POOL;
+ pool_aq->op = NPA_AQ_INSTOP_WRITE;
+
+ return otx2_sync_mbox_msg(&pfvf->mbox);
+
+fail:
+ otx2_mbox_reset(&pfvf->mbox.mbox, 0);
+ return -ENOMEM;
+}
+
+static void otx2_clean_up_rq(struct otx2_nic *pfvf, int qidx)
+{
+ struct otx2_qset *qset = &pfvf->qset;
+ struct otx2_cq_queue *cq;
+ struct otx2_pool *pool;
+ u64 iova;
+
+ /* If the DOWN flag is set SQs are already freed */
+ if (pfvf->flags & OTX2_FLAG_INTF_DOWN)
+ return;
+
+ cq = &qset->cq[qidx];
+ if (cq)
+ otx2_cleanup_rx_cqes(pfvf, cq, qidx);
+
+ pool = &pfvf->qset.pool[qidx];
+ iova = otx2_aura_allocptr(pfvf, qidx);
+ while (iova) {
+ iova -= OTX2_HEAD_ROOM;
+ otx2_free_bufs(pfvf, pool, iova, pfvf->rbsize);
+ iova = otx2_aura_allocptr(pfvf, qidx);
+ }
+
+ mutex_lock(&pfvf->mbox.lock);
+ otx2_xsk_ctx_disable(pfvf, qidx, qidx);
+ mutex_unlock(&pfvf->mbox.lock);
+}
+
+int otx2_xsk_pool_enable(struct otx2_nic *pf, struct xsk_buff_pool *pool, u16 qidx)
+{
+ u16 rx_queues = pf->hw.rx_queues;
+ u16 tx_queues = pf->hw.tx_queues;
+ int err;
+
+ if (qidx >= rx_queues || qidx >= tx_queues)
+ return -EINVAL;
+
+ err = xsk_pool_dma_map(pool, pf->dev, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+ if (err)
+ return err;
+
+ set_bit(qidx, pf->af_xdp_zc_qidx);
+ otx2_clean_up_rq(pf, qidx);
+ /* Reconfigure RSS table as 'qidx' cannot be part of RSS now */
+ otx2_set_rss_table(pf, DEFAULT_RSS_CONTEXT_GROUP);
+ /* Kick start the NAPI context so that receiving will start */
+ return otx2_xsk_wakeup(pf->netdev, qidx, XDP_WAKEUP_RX);
+}
+
+int otx2_xsk_pool_disable(struct otx2_nic *pf, u16 qidx)
+{
+ struct net_device *netdev = pf->netdev;
+ struct xsk_buff_pool *pool;
+ struct otx2_snd_queue *sq;
+
+ pool = xsk_get_pool_from_qid(netdev, qidx);
+ if (!pool)
+ return -EINVAL;
+
+ sq = &pf->qset.sq[qidx + pf->hw.tx_queues];
+ sq->xsk_pool = NULL;
+ otx2_clean_up_rq(pf, qidx);
+ clear_bit(qidx, pf->af_xdp_zc_qidx);
+ xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+ /* Reconfigure RSS table as 'qidx' now need to be part of RSS now */
+ otx2_set_rss_table(pf, DEFAULT_RSS_CONTEXT_GROUP);
+
+ return 0;
+}
+
+int otx2_xsk_pool_setup(struct otx2_nic *pf, struct xsk_buff_pool *pool, u16 qidx)
+{
+ if (pool)
+ return otx2_xsk_pool_enable(pf, pool, qidx);
+
+ return otx2_xsk_pool_disable(pf, qidx);
+}
+
+int otx2_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
+{
+ struct otx2_nic *pf = netdev_priv(dev);
+ struct otx2_cq_poll *cq_poll = NULL;
+ struct otx2_qset *qset = &pf->qset;
+
+ if (pf->flags & OTX2_FLAG_INTF_DOWN)
+ return -ENETDOWN;
+
+ if (queue_id >= pf->hw.rx_queues || queue_id >= pf->hw.tx_queues)
+ return -EINVAL;
+
+ cq_poll = &qset->napi[queue_id];
+ if (!cq_poll)
+ return -EINVAL;
+
+ /* Trigger interrupt */
+ if (!napi_if_scheduled_mark_missed(&cq_poll->napi)) {
+ otx2_write64(pf, NIX_LF_CINTX_ENA_W1S(cq_poll->cint_idx), BIT_ULL(0));
+ otx2_write64(pf, NIX_LF_CINTX_INT_W1S(cq_poll->cint_idx), BIT_ULL(0));
+ }
+
+ return 0;
+}
+
+void otx2_attach_xsk_buff(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, int qidx)
+{
+ if (test_bit(qidx, pfvf->af_xdp_zc_qidx))
+ sq->xsk_pool = xsk_get_pool_from_qid(pfvf->netdev, qidx);
+}
+
+void otx2_zc_napi_handler(struct otx2_nic *pfvf, struct xsk_buff_pool *pool,
+ int queue, int budget)
+{
+ struct xdp_desc *xdp_desc = pool->tx_descs;
+ int err, i, work_done = 0, batch;
+
+ budget = min(budget, otx2_read_free_sqe(pfvf, queue));
+ batch = xsk_tx_peek_release_desc_batch(pool, budget);
+ if (!batch)
+ return;
+
+ for (i = 0; i < batch; i++) {
+ dma_addr_t dma_addr;
+
+ dma_addr = xsk_buff_raw_get_dma(pool, xdp_desc[i].addr);
+ err = otx2_xdp_sq_append_pkt(pfvf, NULL, dma_addr, xdp_desc[i].len,
+ queue, OTX2_AF_XDP_FRAME);
+ if (!err) {
+ netdev_err(pfvf->netdev, "AF_XDP: Unable to transfer packet err%d\n", err);
+ break;
+ }
+ work_done++;
+ }
+
+ if (work_done)
+ xsk_tx_release(pool);
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.h
new file mode 100644
index 000000000000..8047fafee8fe
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell RVU PF/VF Netdev Devlink
+ *
+ * Copyright (C) 2024 Marvell.
+ *
+ */
+
+#ifndef OTX2_XSK_H
+#define OTX2_XSK_H
+
+struct otx2_nic;
+struct xsk_buff_pool;
+
+int otx2_xsk_pool_setup(struct otx2_nic *pf, struct xsk_buff_pool *pool, u16 qid);
+int otx2_xsk_pool_enable(struct otx2_nic *pf, struct xsk_buff_pool *pool, u16 qid);
+int otx2_xsk_pool_disable(struct otx2_nic *pf, u16 qid);
+int otx2_xsk_pool_alloc_buf(struct otx2_nic *pfvf, struct otx2_pool *pool,
+ dma_addr_t *dma, int idx);
+int otx2_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
+void otx2_zc_napi_handler(struct otx2_nic *pfvf, struct xsk_buff_pool *pool,
+ int queue, int budget);
+void otx2_attach_xsk_buff(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, int qidx);
+
+#endif /* OTX2_XSK_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c
index 9d887bfc3108..c5dbae0e513b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c
@@ -82,7 +82,7 @@ static int otx2_qos_sq_aura_pool_init(struct otx2_nic *pfvf, int qidx)
}
for (ptr = 0; ptr < num_sqbs; ptr++) {
- err = otx2_alloc_rbuf(pfvf, pool, &bufptr);
+ err = otx2_alloc_rbuf(pfvf, pool, &bufptr, pool_id, ptr);
if (err)
goto sqb_free;
pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index 440a4c42b405..8cdecf61253c 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -396,7 +396,6 @@ static int prestera_port_sfp_bind(struct prestera_port *port)
continue;
port->phylink_pcs.ops = &prestera_pcs_ops;
- port->phylink_pcs.neg_mode = true;
port->phy_config.dev = &port->dev->dev;
port->phy_config.type = PHYLINK_NETDEV;
diff --git a/drivers/net/ethernet/mediatek/airoha_eth.c b/drivers/net/ethernet/mediatek/airoha_eth.c
index 09f448f29124..aa5f220ddbcf 100644
--- a/drivers/net/ethernet/mediatek/airoha_eth.c
+++ b/drivers/net/ethernet/mediatek/airoha_eth.c
@@ -2556,11 +2556,10 @@ static u16 airoha_dev_select_queue(struct net_device *dev, struct sk_buff *skb,
static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
struct net_device *dev)
{
- struct skb_shared_info *sinfo = skb_shinfo(skb);
struct airoha_gdm_port *port = netdev_priv(dev);
+ u32 nr_frags = 1 + skb_shinfo(skb)->nr_frags;
u32 msg0, msg1, len = skb_headlen(skb);
struct airoha_qdma *qdma = port->qdma;
- u32 nr_frags = 1 + sinfo->nr_frags;
struct netdev_queue *txq;
struct airoha_queue *q;
void *data = skb->data;
@@ -2583,8 +2582,9 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
if (skb_cow_head(skb, 0))
goto error;
- if (sinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) {
- __be16 csum = cpu_to_be16(sinfo->gso_size);
+ if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 |
+ SKB_GSO_TCPV6)) {
+ __be16 csum = cpu_to_be16(skb_shinfo(skb)->gso_size);
tcp_hdr(skb)->check = (__force __sum16)csum;
msg0 |= FIELD_PREP(QDMA_ETH_TXMSG_TSO_MASK, 1);
@@ -2613,7 +2613,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
for (i = 0; i < nr_frags; i++) {
struct airoha_qdma_desc *desc = &q->desc[index];
struct airoha_queue_entry *e = &q->entry[index];
- skb_frag_t *frag = &sinfo->frags[i];
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
dma_addr_t addr;
u32 val;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 53485142938c..922330b3f4d7 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -815,12 +815,60 @@ static void mtk_mac_link_up(struct phylink_config *config,
mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id));
}
+static void mtk_mac_disable_tx_lpi(struct phylink_config *config)
+{
+ struct mtk_mac *mac = container_of(config, struct mtk_mac,
+ phylink_config);
+ struct mtk_eth *eth = mac->hw;
+
+ mtk_m32(eth, MAC_MCR_EEE100M | MAC_MCR_EEE1G, 0, MTK_MAC_MCR(mac->id));
+}
+
+static int mtk_mac_enable_tx_lpi(struct phylink_config *config, u32 timer,
+ bool tx_clk_stop)
+{
+ struct mtk_mac *mac = container_of(config, struct mtk_mac,
+ phylink_config);
+ struct mtk_eth *eth = mac->hw;
+ u32 val;
+
+ /* Tx idle timer in ms */
+ timer = DIV_ROUND_UP(timer, 1000);
+
+ /* If the timer is zero, then set LPI_MODE, which allows the
+ * system to enter LPI mode immediately rather than waiting for
+ * the LPI threshold.
+ */
+ if (!timer)
+ val = MAC_EEE_LPI_MODE;
+ else if (FIELD_FIT(MAC_EEE_LPI_TXIDLE_THD, timer))
+ val = FIELD_PREP(MAC_EEE_LPI_TXIDLE_THD, timer);
+ else
+ val = MAC_EEE_LPI_TXIDLE_THD;
+
+ if (tx_clk_stop)
+ val |= MAC_EEE_CKG_TXIDLE;
+
+ /* PHY Wake-up time, this field does not have a reset value, so use the
+ * reset value from MT7531 (36us for 100M and 17us for 1000M).
+ */
+ val |= FIELD_PREP(MAC_EEE_WAKEUP_TIME_1000, 17) |
+ FIELD_PREP(MAC_EEE_WAKEUP_TIME_100, 36);
+
+ mtk_w32(eth, val, MTK_MAC_EEECR(mac->id));
+ mtk_m32(eth, 0, MAC_MCR_EEE100M | MAC_MCR_EEE1G, MTK_MAC_MCR(mac->id));
+
+ return 0;
+}
+
static const struct phylink_mac_ops mtk_phylink_ops = {
.mac_select_pcs = mtk_mac_select_pcs,
.mac_config = mtk_mac_config,
.mac_finish = mtk_mac_finish,
.mac_link_down = mtk_mac_link_down,
.mac_link_up = mtk_mac_link_up,
+ .mac_disable_tx_lpi = mtk_mac_disable_tx_lpi,
+ .mac_enable_tx_lpi = mtk_mac_enable_tx_lpi,
};
static int mtk_mdio_init(struct mtk_eth *eth)
@@ -830,17 +878,12 @@ static int mtk_mdio_init(struct mtk_eth *eth)
int ret;
u32 val;
- mii_np = of_get_child_by_name(eth->dev->of_node, "mdio-bus");
+ mii_np = of_get_available_child_by_name(eth->dev->of_node, "mdio-bus");
if (!mii_np) {
dev_err(eth->dev, "no %s child node found", "mdio-bus");
return -ENODEV;
}
- if (!of_device_is_available(mii_np)) {
- ret = -ENODEV;
- goto err_put_node;
- }
-
eth->mii_bus = devm_mdiobus_alloc(eth->dev);
if (!eth->mii_bus) {
ret = -ENOMEM;
@@ -4474,6 +4517,20 @@ static int mtk_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam
return phylink_ethtool_set_pauseparam(mac->phylink, pause);
}
+static int mtk_get_eee(struct net_device *dev, struct ethtool_keee *eee)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+
+ return phylink_ethtool_get_eee(mac->phylink, eee);
+}
+
+static int mtk_set_eee(struct net_device *dev, struct ethtool_keee *eee)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+
+ return phylink_ethtool_set_eee(mac->phylink, eee);
+}
+
static u16 mtk_select_queue(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev)
{
@@ -4506,6 +4563,8 @@ static const struct ethtool_ops mtk_ethtool_ops = {
.set_pauseparam = mtk_set_pauseparam,
.get_rxnfc = mtk_get_rxnfc,
.set_rxnfc = mtk_set_rxnfc,
+ .get_eee = mtk_get_eee,
+ .set_eee = mtk_set_eee,
};
static const struct net_device_ops mtk_netdev_ops = {
@@ -4615,6 +4674,9 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
mac->phylink_config.type = PHYLINK_NETDEV;
mac->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
MAC_10 | MAC_100 | MAC_1000 | MAC_2500FD;
+ mac->phylink_config.lpi_capabilities = MAC_100FD | MAC_1000FD |
+ MAC_2500FD;
+ mac->phylink_config.lpi_timer_default = 1000;
/* MT7623 gmac0 is now missing its speed-specific PLL configuration
* in its .mac_config method (since state->speed is not valid there.
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 0d5225f1d3ee..90a377ab4359 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -453,6 +453,8 @@
#define MAC_MCR_RX_FIFO_CLR_DIS BIT(12)
#define MAC_MCR_BACKOFF_EN BIT(9)
#define MAC_MCR_BACKPR_EN BIT(8)
+#define MAC_MCR_EEE1G BIT(7)
+#define MAC_MCR_EEE100M BIT(6)
#define MAC_MCR_FORCE_RX_FC BIT(5)
#define MAC_MCR_FORCE_TX_FC BIT(4)
#define MAC_MCR_SPEED_1000 BIT(3)
@@ -461,6 +463,15 @@
#define MAC_MCR_FORCE_LINK BIT(0)
#define MAC_MCR_FORCE_LINK_DOWN (MAC_MCR_FORCE_MODE)
+/* Mac EEE control registers */
+#define MTK_MAC_EEECR(x) (0x10104 + (x * 0x100))
+#define MAC_EEE_WAKEUP_TIME_1000 GENMASK(31, 24)
+#define MAC_EEE_WAKEUP_TIME_100 GENMASK(23, 16)
+#define MAC_EEE_LPI_TXIDLE_THD GENMASK(15, 8)
+#define MAC_EEE_CKG_TXIDLE BIT(3)
+#define MAC_EEE_CKG_RXLPI BIT(2)
+#define MAC_EEE_LPI_MODE BIT(0)
+
/* Mac status registers */
#define MTK_MAC_MSR(x) (0x10108 + (x * 0x100))
#define MAC_MSR_EEE1G BIT(7)
diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index 25989c79c92e..76f202d7f055 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -1427,15 +1427,10 @@ static int mtk_star_mdio_init(struct net_device *ndev)
of_node = dev->of_node;
- mdio_node = of_get_child_by_name(of_node, "mdio");
+ mdio_node = of_get_available_child_by_name(of_node, "mdio");
if (!mdio_node)
return -ENODEV;
- if (!of_device_is_available(mdio_node)) {
- ret = -ENODEV;
- goto out_put_node;
- }
-
priv->mii = devm_mdiobus_alloc(dev);
if (!priv->mii) {
ret = -ENOMEM;
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index b330020dc0d6..07b061a97a6e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -526,28 +526,6 @@ out:
return res;
}
-u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, u32 uid, u32 obj, u32 count)
-{
- struct mlx4_zone_entry *zone;
- int res = 0;
-
- spin_lock(&zones->lock);
-
- zone = __mlx4_find_zone_by_uid(zones, uid);
-
- if (NULL == zone) {
- res = -1;
- goto out;
- }
-
- __mlx4_free_from_zone(zone, obj, count);
-
-out:
- spin_unlock(&zones->lock);
-
- return res;
-}
-
u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u32 count)
{
struct mlx4_zone_entry *zone;
@@ -682,9 +660,9 @@ static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device)
}
static int mlx4_alloc_db_from_pgdir(struct mlx4_db_pgdir *pgdir,
- struct mlx4_db *db, int order)
+ struct mlx4_db *db, unsigned int order)
{
- int o;
+ unsigned int o;
int i;
for (o = order; o <= 1; ++o) {
@@ -712,7 +690,7 @@ found:
return 0;
}
-int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order)
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, unsigned int order)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_db_pgdir *pgdir;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 15c57e9517e9..b33285d755b9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -48,60 +48,43 @@
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_checksum.h>
#endif
+#include <net/page_pool/helpers.h>
#include "mlx4_en.h"
-static int mlx4_alloc_page(struct mlx4_en_priv *priv,
- struct mlx4_en_rx_alloc *frag,
- gfp_t gfp)
-{
- struct page *page;
- dma_addr_t dma;
-
- page = alloc_page(gfp);
- if (unlikely(!page))
- return -ENOMEM;
- dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir);
- if (unlikely(dma_mapping_error(priv->ddev, dma))) {
- __free_page(page);
- return -ENOMEM;
- }
- frag->page = page;
- frag->dma = dma;
- frag->page_offset = priv->rx_headroom;
- return 0;
-}
-
static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring *ring,
struct mlx4_en_rx_desc *rx_desc,
struct mlx4_en_rx_alloc *frags,
gfp_t gfp)
{
+ dma_addr_t dma;
int i;
for (i = 0; i < priv->num_frags; i++, frags++) {
if (!frags->page) {
- if (mlx4_alloc_page(priv, frags, gfp)) {
+ frags->page = page_pool_alloc_pages(ring->pp, gfp);
+ if (!frags->page) {
ring->alloc_fail++;
return -ENOMEM;
}
+ page_pool_fragment_page(frags->page, 1);
+ frags->page_offset = priv->rx_headroom;
+
ring->rx_alloc_pages++;
}
- rx_desc->data[i].addr = cpu_to_be64(frags->dma +
- frags->page_offset);
+ dma = page_pool_get_dma_addr(frags->page);
+ rx_desc->data[i].addr = cpu_to_be64(dma + frags->page_offset);
}
return 0;
}
static void mlx4_en_free_frag(const struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring *ring,
struct mlx4_en_rx_alloc *frag)
{
- if (frag->page) {
- dma_unmap_page(priv->ddev, frag->dma,
- PAGE_SIZE, priv->dma_dir);
- __free_page(frag->page);
- }
+ if (frag->page)
+ page_pool_put_full_page(ring->pp, frag->page, false);
/* We need to clear all fields, otherwise a change of priv->log_rx_info
* could lead to see garbage later in frag->page.
*/
@@ -141,18 +124,6 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
(index << ring->log_stride);
struct mlx4_en_rx_alloc *frags = ring->rx_info +
(index << priv->log_rx_info);
- if (likely(ring->page_cache.index > 0)) {
- /* XDP uses a single page per frame */
- if (!frags->page) {
- ring->page_cache.index--;
- frags->page = ring->page_cache.buf[ring->page_cache.index].page;
- frags->dma = ring->page_cache.buf[ring->page_cache.index].dma;
- }
- frags->page_offset = XDP_PACKET_HEADROOM;
- rx_desc->data[0].addr = cpu_to_be64(frags->dma +
- XDP_PACKET_HEADROOM);
- return 0;
- }
return mlx4_en_alloc_frags(priv, ring, rx_desc, frags, gfp);
}
@@ -178,7 +149,7 @@ static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv,
frags = ring->rx_info + (index << priv->log_rx_info);
for (nr = 0; nr < priv->num_frags; nr++) {
en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
- mlx4_en_free_frag(priv, frags + nr);
+ mlx4_en_free_frag(priv, ring, frags + nr);
}
}
@@ -268,6 +239,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
u32 size, u16 stride, int node, int queue_index)
{
struct mlx4_en_dev *mdev = priv->mdev;
+ struct page_pool_params pp = {};
struct mlx4_en_rx_ring *ring;
int err = -ENOMEM;
int tmp;
@@ -286,9 +258,26 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
ring->log_stride = ffs(ring->stride) - 1;
ring->buf_size = ring->size * ring->stride + TXBB_SIZE;
- if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0)
+ pp.flags = PP_FLAG_DMA_MAP;
+ pp.pool_size = size * DIV_ROUND_UP(priv->rx_skb_size, PAGE_SIZE);
+ pp.nid = node;
+ pp.napi = &priv->rx_cq[queue_index]->napi;
+ pp.netdev = priv->dev;
+ pp.dev = &mdev->dev->persist->pdev->dev;
+ pp.dma_dir = priv->dma_dir;
+
+ ring->pp = page_pool_create(&pp);
+ if (!ring->pp)
goto err_ring;
+ if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0)
+ goto err_pp;
+
+ err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_POOL,
+ ring->pp);
+ if (err)
+ goto err_xdp_info;
+
tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
sizeof(struct mlx4_en_rx_alloc));
ring->rx_info = kvzalloc_node(tmp, GFP_KERNEL, node);
@@ -319,6 +308,8 @@ err_info:
ring->rx_info = NULL;
err_xdp_info:
xdp_rxq_info_unreg(&ring->xdp_rxq);
+err_pp:
+ page_pool_destroy(ring->pp);
err_ring:
kfree(ring);
*pring = NULL;
@@ -409,26 +400,6 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv)
}
}
-/* When the rx ring is running in page-per-packet mode, a released frame can go
- * directly into a small cache, to avoid unmapping or touching the page
- * allocator. In bpf prog performance scenarios, buffers are either forwarded
- * or dropped, never converted to skbs, so every page can come directly from
- * this cache when it is sized to be a multiple of the napi budget.
- */
-bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
- struct mlx4_en_rx_alloc *frame)
-{
- struct mlx4_en_page_cache *cache = &ring->page_cache;
-
- if (cache->index >= MLX4_EN_CACHE_SIZE)
- return false;
-
- cache->buf[cache->index].page = frame->page;
- cache->buf[cache->index].dma = frame->dma;
- cache->index++;
- return true;
-}
-
void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring **pring,
u32 size, u16 stride)
@@ -445,6 +416,7 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
xdp_rxq_info_unreg(&ring->xdp_rxq);
mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
kvfree(ring->rx_info);
+ page_pool_destroy(ring->pp);
ring->rx_info = NULL;
kfree(ring);
*pring = NULL;
@@ -453,14 +425,6 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring *ring)
{
- int i;
-
- for (i = 0; i < ring->page_cache.index; i++) {
- dma_unmap_page(priv->ddev, ring->page_cache.buf[i].dma,
- PAGE_SIZE, priv->dma_dir);
- put_page(ring->page_cache.buf[i].page);
- }
- ring->page_cache.index = 0;
mlx4_en_free_rx_buf(priv, ring);
if (ring->stride <= TXBB_SIZE)
ring->buf -= TXBB_SIZE;
@@ -487,7 +451,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
if (unlikely(!page))
goto fail;
- dma = frags->dma;
+ dma = page_pool_get_dma_addr(page);
dma_sync_single_range_for_cpu(priv->ddev, dma, frags->page_offset,
frag_size, priv->dma_dir);
@@ -498,6 +462,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
if (frag_info->frag_stride == PAGE_SIZE / 2) {
frags->page_offset ^= PAGE_SIZE / 2;
release = page_count(page) != 1 ||
+ atomic_long_read(&page->pp_ref_count) != 1 ||
page_is_pfmemalloc(page) ||
page_to_nid(page) != numa_mem_id();
} else if (!priv->rx_headroom) {
@@ -511,10 +476,9 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
release = frags->page_offset + frag_info->frag_size > PAGE_SIZE;
}
if (release) {
- dma_unmap_page(priv->ddev, dma, PAGE_SIZE, priv->dma_dir);
frags->page = NULL;
} else {
- page_ref_inc(page);
+ page_pool_ref_page(page);
}
nr++;
@@ -784,7 +748,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
/* Get pointer to first fragment since we haven't
* skb yet and cast it to ethhdr struct
*/
- dma = frags[0].dma + frags[0].page_offset;
+ dma = page_pool_get_dma_addr(frags[0].page);
+ dma += frags[0].page_offset;
dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh),
DMA_FROM_DEVICE);
@@ -823,7 +788,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
void *orig_data;
u32 act;
- dma = frags[0].dma + frags[0].page_offset;
+ dma = page_pool_get_dma_addr(frags[0].page);
+ dma += frags[0].page_offset;
dma_sync_single_for_cpu(priv->ddev, dma,
priv->frag_info[0].frag_size,
DMA_FROM_DEVICE);
@@ -886,6 +852,7 @@ xdp_drop_no_cnt:
skb = napi_get_frags(&cq->napi);
if (unlikely(!skb))
goto next;
+ skb_mark_for_recycle(skb);
if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) {
u64 timestamp = mlx4_en_get_cqe_ts(cqe);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 1ddb11cb25f9..87f35bcbeff8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -44,6 +44,7 @@
#include <linux/ipv6.h>
#include <linux/indirect_call_wrapper.h>
#include <net/ipv6.h>
+#include <net/page_pool/helpers.h>
#include "mlx4_en.h"
@@ -350,16 +351,10 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
int napi_mode)
{
struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
- struct mlx4_en_rx_alloc frame = {
- .page = tx_info->page,
- .dma = tx_info->map0_dma,
- };
-
- if (!napi_mode || !mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
- dma_unmap_page(priv->ddev, tx_info->map0_dma,
- PAGE_SIZE, priv->dma_dir);
- put_page(tx_info->page);
- }
+ struct page_pool *pool = ring->recycle_ring->pp;
+
+ /* Note that napi_mode = 0 means ndo_close() path, not budget = 0 */
+ page_pool_put_full_page(pool, tx_info->page, !!napi_mode);
return tx_info->nr_txbb;
}
@@ -450,6 +445,8 @@ int mlx4_en_process_tx_cq(struct net_device *dev,
if (unlikely(!priv->port_up))
return 0;
+ if (unlikely(!napi_budget) && cq->type == TX_XDP)
+ return 0;
netdev_txq_bql_complete_prefetchw(ring->tx_queue);
@@ -1194,7 +1191,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
data = &tx_desc->data;
- dma = frame->dma;
+ dma = page_pool_get_dma_addr(frame->page);
tx_info->page = frame->page;
frame->page = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index d7d856d1758a..b213094ea30f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1478,12 +1478,6 @@ void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc);
u32 mlx4_zone_alloc_entries(struct mlx4_zone_allocator *zones, u32 uid, int count,
int align, u32 skip_mask, u32 *puid);
-/* Free <count> objects, start from <obj> of the uid <uid> from zone_allocator
- * <zones>.
- */
-u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones,
- u32 uid, u32 obj, u32 count);
-
/* If <zones> was allocated with MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP, instead of
* specifying the uid when freeing an object, zone allocator could figure it by
* itself. Other parameters are similar to mlx4_zone_free.
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 28b70dcc652e..ad0d91a75184 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -247,20 +247,11 @@ struct mlx4_en_tx_desc {
struct mlx4_en_rx_alloc {
struct page *page;
- dma_addr_t dma;
u32 page_offset;
};
#define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT)
-struct mlx4_en_page_cache {
- u32 index;
- struct {
- struct page *page;
- dma_addr_t dma;
- } buf[MLX4_EN_CACHE_SIZE];
-};
-
enum {
MLX4_EN_TX_RING_STATE_RECOVERING,
};
@@ -335,14 +326,14 @@ struct mlx4_en_rx_ring {
u16 stride;
u16 log_stride;
u16 cqn; /* index of port CQ associated with this ring */
+ u8 fcs_del;
u32 prod;
u32 cons;
u32 buf_size;
- u8 fcs_del;
+ struct page_pool *pp;
void *buf;
void *rx_info;
struct bpf_prog __rcu *xdp_prog;
- struct mlx4_en_page_cache page_cache;
unsigned long bytes;
unsigned long packets;
unsigned long csum_ok;
@@ -707,8 +698,6 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
struct mlx4_en_priv *priv, unsigned int length,
int tx_ind, bool *doorbell_pending);
void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring);
-bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
- struct mlx4_en_rx_alloc *frame);
int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring **pring,
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 4e43f4a7d246..e3d0b13c1610 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -147,26 +147,6 @@ static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
return err;
}
-int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx)
-{
- struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
- struct mlx4_mac_table *table = &info->mac_table;
- int i;
-
- for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
- if (!table->refs[i])
- continue;
-
- if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) {
- *idx = i;
- return 0;
- }
- }
-
- return -ENOENT;
-}
-EXPORT_SYMBOL_GPL(mlx4_find_cached_mac);
-
static bool mlx4_need_mf_bond(struct mlx4_dev *dev)
{
int i, num_eth_ports = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
index c7216e84ef8c..86253a89c24c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
@@ -13,6 +13,50 @@ struct mlx5_vnic_diag_stats {
__be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)];
};
+static void mlx5_reporter_vnic_diagnose_counter_icm(struct mlx5_core_dev *dev,
+ struct devlink_fmsg *fmsg,
+ u16 vport_num, bool other_vport)
+{
+ u32 out_icm_reg[MLX5_ST_SZ_DW(vhca_icm_ctrl_reg)] = {};
+ u32 in_icm_reg[MLX5_ST_SZ_DW(vhca_icm_ctrl_reg)] = {};
+ u32 out_reg[MLX5_ST_SZ_DW(nic_cap_reg)] = {};
+ u32 in_reg[MLX5_ST_SZ_DW(nic_cap_reg)] = {};
+ u32 cur_alloc_icm;
+ int vhca_icm_ctrl;
+ u16 vhca_id;
+ int err;
+
+ err = mlx5_core_access_reg(dev, in_reg, sizeof(in_reg), out_reg,
+ sizeof(out_reg), MLX5_REG_NIC_CAP, 0, 0);
+ if (err) {
+ mlx5_core_warn(dev, "Reading nic_cap_reg failed. err = %d\n", err);
+ return;
+ }
+ vhca_icm_ctrl = MLX5_GET(nic_cap_reg, out_reg, vhca_icm_ctrl);
+ if (!vhca_icm_ctrl)
+ return;
+
+ MLX5_SET(vhca_icm_ctrl_reg, in_icm_reg, vhca_id_valid, other_vport);
+ if (other_vport) {
+ err = mlx5_vport_get_vhca_id(dev, vport_num, &vhca_id);
+ if (err) {
+ mlx5_core_warn(dev, "vport to vhca_id failed. vport_num = %d, err = %d\n",
+ vport_num, err);
+ return;
+ }
+ MLX5_SET(vhca_icm_ctrl_reg, in_icm_reg, vhca_id, vhca_id);
+ }
+ err = mlx5_core_access_reg(dev, in_icm_reg, sizeof(in_icm_reg),
+ out_icm_reg, sizeof(out_icm_reg),
+ MLX5_REG_VHCA_ICM_CTRL, 0, 0);
+ if (err) {
+ mlx5_core_warn(dev, "Reading vhca_icm_ctrl failed. err = %d\n", err);
+ return;
+ }
+ cur_alloc_icm = MLX5_GET(vhca_icm_ctrl_reg, out_icm_reg, cur_alloc_icm);
+ devlink_fmsg_u32_pair_put(fmsg, "icm_consumption", cur_alloc_icm);
+}
+
void mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev,
struct devlink_fmsg *fmsg,
u16 vport_num, bool other_vport)
@@ -59,6 +103,8 @@ void mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev,
devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail",
VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail));
}
+ if (MLX5_CAP_GEN(dev, nic_cap_reg))
+ mlx5_reporter_vnic_diagnose_counter_icm(dev, fmsg, vport_num, other_vport);
devlink_fmsg_obj_nest_end(fmsg);
devlink_fmsg_pair_nest_end(fmsg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 979fc56205e1..769e683f2488 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -95,8 +95,6 @@ struct page_pool;
#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \
MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD))
-#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18
-
/* Keep in sync with mlx5e_mpwrq_log_wqe_sz.
* These are theoretical maximums, which can be further restricted by
* capabilities. These values are used for static resource allocations and
@@ -386,7 +384,6 @@ enum {
MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
MLX5E_SQ_STATE_PENDING_XSK_TX,
MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC,
- MLX5E_SQ_STATE_XDP_MULTIBUF,
MLX5E_NUM_SQ_STATES, /* Must be kept last */
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 64b62ed17b07..aa36670d9a36 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -10,6 +10,9 @@
#include <net/page_pool/types.h>
#include <net/xdp_sock_drv.h>
+#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18
+#define MLX5_REP_MPWRQ_MAX_LOG_WQE_SZ 17
+
static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev)
{
u8 min_page_shift = MLX5_CAP_GEN_2(mdev, log_min_mkey_entity_size);
@@ -103,18 +106,22 @@ u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
enum mlx5e_mpwrq_umr_mode umr_mode)
{
u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
- u8 max_pages_per_wqe, max_log_mpwqe_size;
+ u8 max_pages_per_wqe, max_log_wqe_size_calc;
+ u8 max_log_wqe_size_cap;
u16 max_wqe_size;
/* Keep in sync with MLX5_MPWRQ_MAX_PAGES_PER_WQE. */
max_wqe_size = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB;
max_pages_per_wqe = ALIGN_DOWN(max_wqe_size - sizeof(struct mlx5e_umr_wqe),
MLX5_UMR_FLEX_ALIGNMENT) / umr_entry_size;
- max_log_mpwqe_size = ilog2(max_pages_per_wqe) + page_shift;
+ max_log_wqe_size_calc = ilog2(max_pages_per_wqe) + page_shift;
+
+ WARN_ON_ONCE(max_log_wqe_size_calc < MLX5E_ORDER2_MAX_PACKET_MTU);
- WARN_ON_ONCE(max_log_mpwqe_size < MLX5E_ORDER2_MAX_PACKET_MTU);
+ max_log_wqe_size_cap = mlx5_core_is_ecpf(mdev) ?
+ MLX5_REP_MPWRQ_MAX_LOG_WQE_SZ : MLX5_MPWRQ_MAX_LOG_WQE_SZ;
- return min_t(u8, max_log_mpwqe_size, MLX5_MPWRQ_MAX_LOG_WQE_SZ);
+ return min_t(u8, max_log_wqe_size_calc, max_log_wqe_size_cap);
}
u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
@@ -1240,7 +1247,6 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
mlx5e_build_sq_param_common(mdev, param);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
- param->is_xdp_mb = !mlx5e_rx_is_linear_skb(mdev, params, xsk);
mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index 3f8986f9d862..bd5877acc5b1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -33,7 +33,6 @@ struct mlx5e_sq_param {
struct mlx5_wq_param wq;
bool is_mpw;
bool is_tls;
- bool is_xdp_mb;
u16 stop_room;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 5f6a0605e4ae..f62fbfb67a1b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -296,11 +296,16 @@ enum mlx5e_fec_supported_link_mode {
MLX5E_FEC_SUPPORTED_LINK_MODE_200G_2X,
MLX5E_FEC_SUPPORTED_LINK_MODE_400G_4X,
MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X,
MLX5E_MAX_FEC_SUPPORTED_LINK_MODE,
};
#define MLX5E_FEC_FIRST_50G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X
#define MLX5E_FEC_FIRST_100G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_100G_1X
+#define MLX5E_FEC_FIRST_200G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X
#define MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, policy, write, link) \
do { \
@@ -320,8 +325,10 @@ static bool mlx5e_is_fec_supported_link_mode(struct mlx5_core_dev *dev,
return link_mode < MLX5E_FEC_FIRST_50G_PER_LANE_MODE ||
(link_mode < MLX5E_FEC_FIRST_100G_PER_LANE_MODE &&
MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm)) ||
- (link_mode >= MLX5E_FEC_FIRST_100G_PER_LANE_MODE &&
- MLX5_CAP_PCAM_FEATURE(dev, fec_100G_per_lane_in_pplm));
+ (link_mode < MLX5E_FEC_FIRST_200G_PER_LANE_MODE &&
+ MLX5_CAP_PCAM_FEATURE(dev, fec_100G_per_lane_in_pplm)) ||
+ (link_mode >= MLX5E_FEC_FIRST_200G_PER_LANE_MODE &&
+ MLX5_CAP_PCAM_FEATURE(dev, fec_200G_per_lane_in_pplm));
}
/* get/set FEC admin field for a given speed */
@@ -368,6 +375,18 @@ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write,
case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X:
MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 800g_8x);
break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_1x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_2x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 800g_4x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 1600g_8x);
+ break;
default:
return -EINVAL;
}
@@ -421,6 +440,18 @@ static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap,
case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X:
*fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 800g_8x);
break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 200g_1x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 400g_2x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 800g_4x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 1600g_8x);
+ break;
default:
return -EINVAL;
}
@@ -494,6 +525,26 @@ out:
return 0;
}
+static u16 mlx5e_remap_fec_conf_mode(enum mlx5e_fec_supported_link_mode link_mode,
+ u16 conf_fec)
+{
+ /* RS fec in ethtool is originally mapped to MLX5E_FEC_RS_528_514.
+ * For link modes up to 25G per lane, the value is kept.
+ * For 50G or 100G per lane, it's remapped to MLX5E_FEC_RS_544_514.
+ * For 200G per lane, remapped to MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD.
+ */
+ if (conf_fec != BIT(MLX5E_FEC_RS_528_514))
+ return conf_fec;
+
+ if (link_mode >= MLX5E_FEC_FIRST_200G_PER_LANE_MODE)
+ return BIT(MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD);
+
+ if (link_mode >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE)
+ return BIT(MLX5E_FEC_RS_544_514);
+
+ return conf_fec;
+}
+
int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy)
{
bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm);
@@ -530,14 +581,7 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy)
if (!mlx5e_is_fec_supported_link_mode(dev, i))
break;
- /* RS fec in ethtool is mapped to MLX5E_FEC_RS_528_514
- * to link modes up to 25G per lane and to
- * MLX5E_FEC_RS_544_514 in the new link modes based on
- * 50G or 100G per lane
- */
- if (conf_fec == (1 << MLX5E_FEC_RS_528_514) &&
- i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE)
- conf_fec = (1 << MLX5E_FEC_RS_544_514);
+ conf_fec = mlx5e_remap_fec_conf_mode(i, conf_fec);
mlx5e_get_fec_cap_field(out, &fec_caps, i);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
index d1da225f35da..fa2283dd383b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -61,6 +61,7 @@ enum {
MLX5E_FEC_NOFEC,
MLX5E_FEC_FIRECODE,
MLX5E_FEC_RS_528_514,
+ MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD = 4,
MLX5E_FEC_RS_544_514 = 7,
MLX5E_FEC_LLRS_272_257_1 = 9,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index afd654583b6b..131ed97ca997 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -326,7 +326,7 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix,
int node;
sq->pdev = c->pdev;
- sq->clock = &mdev->clock;
+ sq->clock = mdev->clock;
sq->mkey_be = c->mkey_be;
sq->netdev = c->netdev;
sq->priv = c->priv;
@@ -696,7 +696,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
rq->pdev = c->pdev;
rq->netdev = priv->netdev;
rq->priv = priv;
- rq->clock = &mdev->clock;
+ rq->clock = mdev->clock;
rq->tstamp = &priv->tstamp;
rq->mdev = mdev;
rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index 25d751eba99b..e75759533ae0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -317,10 +317,8 @@ mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx
}
static void
-mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter,
- struct devlink_fmsg *fmsg)
+mlx5e_rx_reporter_diagnose_common_config(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg)
{
- struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq;
struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
@@ -340,20 +338,100 @@ static void mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq,
devlink_fmsg_obj_nest_end(fmsg);
}
-static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
- struct devlink_fmsg *fmsg,
- struct netlink_ext_ack *extack)
+static void mlx5e_rx_reporter_diagnose_rx_res_dir_tirns(struct mlx5e_rx_res *rx_res,
+ struct devlink_fmsg *fmsg)
{
- struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
- struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ unsigned int max_nch = mlx5e_rx_res_get_max_nch(rx_res);
int i;
- mutex_lock(&priv->state_lock);
+ devlink_fmsg_arr_pair_nest_start(fmsg, "Direct TIRs");
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
- goto unlock;
+ for (i = 0; i < max_nch; i++) {
+ devlink_fmsg_obj_nest_start(fmsg);
+
+ devlink_fmsg_u32_pair_put(fmsg, "ix", i);
+ devlink_fmsg_u32_pair_put(fmsg, "tirn", mlx5e_rx_res_get_tirn_direct(rx_res, i));
+ devlink_fmsg_u32_pair_put(fmsg, "rqtn", mlx5e_rx_res_get_rqtn_direct(rx_res, i));
+
+ devlink_fmsg_obj_nest_end(fmsg);
+ }
+
+ devlink_fmsg_arr_pair_nest_end(fmsg);
+}
+
+static void mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(struct mlx5e_rss *rss, bool inner,
+ struct devlink_fmsg *fmsg)
+{
+ bool found_valid_tir = false;
+ int tt;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ if (!mlx5e_rss_valid_tir(rss, tt, inner))
+ continue;
+
+ if (!found_valid_tir) {
+ char *tir_msg = inner ? "Inner TIRs Numbers" : "TIRs Numbers";
+
+ found_valid_tir = true;
+ devlink_fmsg_arr_pair_nest_start(fmsg, tir_msg);
+ }
+
+ devlink_fmsg_obj_nest_start(fmsg);
+ devlink_fmsg_string_pair_put(fmsg, "tt", mlx5_ttc_get_name(tt));
+ devlink_fmsg_u32_pair_put(fmsg, "tirn", mlx5e_rss_get_tirn(rss, tt, inner));
+ devlink_fmsg_obj_nest_end(fmsg);
+ }
+
+ if (found_valid_tir)
+ devlink_fmsg_arr_pair_nest_end(fmsg);
+}
+
+static void mlx5e_rx_reporter_diagnose_rx_res_rss_ix(struct mlx5e_rx_res *rx_res, u32 rss_idx,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_rss *rss = mlx5e_rx_res_rss_get(rx_res, rss_idx);
+
+ if (!rss)
+ return;
+
+ devlink_fmsg_obj_nest_start(fmsg);
+
+ devlink_fmsg_u32_pair_put(fmsg, "Index", rss_idx);
+ devlink_fmsg_u32_pair_put(fmsg, "rqtn", mlx5e_rss_get_rqtn(rss));
+ mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(rss, false, fmsg);
+ if (mlx5e_rss_get_inner_ft_support(rss))
+ mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(rss, true, fmsg);
+
+ devlink_fmsg_obj_nest_end(fmsg);
+}
+
+static void mlx5e_rx_reporter_diagnose_rx_res_rss(struct mlx5e_rx_res *rx_res,
+ struct devlink_fmsg *fmsg)
+{
+ int rss_ix;
+
+ devlink_fmsg_arr_pair_nest_start(fmsg, "RSS");
+ for (rss_ix = 0; rss_ix < MLX5E_MAX_NUM_RSS; rss_ix++)
+ mlx5e_rx_reporter_diagnose_rx_res_rss_ix(rx_res, rss_ix, fmsg);
+ devlink_fmsg_arr_pair_nest_end(fmsg);
+}
+
+static void mlx5e_rx_reporter_diagnose_rx_res(struct mlx5e_priv *priv,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_rx_res *rx_res = priv->rx_res;
+
+ mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX resources");
+ mlx5e_rx_reporter_diagnose_rx_res_dir_tirns(rx_res, fmsg);
+ mlx5e_rx_reporter_diagnose_rx_res_rss(rx_res, fmsg);
+ mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static void mlx5e_rx_reporter_diagnose_rqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ int i;
- mlx5e_rx_reporter_diagnose_common_config(reporter, fmsg);
devlink_fmsg_arr_pair_nest_start(fmsg, "RQs");
for (i = 0; i < priv->channels.num; i++) {
@@ -367,7 +445,24 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
}
if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state))
mlx5e_rx_reporter_build_diagnose_output_ptp_rq(&ptp_ch->rq, fmsg);
+
devlink_fmsg_arr_pair_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ mlx5e_rx_reporter_diagnose_common_config(priv, fmsg);
+ mlx5e_rx_reporter_diagnose_rqs(priv, fmsg);
+ mlx5e_rx_reporter_diagnose_rx_res(priv, fmsg);
unlock:
mutex_unlock(&priv->state_lock);
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 09433b91be17..532c7fa94d17 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -16,7 +16,6 @@ static const char * const sq_sw_state_type_name[] = {
[MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline",
[MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx",
[MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync",
- [MLX5E_SQ_STATE_XDP_MULTIBUF] = "xdp_multibuf",
};
static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
index 5f742f896600..0d8ccc7b6c11 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
@@ -81,6 +81,11 @@ struct mlx5e_rss {
refcount_t refcnt;
};
+bool mlx5e_rss_get_inner_ft_support(struct mlx5e_rss *rss)
+{
+ return rss->inner_ft_support;
+}
+
void mlx5e_rss_params_indir_modify_actual_size(struct mlx5e_rss *rss, u32 num_channels)
{
rss->indir.actual_table_size = mlx5e_rqt_size(rss->mdev, num_channels);
@@ -449,6 +454,16 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
return mlx5e_tir_get_tirn(tir);
}
+u32 mlx5e_rss_get_rqtn(struct mlx5e_rss *rss)
+{
+ return mlx5e_rqt_get_rqtn(&rss->rqt);
+}
+
+bool mlx5e_rss_valid_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner)
+{
+ return !!rss_get_tir(rss, tt, inner);
+}
+
/* Fill the "tirn" output parameter.
* Create the requested TIR if it's its first usage.
*/
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
index d0df98963c8d..72089f5f473c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
@@ -32,8 +32,11 @@ void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss);
void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss);
unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss);
+bool mlx5e_rss_get_inner_ft_support(struct mlx5e_rss *rss);
u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
bool inner);
+bool mlx5e_rss_valid_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner);
+u32 mlx5e_rss_get_rqtn(struct mlx5e_rss *rss);
int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
enum mlx5_traffic_types tt,
const struct mlx5e_packet_merge_param *init_pkt_merge_param,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
index a86eade9a9e0..9d8b2f5f6c96 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
@@ -5,8 +5,6 @@
#include "channels.h"
#include "params.h"
-#define MLX5E_MAX_NUM_RSS 16
-
struct mlx5e_rx_res {
struct mlx5_core_dev *mdev; /* primary */
enum mlx5e_rx_res_features features;
@@ -497,6 +495,11 @@ void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res)
mlx5e_rx_res_free(res);
}
+unsigned int mlx5e_rx_res_get_max_nch(struct mlx5e_rx_res *res)
+{
+ return res->max_nch;
+}
+
u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix)
{
return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir);
@@ -522,7 +525,7 @@ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res)
return mlx5e_tir_get_tirn(&res->ptp.tir);
}
-static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix)
{
return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
index 7b1a9f0f1874..05b438043bcb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
@@ -10,6 +10,8 @@
#include "fs.h"
#include "rss.h"
+#define MLX5E_MAX_NUM_RSS 16
+
struct mlx5e_rx_res;
struct mlx5e_channels;
@@ -34,6 +36,9 @@ u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix);
u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
+u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix);
+unsigned int mlx5e_rx_res_get_max_nch(struct mlx5e_rx_res *res);
+bool mlx5_rx_res_rss_inner_ft_support(struct mlx5e_rx_res *res);
/* Activate/deactivate API */
void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
index d6c12d0ea55b..2e528b2c34d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
@@ -73,11 +73,6 @@ struct mlx5e_tc_act {
bool is_terminating_action;
};
-struct mlx5e_tc_flow_action {
- unsigned int num_entries;
- struct flow_action_entry **entries;
-};
-
extern struct mlx5e_tc_act mlx5e_tc_act_drop;
extern struct mlx5e_tc_act mlx5e_tc_act_trap;
extern struct mlx5e_tc_act mlx5e_tc_act_accept;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index 53ca16cb9c41..140606fcd23b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -46,7 +46,7 @@ static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params
rq->pdev = t->pdev;
rq->netdev = priv->netdev;
rq->priv = priv;
- rq->clock = &mdev->clock;
+ rq->clock = mdev->clock;
rq->tstamp = &priv->tstamp;
rq->mdev = mdev;
rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 94b291662087..6f3094a479e1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -289,9 +289,9 @@ static u64 mlx5e_xsk_fill_timestamp(void *_priv)
ts = get_cqe_ts(priv->cqe);
if (mlx5_is_real_time_rq(priv->cq->mdev) || mlx5_is_real_time_sq(priv->cq->mdev))
- return mlx5_real_time_cyc2time(&priv->cq->mdev->clock, ts);
+ return mlx5_real_time_cyc2time(priv->cq->mdev->clock, ts);
- return mlx5_timecounter_cyc2time(&priv->cq->mdev->clock, ts);
+ return mlx5_timecounter_cyc2time(priv->cq->mdev->clock, ts);
}
static void mlx5e_xsk_request_checksum(u16 csum_start, u16 csum_offset, void *priv)
@@ -546,6 +546,7 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
bool inline_ok;
bool linear;
u16 pi;
+ int i;
struct mlx5e_xdpsq_stats *stats = sq->stats;
@@ -612,41 +613,33 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
- if (test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) {
- int i;
-
- memset(&cseg->trailer, 0, sizeof(cseg->trailer));
- memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));
-
- eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+ memset(&cseg->trailer, 0, sizeof(cseg->trailer));
+ memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));
- for (i = 0; i < num_frags; i++) {
- skb_frag_t *frag = &xdptxdf->sinfo->frags[i];
- dma_addr_t addr;
+ eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
- addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] :
- page_pool_get_dma_addr(skb_frag_page(frag)) +
- skb_frag_off(frag);
+ for (i = 0; i < num_frags; i++) {
+ skb_frag_t *frag = &xdptxdf->sinfo->frags[i];
+ dma_addr_t addr;
- dseg->addr = cpu_to_be64(addr);
- dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
- dseg->lkey = sq->mkey_be;
- dseg++;
- }
+ addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] :
+ page_pool_get_dma_addr(skb_frag_page(frag)) +
+ skb_frag_off(frag);
- cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ dseg->addr = cpu_to_be64(addr);
+ dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
+ dseg->lkey = sq->mkey_be;
+ dseg++;
+ }
- sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
- .num_wqebbs = num_wqebbs,
- .num_pkts = 1,
- };
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
- sq->pc += num_wqebbs;
- } else {
- cseg->fm_ce_se = 0;
+ sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
+ .num_wqebbs = num_wqebbs,
+ .num_pkts = 1,
+ };
- sq->pc++;
- }
+ sq->pc += num_wqebbs;
xsk_tx_metadata_request(meta, &mlx5e_xsk_tx_metadata_ops, eseg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index 9240cfe25d10..d743e823362a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -72,7 +72,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
rq->netdev = c->netdev;
rq->priv = c->priv;
rq->tstamp = c->tstamp;
- rq->clock = &mdev->clock;
+ rq->clock = mdev->clock;
rq->icosq = &c->icosq;
rq->ix = c->ix;
rq->channel = c;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index cae39198b4db..f9113cb13a0c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -237,6 +237,27 @@ void mlx5e_build_ptys2ethtool_map(void)
ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT,
ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT,
ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_1_200GBASE_CR1_KR1, ext,
+ ETHTOOL_LINK_MODE_200000baseCR_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseKR_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseDR_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseDR_2_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseSR_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseVR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_400GAUI_2_400GBASE_CR2_KR2, ext,
+ ETHTOOL_LINK_MODE_400000baseCR2_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseKR2_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseDR2_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseDR2_2_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseSR2_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseVR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_800GAUI_4_800GBASE_CR4_KR4, ext,
+ ETHTOOL_LINK_MODE_800000baseCR4_Full_BIT,
+ ETHTOOL_LINK_MODE_800000baseKR4_Full_BIT,
+ ETHTOOL_LINK_MODE_800000baseDR4_Full_BIT,
+ ETHTOOL_LINK_MODE_800000baseDR4_2_Full_BIT,
+ ETHTOOL_LINK_MODE_800000baseSR4_Full_BIT,
+ ETHTOOL_LINK_MODE_800000baseVR4_Full_BIT);
}
static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev,
@@ -931,6 +952,7 @@ static const u32 pplm_fec_2_ethtool[] = {
[MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS,
[MLX5E_FEC_RS_544_514] = ETHTOOL_FEC_RS,
[MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_FEC_LLRS,
+ [MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD] = ETHTOOL_FEC_RS,
};
static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index a814b63ed97e..5d5e7b19c396 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -737,7 +737,7 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
rq->netdev = c->netdev;
rq->priv = c->priv;
rq->tstamp = c->tstamp;
- rq->clock = &mdev->clock;
+ rq->clock = mdev->clock;
rq->icosq = &c->icosq;
rq->ix = c->ix;
rq->channel = c;
@@ -1614,7 +1614,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
int err;
sq->pdev = c->pdev;
- sq->clock = &mdev->clock;
+ sq->clock = mdev->clock;
sq->mkey_be = c->mkey_be;
sq->netdev = c->netdev;
sq->mdev = c->mdev;
@@ -2023,41 +2023,12 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
csp.min_inline_mode = sq->min_inline_mode;
set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
- if (param->is_xdp_mb)
- set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state);
-
err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
if (err)
goto err_free_xdpsq;
mlx5e_set_xmit_fp(sq, param->is_mpw);
- if (!param->is_mpw && !test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) {
- unsigned int ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
- unsigned int inline_hdr_sz = 0;
- int i;
-
- if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
- inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
- ds_cnt++;
- }
-
- /* Pre initialize fixed WQE fields */
- for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
- struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
- struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
- struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
-
- sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) {
- .num_wqebbs = 1,
- .num_pkts = 1,
- };
-
- cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
- eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
- }
- }
-
return 0;
err_free_xdpsq:
@@ -3816,8 +3787,11 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
/* MQPRIO is another toplevel qdisc that can't be attached
* simultaneously with the offloaded HTB.
*/
- if (WARN_ON(mlx5e_selq_is_htb_enabled(&priv->selq)))
- return -EINVAL;
+ if (mlx5e_selq_is_htb_enabled(&priv->selq)) {
+ NL_SET_ERR_MSG_MOD(mqprio->extack,
+ "MQPRIO cannot be configured when HTB offload is enabled.");
+ return -EOPNOTSUPP;
+ }
switch (mqprio->mode) {
case TC_MQPRIO_MODE_DCB:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index fdff9fd8a89e..07f38f472a27 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -65,6 +65,7 @@
#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \
max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
#define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1
+#define MLX5E_REP_PARAMS_DEF_LOG_RQ_SIZE 0x8
static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
@@ -855,6 +856,8 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
/* RQ */
mlx5e_build_rq_params(mdev, params);
+ if (!mlx5e_is_uplink_rep(priv) && mlx5_core_is_ecpf(mdev))
+ params->log_rq_mtu_frames = MLX5E_REP_PARAMS_DEF_LOG_RQ_SIZE;
/* If netdev is already registered (e.g. move from nic profile to uplink,
* RTNL lock must be held before triggering netdev notifiers.
@@ -886,6 +889,8 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev,
netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
netdev->watchdog_timeo = 15 * HZ;
+ if (mlx5_core_is_ecpf(mdev))
+ netdev->tx_queue_len = 1 << MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE;
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
netdev->hw_features |= NETIF_F_HW_TC;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 1d60465cc2ca..2f7a543feca6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -166,6 +166,9 @@ mlx5e_test_loopback_validate(struct sk_buff *skb,
struct udphdr *udph;
struct iphdr *iph;
+ if (skb_linearize(skb))
+ goto out;
+
/* We are only going to peek, no need to clone the SKB */
if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb))
goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 20cc01ceee8a..0fa0333106a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -4157,37 +4157,12 @@ u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
}
EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
-static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num, u16 *vhca_id)
-{
- int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
- void *query_ctx;
- void *hca_caps;
- int err;
-
- *vhca_id = 0;
-
- query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
- if (!query_ctx)
- return -ENOMEM;
-
- err = mlx5_vport_get_other_func_general_cap(esw->dev, vport_num, query_ctx);
- if (err)
- goto out_free;
-
- hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
- *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id);
-
-out_free:
- kfree(query_ctx);
- return err;
-}
-
int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num)
{
u16 *old_entry, *vhca_map_entry, vhca_id;
int err;
- err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id);
+ err = mlx5_vport_get_vhca_id(esw->dev, vport_num, &vhca_id);
if (err) {
esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%u,err=%d)\n",
vport_num, err);
@@ -4213,7 +4188,7 @@ void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num)
u16 *vhca_map_entry, vhca_id;
int err;
- err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id);
+ err = mlx5_vport_get_vhca_id(esw->dev, vport_num, &vhca_id);
if (err)
esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%hu,err=%d)\n",
vport_num, err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c
index d91ea53eb394..01c5f5990f9a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/events.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c
@@ -6,6 +6,7 @@
#include "mlx5_core.h"
#include "lib/eq.h"
#include "lib/events.h"
+#include "hwmon.h"
struct mlx5_event_nb {
struct mlx5_nb nb;
@@ -153,21 +154,50 @@ static int any_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}
+#if IS_ENABLED(CONFIG_HWMON)
+static void print_sensor_names_in_bit_set(struct mlx5_core_dev *dev, struct mlx5_hwmon *hwmon,
+ u64 bit_set, int bit_set_offset)
+{
+ unsigned long *bit_set_ptr = (unsigned long *)&bit_set;
+ int num_bits = sizeof(bit_set) * BITS_PER_BYTE;
+ int i;
+
+ for_each_set_bit(i, bit_set_ptr, num_bits) {
+ const char *sensor_name = hwmon_get_sensor_name(hwmon, i + bit_set_offset);
+
+ mlx5_core_warn(dev, "Sensor name[%d]: %s\n", i + bit_set_offset, sensor_name);
+ }
+}
+#endif /* CONFIG_HWMON */
+
/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
{
struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_core_dev *dev = events->dev;
struct mlx5_eqe *eqe = data;
u64 value_lsb;
u64 value_msb;
value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
+ /* bit 1-63 are not supported for NICs,
+ * hence read only bit 0 (asic) from lsb.
+ */
+ value_lsb &= 0x1;
value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
- mlx5_core_warn(events->dev,
- "High temperature on sensors with bit set %llx %llx",
- value_msb, value_lsb);
+ if (net_ratelimit()) {
+ mlx5_core_warn(dev, "High temperature on sensors with bit set %#llx %#llx.\n",
+ value_msb, value_lsb);
+#if IS_ENABLED(CONFIG_HWMON)
+ if (dev->hwmon) {
+ print_sensor_names_in_bit_set(dev, dev->hwmon, value_lsb, 0);
+ print_sensor_names_in_bit_set(dev, dev->hwmon, value_msb,
+ sizeof(value_lsb) * BITS_PER_BYTE);
+ }
+#endif
+ }
return NOTIFY_OK;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c
index 353f81dccd1c..4ba2636d7fb6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c
@@ -416,3 +416,8 @@ void mlx5_hwmon_dev_unregister(struct mlx5_core_dev *mdev)
mlx5_hwmon_free(hwmon);
mdev->hwmon = NULL;
}
+
+const char *hwmon_get_sensor_name(struct mlx5_hwmon *hwmon, int channel)
+{
+ return hwmon->temp_channel_desc[channel].sensor_name;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h
index 999654a9b9da..f38271c22c10 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h
@@ -10,6 +10,7 @@
int mlx5_hwmon_dev_register(struct mlx5_core_dev *mdev);
void mlx5_hwmon_dev_unregister(struct mlx5_core_dev *mdev);
+const char *hwmon_get_sensor_name(struct mlx5_hwmon *hwmon, int channel);
#else
static inline int mlx5_hwmon_dev_register(struct mlx5_core_dev *mdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
index bde79cac33a9..d832a12ffec0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
@@ -97,7 +97,7 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
mlx5_del_flow_rules(lag_definer->rules[idx]);
}
j = ldev->buckets;
- };
+ }
goto destroy_fg;
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index d61a1a9297c9..65a94e46edcf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -43,6 +43,8 @@
#include <linux/cpufeature.h>
#endif /* CONFIG_X86 */
+#define MLX5_RT_CLOCK_IDENTITY_SIZE MLX5_FLD_SZ_BYTES(mrtcq_reg, rt_clock_identity)
+
enum {
MLX5_PIN_MODE_IN = 0x0,
MLX5_PIN_MODE_OUT = 0x1,
@@ -77,6 +79,56 @@ enum {
MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX = 200000,
};
+struct mlx5_clock_dev_state {
+ struct mlx5_core_dev *mdev;
+ struct mlx5_devcom_comp_dev *compdev;
+ struct mlx5_nb pps_nb;
+ struct work_struct out_work;
+};
+
+struct mlx5_clock_priv {
+ struct mlx5_clock clock;
+ struct mlx5_core_dev *mdev;
+ struct mutex lock; /* protect mdev and used in PTP callbacks */
+ struct mlx5_core_dev *event_mdev;
+};
+
+static struct mlx5_clock_priv *clock_priv(struct mlx5_clock *clock)
+{
+ return container_of(clock, struct mlx5_clock_priv, clock);
+}
+
+static void mlx5_clock_lockdep_assert(struct mlx5_clock *clock)
+{
+ if (!clock->shared)
+ return;
+
+ lockdep_assert(lockdep_is_held(&clock_priv(clock)->lock));
+}
+
+static struct mlx5_core_dev *mlx5_clock_mdev_get(struct mlx5_clock *clock)
+{
+ mlx5_clock_lockdep_assert(clock);
+
+ return clock_priv(clock)->mdev;
+}
+
+static void mlx5_clock_lock(struct mlx5_clock *clock)
+{
+ if (!clock->shared)
+ return;
+
+ mutex_lock(&clock_priv(clock)->lock);
+}
+
+static void mlx5_clock_unlock(struct mlx5_clock *clock)
+{
+ if (!clock->shared)
+ return;
+
+ mutex_unlock(&clock_priv(clock)->lock);
+}
+
static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev)
{
return (mlx5_is_real_time_rq(mdev) || mlx5_is_real_time_sq(mdev));
@@ -94,6 +146,22 @@ static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev)
return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify);
}
+static int mlx5_clock_identity_get(struct mlx5_core_dev *mdev,
+ u8 identify[MLX5_RT_CLOCK_IDENTITY_SIZE])
+{
+ u32 out[MLX5_ST_SZ_DW(mrtcq_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(mrtcq_reg)] = {};
+ int err;
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in),
+ out, sizeof(out), MLX5_REG_MRTCQ, 0, 0);
+ if (!err)
+ memcpy(identify, MLX5_ADDR_OF(mrtcq_reg, out, rt_clock_identity),
+ MLX5_RT_CLOCK_IDENTITY_SIZE);
+
+ return err;
+}
+
static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz)
{
/* Optimal shift constant leads to corrections above just 1 scaled ppm.
@@ -119,21 +187,30 @@ static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz)
ilog2((U32_MAX / NSEC_PER_MSEC) * dev_freq_khz));
}
+static s32 mlx5_clock_getmaxphase(struct mlx5_core_dev *mdev)
+{
+ return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ?
+ MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX :
+ MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX;
+}
+
static s32 mlx5_ptp_getmaxphase(struct ptp_clock_info *ptp)
{
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
struct mlx5_core_dev *mdev;
+ s32 ret;
- mdev = container_of(clock, struct mlx5_core_dev, clock);
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
+ ret = mlx5_clock_getmaxphase(mdev);
+ mlx5_clock_unlock(clock);
- return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ?
- MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX :
- MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX;
+ return ret;
}
static bool mlx5_is_mtutc_time_adj_cap(struct mlx5_core_dev *mdev, s64 delta)
{
- s64 max = mlx5_ptp_getmaxphase(&mdev->clock.ptp_info);
+ s64 max = mlx5_clock_getmaxphase(mdev);
if (delta < -max || delta > max)
return false;
@@ -209,7 +286,7 @@ static int mlx5_mtctr_syncdevicetime(ktime_t *device_time,
if (real_time_mode)
*device_time = ns_to_ktime(REAL_TIME_TO_NS(device >> 32, device & U32_MAX));
else
- *device_time = mlx5_timecounter_cyc2time(&mdev->clock, device);
+ *device_time = mlx5_timecounter_cyc2time(mdev->clock, device);
return 0;
}
@@ -220,16 +297,23 @@ static int mlx5_ptp_getcrosststamp(struct ptp_clock_info *ptp,
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
struct system_time_snapshot history_begin = {0};
struct mlx5_core_dev *mdev;
+ int err;
- mdev = container_of(clock, struct mlx5_core_dev, clock);
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
- if (!mlx5_is_ptm_source_time_available(mdev))
- return -EBUSY;
+ if (!mlx5_is_ptm_source_time_available(mdev)) {
+ err = -EBUSY;
+ goto unlock;
+ }
ktime_get_snapshot(&history_begin);
- return get_device_system_crosststamp(mlx5_mtctr_syncdevicetime, mdev,
- &history_begin, cts);
+ err = get_device_system_crosststamp(mlx5_mtctr_syncdevicetime, mdev,
+ &history_begin, cts);
+unlock:
+ mlx5_clock_unlock(clock);
+ return err;
}
#endif /* CONFIG_X86 */
@@ -263,8 +347,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc)
{
struct mlx5_timer *timer = container_of(cc, struct mlx5_timer, cycles);
struct mlx5_clock *clock = container_of(timer, struct mlx5_clock, timer);
- struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
- clock);
+ struct mlx5_core_dev *mdev = mlx5_clock_mdev_get(clock);
return mlx5_read_time(mdev, NULL, false) & cc->mask;
}
@@ -272,7 +355,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc)
static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mdev->clock;
struct mlx5_timer *timer;
u32 sign;
@@ -295,12 +378,10 @@ static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
static void mlx5_pps_out(struct work_struct *work)
{
- struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps,
- out_work);
- struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock,
- pps_info);
- struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
- clock);
+ struct mlx5_clock_dev_state *clock_state = container_of(work, struct mlx5_clock_dev_state,
+ out_work);
+ struct mlx5_core_dev *mdev = clock_state->mdev;
+ struct mlx5_clock *clock = mdev->clock;
u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
unsigned long flags;
int i;
@@ -330,7 +411,8 @@ static long mlx5_timestamp_overflow(struct ptp_clock_info *ptp_info)
unsigned long flags;
clock = container_of(ptp_info, struct mlx5_clock, ptp_info);
- mdev = container_of(clock, struct mlx5_core_dev, clock);
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
timer = &clock->timer;
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
@@ -342,6 +424,7 @@ static long mlx5_timestamp_overflow(struct ptp_clock_info *ptp_info)
write_sequnlock_irqrestore(&clock->lock, flags);
out:
+ mlx5_clock_unlock(clock);
return timer->overflow_period;
}
@@ -361,15 +444,12 @@ static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev,
return mlx5_set_mtutc(mdev, in, sizeof(in));
}
-static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts)
+static int mlx5_clock_settime(struct mlx5_core_dev *mdev, struct mlx5_clock *clock,
+ const struct timespec64 *ts)
{
- struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
struct mlx5_timer *timer = &clock->timer;
- struct mlx5_core_dev *mdev;
unsigned long flags;
- mdev = container_of(clock, struct mlx5_core_dev, clock);
-
if (mlx5_modify_mtutc_allowed(mdev)) {
int err = mlx5_ptp_settime_real_time(mdev, ts);
@@ -385,6 +465,20 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64
return 0;
}
+static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_core_dev *mdev;
+ int err;
+
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
+ err = mlx5_clock_settime(mdev, clock, ts);
+ mlx5_clock_unlock(clock);
+
+ return err;
+}
+
static
struct timespec64 mlx5_ptp_gettimex_real_time(struct mlx5_core_dev *mdev,
struct ptp_system_timestamp *sts)
@@ -404,7 +498,8 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
struct mlx5_core_dev *mdev;
u64 cycles, ns;
- mdev = container_of(clock, struct mlx5_core_dev, clock);
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
if (mlx5_real_time_mode(mdev)) {
*ts = mlx5_ptp_gettimex_real_time(mdev, sts);
goto out;
@@ -414,6 +509,7 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
ns = mlx5_timecounter_cyc2time(clock, cycles);
*ts = ns_to_timespec64(ns);
out:
+ mlx5_clock_unlock(clock);
return 0;
}
@@ -444,14 +540,16 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
struct mlx5_timer *timer = &clock->timer;
struct mlx5_core_dev *mdev;
unsigned long flags;
+ int err = 0;
- mdev = container_of(clock, struct mlx5_core_dev, clock);
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
if (mlx5_modify_mtutc_allowed(mdev)) {
- int err = mlx5_ptp_adjtime_real_time(mdev, delta);
+ err = mlx5_ptp_adjtime_real_time(mdev, delta);
if (err)
- return err;
+ goto unlock;
}
write_seqlock_irqsave(&clock->lock, flags);
@@ -459,17 +557,23 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
mlx5_update_clock_info_page(mdev);
write_sequnlock_irqrestore(&clock->lock, flags);
- return 0;
+unlock:
+ mlx5_clock_unlock(clock);
+ return err;
}
static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta)
{
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
struct mlx5_core_dev *mdev;
+ int err;
- mdev = container_of(clock, struct mlx5_core_dev, clock);
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
+ err = mlx5_ptp_adjtime_real_time(mdev, delta);
+ mlx5_clock_unlock(clock);
- return mlx5_ptp_adjtime_real_time(mdev, delta);
+ return err;
}
static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm)
@@ -498,15 +602,17 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
struct mlx5_timer *timer = &clock->timer;
struct mlx5_core_dev *mdev;
unsigned long flags;
+ int err = 0;
u32 mult;
- mdev = container_of(clock, struct mlx5_core_dev, clock);
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
if (mlx5_modify_mtutc_allowed(mdev)) {
- int err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm);
+ err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm);
if (err)
- return err;
+ goto unlock;
}
mult = (u32)adjust_by_scaled_ppm(timer->nominal_c_mult, scaled_ppm);
@@ -518,7 +624,9 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
write_sequnlock_irqrestore(&clock->lock, flags);
ptp_schedule_worker(clock->ptp, timer->overflow_period);
- return 0;
+unlock:
+ mlx5_clock_unlock(clock);
+ return err;
}
static int mlx5_extts_configure(struct ptp_clock_info *ptp,
@@ -527,18 +635,14 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp,
{
struct mlx5_clock *clock =
container_of(ptp, struct mlx5_clock, ptp_info);
- struct mlx5_core_dev *mdev =
- container_of(clock, struct mlx5_core_dev, clock);
u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+ struct mlx5_core_dev *mdev;
u32 field_select = 0;
u8 pin_mode = 0;
u8 pattern = 0;
int pin = -1;
int err = 0;
- if (!MLX5_PPS_CAP(mdev))
- return -EOPNOTSUPP;
-
/* Reject requests with unsupported flags */
if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
PTP_RISING_EDGE |
@@ -569,6 +673,14 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp,
field_select = MLX5_MTPPS_FS_ENABLE;
}
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
+
+ if (!MLX5_PPS_CAP(mdev)) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
MLX5_SET(mtpps_reg, in, pin, pin);
MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
MLX5_SET(mtpps_reg, in, pattern, pattern);
@@ -577,15 +689,23 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp,
err = mlx5_set_mtpps(mdev, in, sizeof(in));
if (err)
- return err;
+ goto unlock;
+
+ err = mlx5_set_mtppse(mdev, pin, 0, MLX5_EVENT_MODE_REPETETIVE & on);
+ if (err)
+ goto unlock;
+
+ clock->pps_info.pin_armed[pin] = on;
+ clock_priv(clock)->event_mdev = mdev;
- return mlx5_set_mtppse(mdev, pin, 0,
- MLX5_EVENT_MODE_REPETETIVE & on);
+unlock:
+ mlx5_clock_unlock(clock);
+ return err;
}
static u64 find_target_cycles(struct mlx5_core_dev *mdev, s64 target_ns)
{
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mdev->clock;
u64 cycles_now, cycles_delta;
u64 nsec_now, nsec_delta;
struct mlx5_timer *timer;
@@ -644,7 +764,7 @@ static int mlx5_perout_conf_out_pulse_duration(struct mlx5_core_dev *mdev,
struct ptp_clock_request *rq,
u32 *out_pulse_duration_ns)
{
- struct mlx5_pps *pps_info = &mdev->clock.pps_info;
+ struct mlx5_pps *pps_info = &mdev->clock->pps_info;
u32 out_pulse_duration;
struct timespec64 ts;
@@ -677,7 +797,7 @@ static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clo
u32 *field_select, u32 *out_pulse_duration_ns,
u64 *period, u64 *time_stamp)
{
- struct mlx5_pps *pps_info = &mdev->clock.pps_info;
+ struct mlx5_pps *pps_info = &mdev->clock->pps_info;
struct ptp_clock_time *time = &rq->perout.start;
struct timespec64 ts;
@@ -712,26 +832,18 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
{
struct mlx5_clock *clock =
container_of(ptp, struct mlx5_clock, ptp_info);
- struct mlx5_core_dev *mdev =
- container_of(clock, struct mlx5_core_dev, clock);
- bool rt_mode = mlx5_real_time_mode(mdev);
u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
u32 out_pulse_duration_ns = 0;
+ struct mlx5_core_dev *mdev;
u32 field_select = 0;
u64 npps_period = 0;
u64 time_stamp = 0;
u8 pin_mode = 0;
u8 pattern = 0;
+ bool rt_mode;
int pin = -1;
int err = 0;
- if (!MLX5_PPS_CAP(mdev))
- return -EOPNOTSUPP;
-
- /* Reject requests with unsupported flags */
- if (mlx5_perout_verify_flags(mdev, rq->perout.flags))
- return -EOPNOTSUPP;
-
if (rq->perout.index >= clock->ptp_info.n_pins)
return -EINVAL;
@@ -740,14 +852,29 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
if (pin < 0)
return -EBUSY;
- if (on) {
- bool rt_mode = mlx5_real_time_mode(mdev);
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
+ rt_mode = mlx5_real_time_mode(mdev);
+
+ if (!MLX5_PPS_CAP(mdev)) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
+ /* Reject requests with unsupported flags */
+ if (mlx5_perout_verify_flags(mdev, rq->perout.flags)) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+ if (on) {
pin_mode = MLX5_PIN_MODE_OUT;
pattern = MLX5_OUT_PATTERN_PERIODIC;
- if (rt_mode && rq->perout.start.sec > U32_MAX)
- return -EINVAL;
+ if (rt_mode && rq->perout.start.sec > U32_MAX) {
+ err = -EINVAL;
+ goto unlock;
+ }
field_select |= MLX5_MTPPS_FS_PIN_MODE |
MLX5_MTPPS_FS_PATTERN |
@@ -760,7 +887,7 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
else
err = perout_conf_1pps(mdev, rq, &time_stamp, rt_mode);
if (err)
- return err;
+ goto unlock;
}
MLX5_SET(mtpps_reg, in, pin, pin);
@@ -773,13 +900,16 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
MLX5_SET(mtpps_reg, in, out_pulse_duration_ns, out_pulse_duration_ns);
err = mlx5_set_mtpps(mdev, in, sizeof(in));
if (err)
- return err;
+ goto unlock;
if (rt_mode)
- return 0;
+ goto unlock;
+
+ err = mlx5_set_mtppse(mdev, pin, 0, MLX5_EVENT_MODE_REPETETIVE & on);
- return mlx5_set_mtppse(mdev, pin, 0,
- MLX5_EVENT_MODE_REPETETIVE & on);
+unlock:
+ mlx5_clock_unlock(clock);
+ return err;
}
static int mlx5_pps_configure(struct ptp_clock_info *ptp,
@@ -866,10 +996,8 @@ static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin,
mtpps_size, MLX5_REG_MTPPS, 0, 0);
}
-static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin)
+static int mlx5_get_pps_pin_mode(struct mlx5_core_dev *mdev, u8 pin)
{
- struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock);
-
u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {};
u8 mode;
int err;
@@ -888,8 +1016,9 @@ static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin)
return PTP_PF_NONE;
}
-static void mlx5_init_pin_config(struct mlx5_clock *clock)
+static void mlx5_init_pin_config(struct mlx5_core_dev *mdev)
{
+ struct mlx5_clock *clock = mdev->clock;
int i;
if (!clock->ptp_info.n_pins)
@@ -910,15 +1039,15 @@ static void mlx5_init_pin_config(struct mlx5_clock *clock)
sizeof(clock->ptp_info.pin_config[i].name),
"mlx5_pps%d", i);
clock->ptp_info.pin_config[i].index = i;
- clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i);
+ clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(mdev, i);
clock->ptp_info.pin_config[i].chan = 0;
}
}
static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
{
- struct mlx5_clock *clock = &mdev->clock;
u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+ struct mlx5_clock *clock = mdev->clock;
mlx5_query_mtpps(mdev, out, sizeof(out));
@@ -968,16 +1097,16 @@ static u64 perout_conf_next_event_timer(struct mlx5_core_dev *mdev,
static int mlx5_pps_event(struct notifier_block *nb,
unsigned long type, void *data)
{
- struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb);
+ struct mlx5_clock_dev_state *clock_state = mlx5_nb_cof(nb, struct mlx5_clock_dev_state,
+ pps_nb);
+ struct mlx5_core_dev *mdev = clock_state->mdev;
+ struct mlx5_clock *clock = mdev->clock;
struct ptp_clock_event ptp_event;
struct mlx5_eqe *eqe = data;
int pin = eqe->data.pps.pin;
- struct mlx5_core_dev *mdev;
unsigned long flags;
u64 ns;
- mdev = container_of(clock, struct mlx5_core_dev, clock);
-
switch (clock->ptp_info.pin_config[pin].func) {
case PTP_PF_EXTTS:
ptp_event.index = pin;
@@ -997,11 +1126,15 @@ static int mlx5_pps_event(struct notifier_block *nb,
ptp_clock_event(clock->ptp, &ptp_event);
break;
case PTP_PF_PEROUT:
+ if (clock->shared) {
+ mlx5_core_warn(mdev, " Received unexpected PPS out event\n");
+ break;
+ }
ns = perout_conf_next_event_timer(mdev, clock);
write_seqlock_irqsave(&clock->lock, flags);
clock->pps_info.start[pin] = ns;
write_sequnlock_irqrestore(&clock->lock, flags);
- schedule_work(&clock->pps_info.out_work);
+ schedule_work(&clock_state->out_work);
break;
default:
mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n",
@@ -1013,7 +1146,7 @@ static int mlx5_pps_event(struct notifier_block *nb,
static void mlx5_timecounter_init(struct mlx5_core_dev *mdev)
{
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mdev->clock;
struct mlx5_timer *timer = &clock->timer;
u32 dev_freq;
@@ -1029,10 +1162,10 @@ static void mlx5_timecounter_init(struct mlx5_core_dev *mdev)
ktime_to_ns(ktime_get_real()));
}
-static void mlx5_init_overflow_period(struct mlx5_clock *clock)
+static void mlx5_init_overflow_period(struct mlx5_core_dev *mdev)
{
- struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock);
struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
+ struct mlx5_clock *clock = mdev->clock;
struct mlx5_timer *timer = &clock->timer;
u64 overflow_cycles;
u64 frac = 0;
@@ -1065,7 +1198,7 @@ static void mlx5_init_overflow_period(struct mlx5_clock *clock)
static void mlx5_init_clock_info(struct mlx5_core_dev *mdev)
{
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mdev->clock;
struct mlx5_ib_clock_info *info;
struct mlx5_timer *timer;
@@ -1088,7 +1221,7 @@ static void mlx5_init_clock_info(struct mlx5_core_dev *mdev)
static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev)
{
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mdev->clock;
u32 out[MLX5_ST_SZ_DW(mtutc_reg)] = {};
u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
u8 log_max_freq_adjustment = 0;
@@ -1107,7 +1240,7 @@ static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev)
static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev)
{
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mdev->clock;
/* Configure the PHC */
clock->ptp_info = mlx5_ptp_clock_info;
@@ -1123,38 +1256,30 @@ static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev)
mlx5_timecounter_init(mdev);
mlx5_init_clock_info(mdev);
- mlx5_init_overflow_period(clock);
+ mlx5_init_overflow_period(mdev);
if (mlx5_real_time_mode(mdev)) {
struct timespec64 ts;
ktime_get_real_ts64(&ts);
- mlx5_ptp_settime(&clock->ptp_info, &ts);
+ mlx5_clock_settime(mdev, clock, &ts);
}
}
static void mlx5_init_pps(struct mlx5_core_dev *mdev)
{
- struct mlx5_clock *clock = &mdev->clock;
-
if (!MLX5_PPS_CAP(mdev))
return;
mlx5_get_pps_caps(mdev);
- mlx5_init_pin_config(clock);
+ mlx5_init_pin_config(mdev);
}
-void mlx5_init_clock(struct mlx5_core_dev *mdev)
+static void mlx5_init_clock_dev(struct mlx5_core_dev *mdev)
{
- struct mlx5_clock *clock = &mdev->clock;
-
- if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) {
- mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
- return;
- }
+ struct mlx5_clock *clock = mdev->clock;
seqlock_init(&clock->lock);
- INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out);
/* Initialize the device clock */
mlx5_init_timer_clock(mdev);
@@ -1163,35 +1288,27 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
mlx5_init_pps(mdev);
clock->ptp = ptp_clock_register(&clock->ptp_info,
- &mdev->pdev->dev);
+ clock->shared ? NULL : &mdev->pdev->dev);
if (IS_ERR(clock->ptp)) {
- mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n",
+ mlx5_core_warn(mdev, "%sptp_clock_register failed %ld\n",
+ clock->shared ? "shared clock " : "",
PTR_ERR(clock->ptp));
clock->ptp = NULL;
}
- MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT);
- mlx5_eq_notifier_register(mdev, &clock->pps_nb);
-
if (clock->ptp)
ptp_schedule_worker(clock->ptp, 0);
}
-void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
+static void mlx5_destroy_clock_dev(struct mlx5_core_dev *mdev)
{
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mdev->clock;
- if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
- return;
-
- mlx5_eq_notifier_unregister(mdev, &clock->pps_nb);
if (clock->ptp) {
ptp_clock_unregister(clock->ptp);
clock->ptp = NULL;
}
- cancel_work_sync(&clock->pps_info.out_work);
-
if (mdev->clock_info) {
free_page((unsigned long)mdev->clock_info);
mdev->clock_info = NULL;
@@ -1199,3 +1316,248 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
kfree(clock->ptp_info.pin_config);
}
+
+static void mlx5_clock_free(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock_priv *cpriv = clock_priv(mdev->clock);
+
+ mlx5_destroy_clock_dev(mdev);
+ mutex_destroy(&cpriv->lock);
+ kfree(cpriv);
+ mdev->clock = NULL;
+}
+
+static int mlx5_clock_alloc(struct mlx5_core_dev *mdev, bool shared)
+{
+ struct mlx5_clock_priv *cpriv;
+ struct mlx5_clock *clock;
+
+ cpriv = kzalloc(sizeof(*cpriv), GFP_KERNEL);
+ if (!cpriv)
+ return -ENOMEM;
+
+ mutex_init(&cpriv->lock);
+ cpriv->mdev = mdev;
+ clock = &cpriv->clock;
+ clock->shared = shared;
+ mdev->clock = clock;
+ mlx5_clock_lock(clock);
+ mlx5_init_clock_dev(mdev);
+ mlx5_clock_unlock(clock);
+
+ if (!clock->shared)
+ return 0;
+
+ if (!clock->ptp) {
+ mlx5_core_warn(mdev, "failed to create ptp dev shared by multiple functions");
+ mlx5_clock_free(mdev);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void mlx5_shared_clock_register(struct mlx5_core_dev *mdev, u64 key)
+{
+ struct mlx5_core_dev *peer_dev, *next = NULL;
+ struct mlx5_devcom_comp_dev *pos;
+
+ mdev->clock_state->compdev = mlx5_devcom_register_component(mdev->priv.devc,
+ MLX5_DEVCOM_SHARED_CLOCK,
+ key, NULL, mdev);
+ if (IS_ERR(mdev->clock_state->compdev))
+ return;
+
+ mlx5_devcom_comp_lock(mdev->clock_state->compdev);
+ mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) {
+ if (peer_dev->clock) {
+ next = peer_dev;
+ break;
+ }
+ }
+
+ if (next) {
+ mdev->clock = next->clock;
+ /* clock info is shared among all the functions using the same clock */
+ mdev->clock_info = next->clock_info;
+ } else {
+ mlx5_clock_alloc(mdev, true);
+ }
+ mlx5_devcom_comp_unlock(mdev->clock_state->compdev);
+
+ if (!mdev->clock) {
+ mlx5_devcom_unregister_component(mdev->clock_state->compdev);
+ mdev->clock_state->compdev = NULL;
+ }
+}
+
+static void mlx5_shared_clock_unregister(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_core_dev *peer_dev, *next = NULL;
+ struct mlx5_clock *clock = mdev->clock;
+ struct mlx5_devcom_comp_dev *pos;
+
+ mlx5_devcom_comp_lock(mdev->clock_state->compdev);
+ mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) {
+ if (peer_dev->clock && peer_dev != mdev) {
+ next = peer_dev;
+ break;
+ }
+ }
+
+ if (next) {
+ struct mlx5_clock_priv *cpriv = clock_priv(clock);
+
+ mlx5_clock_lock(clock);
+ if (mdev == cpriv->mdev)
+ cpriv->mdev = next;
+ mlx5_clock_unlock(clock);
+ } else {
+ mlx5_clock_free(mdev);
+ }
+
+ mdev->clock = NULL;
+ mdev->clock_info = NULL;
+ mlx5_devcom_comp_unlock(mdev->clock_state->compdev);
+
+ mlx5_devcom_unregister_component(mdev->clock_state->compdev);
+}
+
+static void mlx5_clock_arm_pps_in_event(struct mlx5_clock *clock,
+ struct mlx5_core_dev *new_mdev,
+ struct mlx5_core_dev *old_mdev)
+{
+ struct ptp_clock_info *ptp_info = &clock->ptp_info;
+ struct mlx5_clock_priv *cpriv = clock_priv(clock);
+ int i;
+
+ for (i = 0; i < ptp_info->n_pins; i++) {
+ if (ptp_info->pin_config[i].func != PTP_PF_EXTTS ||
+ !clock->pps_info.pin_armed[i])
+ continue;
+
+ if (new_mdev) {
+ mlx5_set_mtppse(new_mdev, i, 0, MLX5_EVENT_MODE_REPETETIVE);
+ cpriv->event_mdev = new_mdev;
+ } else {
+ cpriv->event_mdev = NULL;
+ }
+
+ if (old_mdev)
+ mlx5_set_mtppse(old_mdev, i, 0, MLX5_EVENT_MODE_DISABLE);
+ }
+}
+
+void mlx5_clock_load(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = mdev->clock;
+ struct mlx5_clock_priv *cpriv;
+
+ if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
+ return;
+
+ INIT_WORK(&mdev->clock_state->out_work, mlx5_pps_out);
+ MLX5_NB_INIT(&mdev->clock_state->pps_nb, mlx5_pps_event, PPS_EVENT);
+ mlx5_eq_notifier_register(mdev, &mdev->clock_state->pps_nb);
+
+ if (!clock->shared) {
+ mlx5_clock_arm_pps_in_event(clock, mdev, NULL);
+ return;
+ }
+
+ cpriv = clock_priv(clock);
+ mlx5_devcom_comp_lock(mdev->clock_state->compdev);
+ mlx5_clock_lock(clock);
+ if (mdev == cpriv->mdev && mdev != cpriv->event_mdev)
+ mlx5_clock_arm_pps_in_event(clock, mdev, cpriv->event_mdev);
+ mlx5_clock_unlock(clock);
+ mlx5_devcom_comp_unlock(mdev->clock_state->compdev);
+}
+
+void mlx5_clock_unload(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_core_dev *peer_dev, *next = NULL;
+ struct mlx5_clock *clock = mdev->clock;
+ struct mlx5_devcom_comp_dev *pos;
+
+ if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
+ return;
+
+ if (!clock->shared) {
+ mlx5_clock_arm_pps_in_event(clock, NULL, mdev);
+ goto out;
+ }
+
+ mlx5_devcom_comp_lock(mdev->clock_state->compdev);
+ mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) {
+ if (peer_dev->clock && peer_dev != mdev) {
+ next = peer_dev;
+ break;
+ }
+ }
+
+ mlx5_clock_lock(clock);
+ if (mdev == clock_priv(clock)->event_mdev)
+ mlx5_clock_arm_pps_in_event(clock, next, mdev);
+ mlx5_clock_unlock(clock);
+ mlx5_devcom_comp_unlock(mdev->clock_state->compdev);
+
+out:
+ mlx5_eq_notifier_unregister(mdev, &mdev->clock_state->pps_nb);
+ cancel_work_sync(&mdev->clock_state->out_work);
+}
+
+static struct mlx5_clock null_clock;
+
+int mlx5_init_clock(struct mlx5_core_dev *mdev)
+{
+ u8 identity[MLX5_RT_CLOCK_IDENTITY_SIZE];
+ struct mlx5_clock_dev_state *clock_state;
+ u64 key;
+ int err;
+
+ if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) {
+ mdev->clock = &null_clock;
+ mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
+ return 0;
+ }
+
+ clock_state = kzalloc(sizeof(*clock_state), GFP_KERNEL);
+ if (!clock_state)
+ return -ENOMEM;
+ clock_state->mdev = mdev;
+ mdev->clock_state = clock_state;
+
+ if (MLX5_CAP_MCAM_REG3(mdev, mrtcq) && mlx5_real_time_mode(mdev)) {
+ if (mlx5_clock_identity_get(mdev, identity)) {
+ mlx5_core_warn(mdev, "failed to get rt clock identity, create ptp dev per function\n");
+ } else {
+ memcpy(&key, &identity, sizeof(key));
+ mlx5_shared_clock_register(mdev, key);
+ }
+ }
+
+ if (!mdev->clock) {
+ err = mlx5_clock_alloc(mdev, false);
+ if (err) {
+ kfree(clock_state);
+ mdev->clock_state = NULL;
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
+{
+ if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
+ return;
+
+ if (mdev->clock->shared)
+ mlx5_shared_clock_unregister(mdev);
+ else
+ mlx5_clock_free(mdev);
+ kfree(mdev->clock_state);
+ mdev->clock_state = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
index bd95b9f8d143..c18a652c0faa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -33,6 +33,35 @@
#ifndef __LIB_CLOCK_H__
#define __LIB_CLOCK_H__
+#include <linux/ptp_clock_kernel.h>
+
+#define MAX_PIN_NUM 8
+struct mlx5_pps {
+ u8 pin_caps[MAX_PIN_NUM];
+ u64 start[MAX_PIN_NUM];
+ u8 enabled;
+ u64 min_npps_period;
+ u64 min_out_pulse_duration_ns;
+ bool pin_armed[MAX_PIN_NUM];
+};
+
+struct mlx5_timer {
+ struct cyclecounter cycles;
+ struct timecounter tc;
+ u32 nominal_c_mult;
+ unsigned long overflow_period;
+};
+
+struct mlx5_clock {
+ seqlock_t lock;
+ struct hwtstamp_config hwtstamp_config;
+ struct ptp_clock *ptp;
+ struct ptp_clock_info ptp_info;
+ struct mlx5_pps pps_info;
+ struct mlx5_timer timer;
+ bool shared;
+};
+
static inline bool mlx5_is_real_time_rq(struct mlx5_core_dev *mdev)
{
u8 rq_ts_format_cap = MLX5_CAP_GEN(mdev, rq_ts_format);
@@ -54,12 +83,14 @@ static inline bool mlx5_is_real_time_sq(struct mlx5_core_dev *mdev)
typedef ktime_t (*cqe_ts_to_ns)(struct mlx5_clock *, u64);
#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
-void mlx5_init_clock(struct mlx5_core_dev *mdev);
+int mlx5_init_clock(struct mlx5_core_dev *mdev);
void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
+void mlx5_clock_load(struct mlx5_core_dev *mdev);
+void mlx5_clock_unload(struct mlx5_core_dev *mdev);
static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
{
- return mdev->clock.ptp ? ptp_clock_index(mdev->clock.ptp) : -1;
+ return mdev->clock->ptp ? ptp_clock_index(mdev->clock->ptp) : -1;
}
static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
@@ -87,8 +118,10 @@ static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock,
return ns_to_ktime(time);
}
#else
-static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {}
+static inline int mlx5_init_clock(struct mlx5_core_dev *mdev) { return 0; }
static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {}
+static inline void mlx5_clock_load(struct mlx5_core_dev *mdev) {}
+static inline void mlx5_clock_unload(struct mlx5_core_dev *mdev) {}
static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
{
return -1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
index d58032dd0df7..c79699b94a02 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
@@ -11,6 +11,7 @@ enum mlx5_devcom_component {
MLX5_DEVCOM_MPV,
MLX5_DEVCOM_HCA_PORTS,
MLX5_DEVCOM_SD_GROUP,
+ MLX5_DEVCOM_SHARED_CLOCK,
MLX5_DEVCOM_NUM_COMPONENTS,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
index 9f13cea16446..eb3bd9c7f66e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
@@ -61,6 +61,25 @@ static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc)
}
}
+static const char *mlx5_traffic_types_names[MLX5_NUM_TT] = {
+ [MLX5_TT_IPV4_TCP] = "TT_IPV4_TCP",
+ [MLX5_TT_IPV6_TCP] = "TT_IPV6_TCP",
+ [MLX5_TT_IPV4_UDP] = "TT_IPV4_UDP",
+ [MLX5_TT_IPV6_UDP] = "TT_IPV6_UDP",
+ [MLX5_TT_IPV4_IPSEC_AH] = "TT_IPV4_IPSEC_AH",
+ [MLX5_TT_IPV6_IPSEC_AH] = "TT_IPV6_IPSEC_AH",
+ [MLX5_TT_IPV4_IPSEC_ESP] = "TT_IPV4_IPSEC_ESP",
+ [MLX5_TT_IPV6_IPSEC_ESP] = "TT_IPV6_IPSEC_ESP",
+ [MLX5_TT_IPV4] = "TT_IPV4",
+ [MLX5_TT_IPV6] = "TT_IPV6",
+ [MLX5_TT_ANY] = "TT_ANY"
+};
+
+const char *mlx5_ttc_get_name(enum mlx5_traffic_types tt)
+{
+ return mlx5_traffic_types_names[tt];
+}
+
struct mlx5_etype_proto {
u16 etype;
u8 proto;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h
index 92eea6bea310..ab9434fe3ae6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h
@@ -49,6 +49,7 @@ struct ttc_params {
struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT];
};
+const char *mlx5_ttc_get_name(enum mlx5_traffic_types tt);
struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc);
struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index ec956c4bcebd..710633d5fdbe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1038,7 +1038,11 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
mlx5_init_reserved_gids(dev);
- mlx5_init_clock(dev);
+ err = mlx5_init_clock(dev);
+ if (err) {
+ mlx5_core_err(dev, "failed to initialize hardware clock\n");
+ goto err_tables_cleanup;
+ }
dev->vxlan = mlx5_vxlan_create(dev);
dev->geneve = mlx5_geneve_create(dev);
@@ -1046,7 +1050,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
err = mlx5_init_rl_table(dev);
if (err) {
mlx5_core_err(dev, "Failed to init rate limiting\n");
- goto err_tables_cleanup;
+ goto err_clock_cleanup;
}
err = mlx5_mpfs_init(dev);
@@ -1123,10 +1127,11 @@ err_mpfs_cleanup:
mlx5_mpfs_cleanup(dev);
err_rl_cleanup:
mlx5_cleanup_rl_table(dev);
-err_tables_cleanup:
+err_clock_cleanup:
mlx5_geneve_destroy(dev->geneve);
mlx5_vxlan_destroy(dev->vxlan);
mlx5_cleanup_clock(dev);
+err_tables_cleanup:
mlx5_cleanup_reserved_gids(dev);
mlx5_cq_debugfs_cleanup(dev);
mlx5_fw_reset_cleanup(dev);
@@ -1359,6 +1364,8 @@ static int mlx5_load(struct mlx5_core_dev *dev)
goto err_eq_table;
}
+ mlx5_clock_load(dev);
+
err = mlx5_fw_tracer_init(dev->tracer);
if (err) {
mlx5_core_err(dev, "Failed to init FW tracer %d\n", err);
@@ -1442,6 +1449,7 @@ err_fpga_start:
mlx5_hv_vhca_cleanup(dev->hv_vhca);
mlx5_fw_reset_events_stop(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
+ mlx5_clock_unload(dev);
mlx5_eq_table_destroy(dev);
err_eq_table:
mlx5_irq_table_destroy(dev);
@@ -1468,6 +1476,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
mlx5_hv_vhca_cleanup(dev->hv_vhca);
mlx5_fw_reset_events_stop(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
+ mlx5_clock_unload(dev);
mlx5_eq_table_destroy(dev);
mlx5_irq_table_destroy(dev);
mlx5_pagealloc_stop(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 99de67c3aa74..6fef1005c469 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -346,6 +346,8 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap
#define mlx5_vport_get_other_func_general_cap(dev, vport, out) \
mlx5_vport_get_other_func_cap(dev, vport, out, MLX5_CAP_GENERAL)
+int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id);
+
static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 50931584132b..3995df064101 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -1105,6 +1105,9 @@ static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = {
[MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000,
[MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000,
[MLX5E_800GAUI_8_800GBASE_CR8_KR8] = 800000,
+ [MLX5E_200GAUI_1_200GBASE_CR1_KR1] = 200000,
+ [MLX5E_400GAUI_2_400GBASE_CR2_KR2] = 400000,
+ [MLX5E_800GAUI_4_800GBASE_CR4_KR4] = 800000,
};
int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c
index 60cb4527588a..65740bb68b09 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c
@@ -516,30 +516,6 @@ def_xa_destroy:
return NULL;
}
-/* Assure synchronization of the device steering tables with updates made by SW
- * insertion.
- */
-int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags)
-{
- int ret = 0;
-
- if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) {
- mlx5dr_domain_lock(dmn);
- ret = mlx5dr_send_ring_force_drain(dmn);
- mlx5dr_domain_unlock(dmn);
- if (ret) {
- mlx5dr_err(dmn, "Force drain failed flags: %d, ret: %d\n",
- flags, ret);
- return ret;
- }
- }
-
- if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW)
- ret = mlx5dr_cmd_sync_steering(dmn->mdev);
-
- return ret;
-}
-
int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
{
if (WARN_ON_ONCE(refcount_read(&dmn->refcount) > 1))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c
index f57c84e5128b..4fd4e8483382 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c
@@ -1331,36 +1331,3 @@ void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
kfree(send_ring->sync_buff);
kfree(send_ring);
}
-
-int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
-{
- struct mlx5dr_send_ring *send_ring = dmn->send_ring;
- struct postsend_info send_info = {};
- u8 data[DR_STE_SIZE];
- int num_of_sends_req;
- int ret;
- int i;
-
- /* Sending this amount of requests makes sure we will get drain */
- num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;
-
- /* Send fake requests forcing the last to be signaled */
- send_info.write.addr = (uintptr_t)data;
- send_info.write.length = DR_STE_SIZE;
- send_info.write.lkey = 0;
- /* Using the sync_mr in order to write/read */
- send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
- send_info.rkey = send_ring->sync_mr->mkey;
-
- for (i = 0; i < num_of_sends_req; i++) {
- ret = dr_postsend_icm_data(dmn, &send_info);
- if (ret)
- return ret;
- }
-
- spin_lock(&send_ring->lock);
- ret = dr_handle_pending_wc(dmn, send_ring);
- spin_unlock(&send_ring->lock);
-
- return ret;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h
index 7618c6147f86..cc328292bf84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h
@@ -1473,7 +1473,6 @@ struct mlx5dr_send_ring {
int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn);
void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
struct mlx5dr_send_ring *send_ring);
-int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn);
int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn,
struct mlx5dr_ste *ste,
u8 *data,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h
index 0bb3724c10c2..fc8a2169d1a1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h
@@ -45,8 +45,6 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type);
int mlx5dr_domain_destroy(struct mlx5dr_domain *domain);
-int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags);
-
void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
struct mlx5dr_domain *peer_dmn,
u16 peer_vhca_id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 0d5f750faa45..d10d4c396040 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1199,6 +1199,31 @@ int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *ou
}
EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap);
+int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id)
+{
+ int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ void *query_ctx;
+ void *hca_caps;
+ int err;
+
+ *vhca_id = 0;
+
+ query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+ if (!query_ctx)
+ return -ENOMEM;
+
+ err = mlx5_vport_get_other_func_general_cap(dev, vport, query_ctx);
+ if (err)
+ goto out_free;
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id);
+
+out_free:
+ kfree(query_ctx);
+ return err;
+}
+
int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap,
u16 vport, u16 opmod)
{
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 5b44c931b660..058dcabfaa2e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -2214,6 +2214,8 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb,
for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++)
mlxsw_pci_wqe_byte_count_set(wqe, i, 0);
+ mlxsw_pci_wqe_ipcs_set(wqe, skb->ip_summed == CHECKSUM_PARTIAL);
+
/* Everything is set up, ring producer doorbell to get HW going */
q->producer_counter++;
mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
index 6bed495dcf0f..7fa94e5828de 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -90,6 +90,11 @@ MLXSW_ITEM32(pci, wqe, lp, 0x00, 30, 1);
*/
MLXSW_ITEM32(pci, wqe, type, 0x00, 23, 4);
+/* pci_wqe_ipcs
+ * Calculate IPv4 and TCP / UDP checksums.
+ */
+MLXSW_ITEM32(pci, wqe, ipcs, 0x00, 14, 1);
+
/* pci_wqe_byte_count
* Size of i-th scatter/gather entry, 0 if entry is unused.
*/
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index d714311fd884..1f8362788c75 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1574,8 +1574,10 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port,
netif_carrier_off(dev);
dev->features |= NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_FILTER |
- NETIF_F_HW_TC;
- dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK;
+ NETIF_F_HW_TC | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+ dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK |
+ NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+ dev->vlan_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
dev->lltx = true;
dev->netns_local = true;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index b10f80fc651b..fa7082ee5183 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -754,9 +754,6 @@ void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
struct net_device *dev);
-bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
- const struct net_device *dev);
-u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev);
u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp);
int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
enum mlxsw_sp_l3proto ul_proto,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 7d6d859cef3f..464821dd492d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -8184,41 +8184,6 @@ mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
return NULL;
}
-bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
- const struct net_device *dev)
-{
- struct mlxsw_sp_rif *rif;
-
- mutex_lock(&mlxsw_sp->router->lock);
- rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
- mutex_unlock(&mlxsw_sp->router->lock);
-
- return rif;
-}
-
-u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
-{
- struct mlxsw_sp_rif *rif;
- u16 vid = 0;
-
- mutex_lock(&mlxsw_sp->router->lock);
- rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
- if (!rif)
- goto out;
-
- /* We only return the VID for VLAN RIFs. Otherwise we return an
- * invalid value (0).
- */
- if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)
- goto out;
-
- vid = mlxsw_sp_fid_8021q_vid(rif->fid);
-
-out:
- mutex_unlock(&mlxsw_sp->router->lock);
- return vid;
-}
-
static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
{
char ritr_pl[MLXSW_REG_RITR_LEN];
@@ -8417,19 +8382,6 @@ u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
return lb_rif->common.rif_index;
}
-u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
-{
- struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common);
- u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
- struct mlxsw_sp_vr *ul_vr;
-
- ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
- if (WARN_ON(IS_ERR(ul_vr)))
- return 0;
-
- return ul_vr->id;
-}
-
u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
return lb_rif->ul_rif_id;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 0432c7cc6b07..313efab5c324 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -90,7 +90,6 @@ struct mlxsw_sp_ipip_entry;
struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
u16 rif_index);
u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
-u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif);
u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev);
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
diff --git a/drivers/net/ethernet/meta/fbnic/Makefile b/drivers/net/ethernet/meta/fbnic/Makefile
index 239b2258ec65..0dbc634adb4b 100644
--- a/drivers/net/ethernet/meta/fbnic/Makefile
+++ b/drivers/net/ethernet/meta/fbnic/Makefile
@@ -20,6 +20,7 @@ fbnic-y := fbnic_csr.o \
fbnic_pci.o \
fbnic_phylink.o \
fbnic_rpc.o \
+ fbnic_time.o \
fbnic_tlv.o \
fbnic_txrx.o \
- fbnic_time.o
+# End of objects
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h
index 14751f16e125..37f81db1fc30 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic.h
@@ -60,6 +60,12 @@ struct fbnic_dev {
u8 mac_addr_boundary;
u8 tce_tcam_last;
+ /* IP TCAM */
+ struct fbnic_ip_addr ip_src[FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES];
+ struct fbnic_ip_addr ip_dst[FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES];
+ struct fbnic_ip_addr ipo_src[FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES];
+ struct fbnic_ip_addr ipo_dst[FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES];
+
/* Number of TCQs/RCQs available on hardware */
u16 max_num_queues;
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h
index 02bb81b3c506..6f24c5f2e175 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h
@@ -605,8 +605,11 @@ enum {
FBNIC_RPC_ACT_TBL0_DEST_EI = 4,
};
+#define FBNIC_RPC_ACT_TBL0_Q_SEL CSR_BIT(4)
+#define FBNIC_RPC_ACT_TBL0_Q_ID CSR_GENMASK(15, 8)
#define FBNIC_RPC_ACT_TBL0_DMA_HINT CSR_GENMASK(24, 16)
#define FBNIC_RPC_ACT_TBL0_TS_ENA CSR_BIT(28)
+#define FBNIC_RPC_ACT_TBL0_ACT_TBL_IDX CSR_BIT(29)
#define FBNIC_RPC_ACT_TBL0_RSS_CTXT_ID CSR_BIT(30)
#define FBNIC_RPC_ACT_TBL1_DEFAULT 0x0840b /* 0x2102c */
@@ -677,6 +680,9 @@ enum {
#define FBNIC_RPC_TCAM_OUTER_IPSRC(m, n)\
(0x08c00 + 0x08 * (n) + (m)) /* 0x023000 + 32*n + 4*m */
+#define FBNIC_RPC_TCAM_IP_ADDR_VALUE CSR_GENMASK(15, 0)
+#define FBNIC_RPC_TCAM_IP_ADDR_MASK CSR_GENMASK(31, 16)
+
#define FBNIC_RPC_TCAM_OUTER_IPDST(m, n)\
(0x08c48 + 0x08 * (n) + (m)) /* 0x023120 + 32*n + 4*m */
#define FBNIC_RPC_TCAM_IPSRC(m, n)\
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c
index 59951b5abdb7..e8f2d7f2d962 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c
@@ -10,6 +10,166 @@
static struct dentry *fbnic_dbg_root;
+static void fbnic_dbg_desc_break(struct seq_file *s, int i)
+{
+ while (i--)
+ seq_putc(s, '-');
+
+ seq_putc(s, '\n');
+}
+
+static int fbnic_dbg_mac_addr_show(struct seq_file *s, void *v)
+{
+ struct fbnic_dev *fbd = s->private;
+ char hdr[80];
+ int i;
+
+ /* Generate Header */
+ snprintf(hdr, sizeof(hdr), "%3s %s %-17s %s\n",
+ "Idx", "S", "TCAM Bitmap", "Addr/Mask");
+ seq_puts(s, hdr);
+ fbnic_dbg_desc_break(s, strnlen(hdr, sizeof(hdr)));
+
+ for (i = 0; i < FBNIC_RPC_TCAM_MACDA_NUM_ENTRIES; i++) {
+ struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[i];
+
+ seq_printf(s, "%02d %d %64pb %pm\n",
+ i, mac_addr->state, mac_addr->act_tcam,
+ mac_addr->value.addr8);
+ seq_printf(s, " %pm\n",
+ mac_addr->mask.addr8);
+ }
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_mac_addr);
+
+static int fbnic_dbg_tce_tcam_show(struct seq_file *s, void *v)
+{
+ struct fbnic_dev *fbd = s->private;
+ int i, tcam_idx = 0;
+ char hdr[80];
+
+ /* Generate Header */
+ snprintf(hdr, sizeof(hdr), "%3s %s %-17s %s\n",
+ "Idx", "S", "TCAM Bitmap", "Addr/Mask");
+ seq_puts(s, hdr);
+ fbnic_dbg_desc_break(s, strnlen(hdr, sizeof(hdr)));
+
+ for (i = 0; i < ARRAY_SIZE(fbd->mac_addr); i++) {
+ struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[i];
+
+ /* Verify BMC bit is set */
+ if (!test_bit(FBNIC_MAC_ADDR_T_BMC, mac_addr->act_tcam))
+ continue;
+
+ if (tcam_idx == FBNIC_TCE_TCAM_NUM_ENTRIES)
+ break;
+
+ seq_printf(s, "%02d %d %64pb %pm\n",
+ tcam_idx, mac_addr->state, mac_addr->act_tcam,
+ mac_addr->value.addr8);
+ seq_printf(s, " %pm\n",
+ mac_addr->mask.addr8);
+ tcam_idx++;
+ }
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_tce_tcam);
+
+static int fbnic_dbg_act_tcam_show(struct seq_file *s, void *v)
+{
+ struct fbnic_dev *fbd = s->private;
+ char hdr[80];
+ int i;
+
+ /* Generate Header */
+ snprintf(hdr, sizeof(hdr), "%3s %s %-55s %-4s %s\n",
+ "Idx", "S", "Value/Mask", "RSS", "Dest");
+ seq_puts(s, hdr);
+ fbnic_dbg_desc_break(s, strnlen(hdr, sizeof(hdr)));
+
+ for (i = 0; i < FBNIC_RPC_TCAM_ACT_NUM_ENTRIES; i++) {
+ struct fbnic_act_tcam *act_tcam = &fbd->act_tcam[i];
+
+ seq_printf(s, "%02d %d %04x %04x %04x %04x %04x %04x %04x %04x %04x %04x %04x %04x %08x\n",
+ i, act_tcam->state,
+ act_tcam->value.tcam[10], act_tcam->value.tcam[9],
+ act_tcam->value.tcam[8], act_tcam->value.tcam[7],
+ act_tcam->value.tcam[6], act_tcam->value.tcam[5],
+ act_tcam->value.tcam[4], act_tcam->value.tcam[3],
+ act_tcam->value.tcam[2], act_tcam->value.tcam[1],
+ act_tcam->value.tcam[0], act_tcam->rss_en_mask,
+ act_tcam->dest);
+ seq_printf(s, " %04x %04x %04x %04x %04x %04x %04x %04x %04x %04x %04x\n",
+ act_tcam->mask.tcam[10], act_tcam->mask.tcam[9],
+ act_tcam->mask.tcam[8], act_tcam->mask.tcam[7],
+ act_tcam->mask.tcam[6], act_tcam->mask.tcam[5],
+ act_tcam->mask.tcam[4], act_tcam->mask.tcam[3],
+ act_tcam->mask.tcam[2], act_tcam->mask.tcam[1],
+ act_tcam->mask.tcam[0]);
+ }
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_act_tcam);
+
+static int fbnic_dbg_ip_addr_show(struct seq_file *s,
+ struct fbnic_ip_addr *ip_addr)
+{
+ char hdr[80];
+ int i;
+
+ /* Generate Header */
+ snprintf(hdr, sizeof(hdr), "%3s %s %-17s %s %s\n",
+ "Idx", "S", "TCAM Bitmap", "V", "Addr/Mask");
+ seq_puts(s, hdr);
+ fbnic_dbg_desc_break(s, strnlen(hdr, sizeof(hdr)));
+
+ for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES; i++, ip_addr++) {
+ seq_printf(s, "%02d %d %64pb %d %pi6\n",
+ i, ip_addr->state, ip_addr->act_tcam,
+ ip_addr->version, &ip_addr->value);
+ seq_printf(s, " %pi6\n",
+ &ip_addr->mask);
+ }
+
+ return 0;
+}
+
+static int fbnic_dbg_ip_src_show(struct seq_file *s, void *v)
+{
+ struct fbnic_dev *fbd = s->private;
+
+ return fbnic_dbg_ip_addr_show(s, fbd->ip_src);
+}
+DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_ip_src);
+
+static int fbnic_dbg_ip_dst_show(struct seq_file *s, void *v)
+{
+ struct fbnic_dev *fbd = s->private;
+
+ return fbnic_dbg_ip_addr_show(s, fbd->ip_dst);
+}
+DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_ip_dst);
+
+static int fbnic_dbg_ipo_src_show(struct seq_file *s, void *v)
+{
+ struct fbnic_dev *fbd = s->private;
+
+ return fbnic_dbg_ip_addr_show(s, fbd->ipo_src);
+}
+DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_ipo_src);
+
+static int fbnic_dbg_ipo_dst_show(struct seq_file *s, void *v)
+{
+ struct fbnic_dev *fbd = s->private;
+
+ return fbnic_dbg_ip_addr_show(s, fbd->ipo_dst);
+}
+DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_ipo_dst);
+
static int fbnic_dbg_pcie_stats_show(struct seq_file *s, void *v)
{
struct fbnic_dev *fbd = s->private;
@@ -48,6 +208,20 @@ void fbnic_dbg_fbd_init(struct fbnic_dev *fbd)
fbd->dbg_fbd = debugfs_create_dir(name, fbnic_dbg_root);
debugfs_create_file("pcie_stats", 0400, fbd->dbg_fbd, fbd,
&fbnic_dbg_pcie_stats_fops);
+ debugfs_create_file("mac_addr", 0400, fbd->dbg_fbd, fbd,
+ &fbnic_dbg_mac_addr_fops);
+ debugfs_create_file("tce_tcam", 0400, fbd->dbg_fbd, fbd,
+ &fbnic_dbg_tce_tcam_fops);
+ debugfs_create_file("act_tcam", 0400, fbd->dbg_fbd, fbd,
+ &fbnic_dbg_act_tcam_fops);
+ debugfs_create_file("ip_src", 0400, fbd->dbg_fbd, fbd,
+ &fbnic_dbg_ip_src_fops);
+ debugfs_create_file("ip_dst", 0400, fbd->dbg_fbd, fbd,
+ &fbnic_dbg_ip_dst_fops);
+ debugfs_create_file("ipo_src", 0400, fbd->dbg_fbd, fbd,
+ &fbnic_dbg_ipo_src_fops);
+ debugfs_create_file("ipo_dst", 0400, fbd->dbg_fbd, fbd,
+ &fbnic_dbg_ipo_dst_fops);
}
void fbnic_dbg_fbd_exit(struct fbnic_dev *fbd)
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
index 20cd9f5f89e2..fb7139a1da46 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
@@ -4,6 +4,7 @@
#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
+#include <net/ipv6.h>
#include "fbnic.h"
#include "fbnic_netdev.h"
@@ -218,11 +219,234 @@ fbnic_get_rss_hash_opts(struct fbnic_net *fbn, struct ethtool_rxnfc *cmd)
return 0;
}
+static int fbnic_get_cls_rule_all(struct fbnic_net *fbn,
+ struct ethtool_rxnfc *cmd,
+ u32 *rule_locs)
+{
+ struct fbnic_dev *fbd = fbn->fbd;
+ int i, cnt = 0;
+
+ /* Report maximum rule count */
+ cmd->data = FBNIC_RPC_ACT_TBL_NFC_ENTRIES;
+
+ for (i = 0; i < FBNIC_RPC_ACT_TBL_NFC_ENTRIES; i++) {
+ int idx = i + FBNIC_RPC_ACT_TBL_NFC_OFFSET;
+ struct fbnic_act_tcam *act_tcam;
+
+ act_tcam = &fbd->act_tcam[idx];
+ if (act_tcam->state != FBNIC_TCAM_S_VALID)
+ continue;
+
+ if (rule_locs) {
+ if (cnt == cmd->rule_cnt)
+ return -EMSGSIZE;
+
+ rule_locs[cnt] = i;
+ }
+
+ cnt++;
+ }
+
+ return cnt;
+}
+
+static int fbnic_get_cls_rule(struct fbnic_net *fbn, struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_rx_flow_spec *fsp;
+ struct fbnic_dev *fbd = fbn->fbd;
+ struct fbnic_act_tcam *act_tcam;
+ int idx;
+
+ fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+ if (fsp->location >= FBNIC_RPC_ACT_TBL_NFC_ENTRIES)
+ return -EINVAL;
+
+ idx = fsp->location + FBNIC_RPC_ACT_TBL_NFC_OFFSET;
+ act_tcam = &fbd->act_tcam[idx];
+
+ if (act_tcam->state != FBNIC_TCAM_S_VALID)
+ return -EINVAL;
+
+ /* Report maximum rule count */
+ cmd->data = FBNIC_RPC_ACT_TBL_NFC_ENTRIES;
+
+ /* Set flow type field */
+ if (!(act_tcam->value.tcam[1] & FBNIC_RPC_TCAM_ACT1_IP_VALID)) {
+ fsp->flow_type = ETHER_FLOW;
+ if (!FIELD_GET(FBNIC_RPC_TCAM_ACT1_L2_MACDA_IDX,
+ act_tcam->mask.tcam[1])) {
+ struct fbnic_mac_addr *mac_addr;
+
+ idx = FIELD_GET(FBNIC_RPC_TCAM_ACT1_L2_MACDA_IDX,
+ act_tcam->value.tcam[1]);
+ mac_addr = &fbd->mac_addr[idx];
+
+ ether_addr_copy(fsp->h_u.ether_spec.h_dest,
+ mac_addr->value.addr8);
+ eth_broadcast_addr(fsp->m_u.ether_spec.h_dest);
+ }
+ } else if (act_tcam->value.tcam[1] &
+ FBNIC_RPC_TCAM_ACT1_OUTER_IP_VALID) {
+ fsp->flow_type = IPV6_USER_FLOW;
+ fsp->h_u.usr_ip6_spec.l4_proto = IPPROTO_IPV6;
+ fsp->m_u.usr_ip6_spec.l4_proto = 0xff;
+
+ if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_IDX,
+ act_tcam->mask.tcam[0])) {
+ struct fbnic_ip_addr *ip_addr;
+ int i;
+
+ idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_IDX,
+ act_tcam->value.tcam[0]);
+ ip_addr = &fbd->ipo_src[idx];
+
+ for (i = 0; i < 4; i++) {
+ fsp->h_u.usr_ip6_spec.ip6src[i] =
+ ip_addr->value.s6_addr32[i];
+ fsp->m_u.usr_ip6_spec.ip6src[i] =
+ ~ip_addr->mask.s6_addr32[i];
+ }
+ }
+
+ if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_IDX,
+ act_tcam->mask.tcam[0])) {
+ struct fbnic_ip_addr *ip_addr;
+ int i;
+
+ idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_IDX,
+ act_tcam->value.tcam[0]);
+ ip_addr = &fbd->ipo_dst[idx];
+
+ for (i = 0; i < 4; i++) {
+ fsp->h_u.usr_ip6_spec.ip6dst[i] =
+ ip_addr->value.s6_addr32[i];
+ fsp->m_u.usr_ip6_spec.ip6dst[i] =
+ ~ip_addr->mask.s6_addr32[i];
+ }
+ }
+ } else if ((act_tcam->value.tcam[1] & FBNIC_RPC_TCAM_ACT1_IP_IS_V6)) {
+ if (act_tcam->value.tcam[1] & FBNIC_RPC_TCAM_ACT1_L4_VALID) {
+ if (act_tcam->value.tcam[1] &
+ FBNIC_RPC_TCAM_ACT1_L4_IS_UDP)
+ fsp->flow_type = UDP_V6_FLOW;
+ else
+ fsp->flow_type = TCP_V6_FLOW;
+ fsp->h_u.tcp_ip6_spec.psrc =
+ cpu_to_be16(act_tcam->value.tcam[3]);
+ fsp->m_u.tcp_ip6_spec.psrc =
+ cpu_to_be16(~act_tcam->mask.tcam[3]);
+ fsp->h_u.tcp_ip6_spec.pdst =
+ cpu_to_be16(act_tcam->value.tcam[4]);
+ fsp->m_u.tcp_ip6_spec.pdst =
+ cpu_to_be16(~act_tcam->mask.tcam[4]);
+ } else {
+ fsp->flow_type = IPV6_USER_FLOW;
+ }
+
+ if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX,
+ act_tcam->mask.tcam[0])) {
+ struct fbnic_ip_addr *ip_addr;
+ int i;
+
+ idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX,
+ act_tcam->value.tcam[0]);
+ ip_addr = &fbd->ip_src[idx];
+
+ for (i = 0; i < 4; i++) {
+ fsp->h_u.usr_ip6_spec.ip6src[i] =
+ ip_addr->value.s6_addr32[i];
+ fsp->m_u.usr_ip6_spec.ip6src[i] =
+ ~ip_addr->mask.s6_addr32[i];
+ }
+ }
+
+ if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPDST_IDX,
+ act_tcam->mask.tcam[0])) {
+ struct fbnic_ip_addr *ip_addr;
+ int i;
+
+ idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPDST_IDX,
+ act_tcam->value.tcam[0]);
+ ip_addr = &fbd->ip_dst[idx];
+
+ for (i = 0; i < 4; i++) {
+ fsp->h_u.usr_ip6_spec.ip6dst[i] =
+ ip_addr->value.s6_addr32[i];
+ fsp->m_u.usr_ip6_spec.ip6dst[i] =
+ ~ip_addr->mask.s6_addr32[i];
+ }
+ }
+ } else {
+ if (act_tcam->value.tcam[1] & FBNIC_RPC_TCAM_ACT1_L4_VALID) {
+ if (act_tcam->value.tcam[1] &
+ FBNIC_RPC_TCAM_ACT1_L4_IS_UDP)
+ fsp->flow_type = UDP_V4_FLOW;
+ else
+ fsp->flow_type = TCP_V4_FLOW;
+ fsp->h_u.tcp_ip4_spec.psrc =
+ cpu_to_be16(act_tcam->value.tcam[3]);
+ fsp->m_u.tcp_ip4_spec.psrc =
+ cpu_to_be16(~act_tcam->mask.tcam[3]);
+ fsp->h_u.tcp_ip4_spec.pdst =
+ cpu_to_be16(act_tcam->value.tcam[4]);
+ fsp->m_u.tcp_ip4_spec.pdst =
+ cpu_to_be16(~act_tcam->mask.tcam[4]);
+ } else {
+ fsp->flow_type = IPV4_USER_FLOW;
+ fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+ }
+
+ if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX,
+ act_tcam->mask.tcam[0])) {
+ struct fbnic_ip_addr *ip_addr;
+
+ idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX,
+ act_tcam->value.tcam[0]);
+ ip_addr = &fbd->ip_src[idx];
+
+ fsp->h_u.usr_ip4_spec.ip4src =
+ ip_addr->value.s6_addr32[3];
+ fsp->m_u.usr_ip4_spec.ip4src =
+ ~ip_addr->mask.s6_addr32[3];
+ }
+
+ if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPDST_IDX,
+ act_tcam->mask.tcam[0])) {
+ struct fbnic_ip_addr *ip_addr;
+
+ idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPDST_IDX,
+ act_tcam->value.tcam[0]);
+ ip_addr = &fbd->ip_dst[idx];
+
+ fsp->h_u.usr_ip4_spec.ip4dst =
+ ip_addr->value.s6_addr32[3];
+ fsp->m_u.usr_ip4_spec.ip4dst =
+ ~ip_addr->mask.s6_addr32[3];
+ }
+ }
+
+ /* Record action */
+ if (act_tcam->dest & FBNIC_RPC_ACT_TBL0_DROP)
+ fsp->ring_cookie = RX_CLS_FLOW_DISC;
+ else if (act_tcam->dest & FBNIC_RPC_ACT_TBL0_Q_SEL)
+ fsp->ring_cookie = FIELD_GET(FBNIC_RPC_ACT_TBL0_Q_ID,
+ act_tcam->dest);
+ else
+ fsp->flow_type |= FLOW_RSS;
+
+ cmd->rss_context = FIELD_GET(FBNIC_RPC_ACT_TBL0_RSS_CTXT_ID,
+ act_tcam->dest);
+
+ return 0;
+}
+
static int fbnic_get_rxnfc(struct net_device *netdev,
struct ethtool_rxnfc *cmd, u32 *rule_locs)
{
struct fbnic_net *fbn = netdev_priv(netdev);
int ret = -EOPNOTSUPP;
+ u32 special = 0;
switch (cmd->cmd) {
case ETHTOOL_GRXRINGS:
@@ -232,6 +456,22 @@ static int fbnic_get_rxnfc(struct net_device *netdev,
case ETHTOOL_GRXFH:
ret = fbnic_get_rss_hash_opts(fbn, cmd);
break;
+ case ETHTOOL_GRXCLSRULE:
+ ret = fbnic_get_cls_rule(fbn, cmd);
+ break;
+ case ETHTOOL_GRXCLSRLCNT:
+ rule_locs = NULL;
+ special = RX_CLS_LOC_SPECIAL;
+ fallthrough;
+ case ETHTOOL_GRXCLSRLALL:
+ ret = fbnic_get_cls_rule_all(fbn, cmd, rule_locs);
+ if (ret < 0)
+ break;
+
+ cmd->data |= special;
+ cmd->rule_cnt = ret;
+ ret = 0;
+ break;
}
return ret;
@@ -272,6 +512,406 @@ fbnic_set_rss_hash_opts(struct fbnic_net *fbn, const struct ethtool_rxnfc *cmd)
return 0;
}
+static int fbnic_cls_rule_any_loc(struct fbnic_dev *fbd)
+{
+ int i;
+
+ for (i = FBNIC_RPC_ACT_TBL_NFC_ENTRIES; i--;) {
+ int idx = i + FBNIC_RPC_ACT_TBL_NFC_OFFSET;
+
+ if (fbd->act_tcam[idx].state != FBNIC_TCAM_S_VALID)
+ return i;
+ }
+
+ return -ENOSPC;
+}
+
+static int fbnic_set_cls_rule_ins(struct fbnic_net *fbn,
+ const struct ethtool_rxnfc *cmd)
+{
+ u16 flow_value = 0, flow_mask = 0xffff, ip_value = 0, ip_mask = 0xffff;
+ u16 sport = 0, sport_mask = ~0, dport = 0, dport_mask = ~0;
+ u16 misc = 0, misc_mask = ~0;
+ u32 dest = FIELD_PREP(FBNIC_RPC_ACT_TBL0_DEST_MASK,
+ FBNIC_RPC_ACT_TBL0_DEST_HOST);
+ struct fbnic_ip_addr *ip_src = NULL, *ip_dst = NULL;
+ struct fbnic_mac_addr *mac_addr = NULL;
+ struct ethtool_rx_flow_spec *fsp;
+ struct fbnic_dev *fbd = fbn->fbd;
+ struct fbnic_act_tcam *act_tcam;
+ struct in6_addr *addr6, *mask6;
+ struct in_addr *addr4, *mask4;
+ int hash_idx, location;
+ u32 flow_type;
+ int idx, j;
+
+ fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+ if (fsp->location != RX_CLS_LOC_ANY)
+ return -EINVAL;
+ location = fbnic_cls_rule_any_loc(fbd);
+ if (location < 0)
+ return location;
+
+ if (fsp->ring_cookie == RX_CLS_FLOW_DISC) {
+ dest = FBNIC_RPC_ACT_TBL0_DROP;
+ } else if (fsp->flow_type & FLOW_RSS) {
+ if (cmd->rss_context == 1)
+ dest |= FBNIC_RPC_ACT_TBL0_RSS_CTXT_ID;
+ } else {
+ u32 ring_idx = ethtool_get_flow_spec_ring(fsp->ring_cookie);
+
+ if (ring_idx >= fbn->num_rx_queues)
+ return -EINVAL;
+
+ dest |= FBNIC_RPC_ACT_TBL0_Q_SEL |
+ FIELD_PREP(FBNIC_RPC_ACT_TBL0_Q_ID, ring_idx);
+ }
+
+ idx = location + FBNIC_RPC_ACT_TBL_NFC_OFFSET;
+ act_tcam = &fbd->act_tcam[idx];
+
+ /* Do not allow overwriting for now.
+ * To support overwriting rules we will need to add logic to free
+ * any IP or MACDA TCAMs that may be associated with the old rule.
+ */
+ if (act_tcam->state != FBNIC_TCAM_S_DISABLED)
+ return -EBUSY;
+
+ flow_type = fsp->flow_type & ~(FLOW_EXT | FLOW_RSS);
+ hash_idx = fbnic_get_rss_hash_idx(flow_type);
+
+ switch (flow_type) {
+ case UDP_V4_FLOW:
+udp4_flow:
+ flow_value |= FBNIC_RPC_TCAM_ACT1_L4_IS_UDP;
+ fallthrough;
+ case TCP_V4_FLOW:
+tcp4_flow:
+ flow_value |= FBNIC_RPC_TCAM_ACT1_L4_VALID;
+ flow_mask &= ~(FBNIC_RPC_TCAM_ACT1_L4_IS_UDP |
+ FBNIC_RPC_TCAM_ACT1_L4_VALID);
+
+ sport = be16_to_cpu(fsp->h_u.tcp_ip4_spec.psrc);
+ sport_mask = ~be16_to_cpu(fsp->m_u.tcp_ip4_spec.psrc);
+ dport = be16_to_cpu(fsp->h_u.tcp_ip4_spec.pdst);
+ dport_mask = ~be16_to_cpu(fsp->m_u.tcp_ip4_spec.pdst);
+ goto ip4_flow;
+ case IP_USER_FLOW:
+ if (!fsp->m_u.usr_ip4_spec.proto)
+ goto ip4_flow;
+ if (fsp->m_u.usr_ip4_spec.proto != 0xff)
+ return -EINVAL;
+ if (fsp->h_u.usr_ip4_spec.proto == IPPROTO_UDP)
+ goto udp4_flow;
+ if (fsp->h_u.usr_ip4_spec.proto == IPPROTO_TCP)
+ goto tcp4_flow;
+ return -EINVAL;
+ip4_flow:
+ addr4 = (struct in_addr *)&fsp->h_u.usr_ip4_spec.ip4src;
+ mask4 = (struct in_addr *)&fsp->m_u.usr_ip4_spec.ip4src;
+ if (mask4->s_addr) {
+ ip_src = __fbnic_ip4_sync(fbd, fbd->ip_src,
+ addr4, mask4);
+ if (!ip_src)
+ return -ENOSPC;
+
+ set_bit(idx, ip_src->act_tcam);
+ ip_value |= FBNIC_RPC_TCAM_ACT0_IPSRC_VALID |
+ FIELD_PREP(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX,
+ ip_src - fbd->ip_src);
+ ip_mask &= ~(FBNIC_RPC_TCAM_ACT0_IPSRC_VALID |
+ FBNIC_RPC_TCAM_ACT0_IPSRC_IDX);
+ }
+
+ addr4 = (struct in_addr *)&fsp->h_u.usr_ip4_spec.ip4dst;
+ mask4 = (struct in_addr *)&fsp->m_u.usr_ip4_spec.ip4dst;
+ if (mask4->s_addr) {
+ ip_dst = __fbnic_ip4_sync(fbd, fbd->ip_dst,
+ addr4, mask4);
+ if (!ip_dst) {
+ if (ip_src && ip_src->state == FBNIC_TCAM_S_ADD)
+ memset(ip_src, 0, sizeof(*ip_src));
+ return -ENOSPC;
+ }
+
+ set_bit(idx, ip_dst->act_tcam);
+ ip_value |= FBNIC_RPC_TCAM_ACT0_IPDST_VALID |
+ FIELD_PREP(FBNIC_RPC_TCAM_ACT0_IPDST_IDX,
+ ip_dst - fbd->ip_dst);
+ ip_mask &= ~(FBNIC_RPC_TCAM_ACT0_IPDST_VALID |
+ FBNIC_RPC_TCAM_ACT0_IPDST_IDX);
+ }
+ flow_value |= FBNIC_RPC_TCAM_ACT1_IP_VALID |
+ FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID;
+ flow_mask &= ~(FBNIC_RPC_TCAM_ACT1_IP_IS_V6 |
+ FBNIC_RPC_TCAM_ACT1_IP_VALID |
+ FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID);
+ break;
+ case UDP_V6_FLOW:
+udp6_flow:
+ flow_value |= FBNIC_RPC_TCAM_ACT1_L4_IS_UDP;
+ fallthrough;
+ case TCP_V6_FLOW:
+tcp6_flow:
+ flow_value |= FBNIC_RPC_TCAM_ACT1_L4_VALID;
+ flow_mask &= ~(FBNIC_RPC_TCAM_ACT1_L4_IS_UDP |
+ FBNIC_RPC_TCAM_ACT1_L4_VALID);
+
+ sport = be16_to_cpu(fsp->h_u.tcp_ip6_spec.psrc);
+ sport_mask = ~be16_to_cpu(fsp->m_u.tcp_ip6_spec.psrc);
+ dport = be16_to_cpu(fsp->h_u.tcp_ip6_spec.pdst);
+ dport_mask = ~be16_to_cpu(fsp->m_u.tcp_ip6_spec.pdst);
+ goto ipv6_flow;
+ case IPV6_USER_FLOW:
+ if (!fsp->m_u.usr_ip6_spec.l4_proto)
+ goto ipv6_flow;
+
+ if (fsp->m_u.usr_ip6_spec.l4_proto != 0xff)
+ return -EINVAL;
+ if (fsp->h_u.usr_ip6_spec.l4_proto == IPPROTO_UDP)
+ goto udp6_flow;
+ if (fsp->h_u.usr_ip6_spec.l4_proto == IPPROTO_TCP)
+ goto tcp6_flow;
+ if (fsp->h_u.usr_ip6_spec.l4_proto != IPPROTO_IPV6)
+ return -EINVAL;
+
+ addr6 = (struct in6_addr *)fsp->h_u.usr_ip6_spec.ip6src;
+ mask6 = (struct in6_addr *)fsp->m_u.usr_ip6_spec.ip6src;
+ if (!ipv6_addr_any(mask6)) {
+ ip_src = __fbnic_ip6_sync(fbd, fbd->ipo_src,
+ addr6, mask6);
+ if (!ip_src)
+ return -ENOSPC;
+
+ set_bit(idx, ip_src->act_tcam);
+ ip_value |=
+ FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_VALID |
+ FIELD_PREP(FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_IDX,
+ ip_src - fbd->ipo_src);
+ ip_mask &=
+ ~(FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_VALID |
+ FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_IDX);
+ }
+
+ addr6 = (struct in6_addr *)fsp->h_u.usr_ip6_spec.ip6dst;
+ mask6 = (struct in6_addr *)fsp->m_u.usr_ip6_spec.ip6dst;
+ if (!ipv6_addr_any(mask6)) {
+ ip_dst = __fbnic_ip6_sync(fbd, fbd->ipo_dst,
+ addr6, mask6);
+ if (!ip_dst) {
+ if (ip_src && ip_src->state == FBNIC_TCAM_S_ADD)
+ memset(ip_src, 0, sizeof(*ip_src));
+ return -ENOSPC;
+ }
+
+ set_bit(idx, ip_dst->act_tcam);
+ ip_value |=
+ FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_VALID |
+ FIELD_PREP(FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_IDX,
+ ip_dst - fbd->ipo_dst);
+ ip_mask &= ~(FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_VALID |
+ FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_IDX);
+ }
+
+ flow_value |= FBNIC_RPC_TCAM_ACT1_OUTER_IP_VALID;
+ flow_mask &= FBNIC_RPC_TCAM_ACT1_OUTER_IP_VALID;
+ipv6_flow:
+ addr6 = (struct in6_addr *)fsp->h_u.usr_ip6_spec.ip6src;
+ mask6 = (struct in6_addr *)fsp->m_u.usr_ip6_spec.ip6src;
+ if (!ip_src && !ipv6_addr_any(mask6)) {
+ ip_src = __fbnic_ip6_sync(fbd, fbd->ip_src,
+ addr6, mask6);
+ if (!ip_src)
+ return -ENOSPC;
+
+ set_bit(idx, ip_src->act_tcam);
+ ip_value |= FBNIC_RPC_TCAM_ACT0_IPSRC_VALID |
+ FIELD_PREP(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX,
+ ip_src - fbd->ip_src);
+ ip_mask &= ~(FBNIC_RPC_TCAM_ACT0_IPSRC_VALID |
+ FBNIC_RPC_TCAM_ACT0_IPSRC_IDX);
+ }
+
+ addr6 = (struct in6_addr *)fsp->h_u.usr_ip6_spec.ip6dst;
+ mask6 = (struct in6_addr *)fsp->m_u.usr_ip6_spec.ip6dst;
+ if (!ip_dst && !ipv6_addr_any(mask6)) {
+ ip_dst = __fbnic_ip6_sync(fbd, fbd->ip_dst,
+ addr6, mask6);
+ if (!ip_dst) {
+ if (ip_src && ip_src->state == FBNIC_TCAM_S_ADD)
+ memset(ip_src, 0, sizeof(*ip_src));
+ return -ENOSPC;
+ }
+
+ set_bit(idx, ip_dst->act_tcam);
+ ip_value |= FBNIC_RPC_TCAM_ACT0_IPDST_VALID |
+ FIELD_PREP(FBNIC_RPC_TCAM_ACT0_IPDST_IDX,
+ ip_dst - fbd->ip_dst);
+ ip_mask &= ~(FBNIC_RPC_TCAM_ACT0_IPDST_VALID |
+ FBNIC_RPC_TCAM_ACT0_IPDST_IDX);
+ }
+
+ flow_value |= FBNIC_RPC_TCAM_ACT1_IP_IS_V6 |
+ FBNIC_RPC_TCAM_ACT1_IP_VALID |
+ FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID;
+ flow_mask &= ~(FBNIC_RPC_TCAM_ACT1_IP_IS_V6 |
+ FBNIC_RPC_TCAM_ACT1_IP_VALID |
+ FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID);
+ break;
+ case ETHER_FLOW:
+ if (!is_zero_ether_addr(fsp->m_u.ether_spec.h_dest)) {
+ u8 *addr = fsp->h_u.ether_spec.h_dest;
+ u8 *mask = fsp->m_u.ether_spec.h_dest;
+
+ /* Do not allow MAC addr of 0 */
+ if (is_zero_ether_addr(addr))
+ return -EINVAL;
+
+ /* Only support full MAC address to avoid
+ * conflicts with other MAC addresses.
+ */
+ if (!is_broadcast_ether_addr(mask))
+ return -EINVAL;
+
+ if (is_multicast_ether_addr(addr))
+ mac_addr = __fbnic_mc_sync(fbd, addr);
+ else
+ mac_addr = __fbnic_uc_sync(fbd, addr);
+
+ if (!mac_addr)
+ return -ENOSPC;
+
+ set_bit(idx, mac_addr->act_tcam);
+ flow_value |=
+ FIELD_PREP(FBNIC_RPC_TCAM_ACT1_L2_MACDA_IDX,
+ mac_addr - fbd->mac_addr);
+ flow_mask &= ~FBNIC_RPC_TCAM_ACT1_L2_MACDA_IDX;
+ }
+
+ flow_value |= FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID;
+ flow_mask &= ~FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Write action table values */
+ act_tcam->dest = dest;
+ act_tcam->rss_en_mask = fbnic_flow_hash_2_rss_en_mask(fbn, hash_idx);
+
+ /* Write IP Match value/mask to action_tcam[0] */
+ act_tcam->value.tcam[0] = ip_value;
+ act_tcam->mask.tcam[0] = ip_mask;
+
+ /* Write flow type value/mask to action_tcam[1] */
+ act_tcam->value.tcam[1] = flow_value;
+ act_tcam->mask.tcam[1] = flow_mask;
+
+ /* Write error, DSCP, extra L4 matches to action_tcam[2] */
+ act_tcam->value.tcam[2] = misc;
+ act_tcam->mask.tcam[2] = misc_mask;
+
+ /* Write source/destination port values */
+ act_tcam->value.tcam[3] = sport;
+ act_tcam->mask.tcam[3] = sport_mask;
+ act_tcam->value.tcam[4] = dport;
+ act_tcam->mask.tcam[4] = dport_mask;
+
+ for (j = 5; j < FBNIC_RPC_TCAM_ACT_WORD_LEN; j++)
+ act_tcam->mask.tcam[j] = 0xffff;
+
+ act_tcam->state = FBNIC_TCAM_S_UPDATE;
+ fsp->location = location;
+
+ if (netif_running(fbn->netdev)) {
+ fbnic_write_rules(fbd);
+ if (ip_src || ip_dst)
+ fbnic_write_ip_addr(fbd);
+ if (mac_addr)
+ fbnic_write_macda(fbd);
+ }
+
+ return 0;
+}
+
+static void fbnic_clear_nfc_macda(struct fbnic_net *fbn,
+ unsigned int tcam_idx)
+{
+ struct fbnic_dev *fbd = fbn->fbd;
+ int idx;
+
+ for (idx = ARRAY_SIZE(fbd->mac_addr); idx--;)
+ __fbnic_xc_unsync(&fbd->mac_addr[idx], tcam_idx);
+
+ /* Write updates to hardware */
+ if (netif_running(fbn->netdev))
+ fbnic_write_macda(fbd);
+}
+
+static void fbnic_clear_nfc_ip_addr(struct fbnic_net *fbn,
+ unsigned int tcam_idx)
+{
+ struct fbnic_dev *fbd = fbn->fbd;
+ int idx;
+
+ for (idx = ARRAY_SIZE(fbd->ip_src); idx--;)
+ __fbnic_ip_unsync(&fbd->ip_src[idx], tcam_idx);
+ for (idx = ARRAY_SIZE(fbd->ip_dst); idx--;)
+ __fbnic_ip_unsync(&fbd->ip_dst[idx], tcam_idx);
+ for (idx = ARRAY_SIZE(fbd->ipo_src); idx--;)
+ __fbnic_ip_unsync(&fbd->ipo_src[idx], tcam_idx);
+ for (idx = ARRAY_SIZE(fbd->ipo_dst); idx--;)
+ __fbnic_ip_unsync(&fbd->ipo_dst[idx], tcam_idx);
+
+ /* Write updates to hardware */
+ if (netif_running(fbn->netdev))
+ fbnic_write_ip_addr(fbd);
+}
+
+static int fbnic_set_cls_rule_del(struct fbnic_net *fbn,
+ const struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_rx_flow_spec *fsp;
+ struct fbnic_dev *fbd = fbn->fbd;
+ struct fbnic_act_tcam *act_tcam;
+ int idx;
+
+ fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+ if (fsp->location >= FBNIC_RPC_ACT_TBL_NFC_ENTRIES)
+ return -EINVAL;
+
+ idx = fsp->location + FBNIC_RPC_ACT_TBL_NFC_OFFSET;
+ act_tcam = &fbd->act_tcam[idx];
+
+ if (act_tcam->state != FBNIC_TCAM_S_VALID)
+ return -EINVAL;
+
+ act_tcam->state = FBNIC_TCAM_S_DELETE;
+
+ if ((act_tcam->value.tcam[1] & FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID) &&
+ (~act_tcam->mask.tcam[1] & FBNIC_RPC_TCAM_ACT1_L2_MACDA_IDX))
+ fbnic_clear_nfc_macda(fbn, idx);
+
+ if ((act_tcam->value.tcam[0] &
+ (FBNIC_RPC_TCAM_ACT0_IPSRC_VALID |
+ FBNIC_RPC_TCAM_ACT0_IPDST_VALID |
+ FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_VALID |
+ FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_VALID)) &&
+ (~act_tcam->mask.tcam[0] &
+ (FBNIC_RPC_TCAM_ACT0_IPSRC_IDX |
+ FBNIC_RPC_TCAM_ACT0_IPDST_IDX |
+ FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_IDX |
+ FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_IDX)))
+ fbnic_clear_nfc_ip_addr(fbn, idx);
+
+ if (netif_running(fbn->netdev))
+ fbnic_write_rules(fbd);
+
+ return 0;
+}
+
static int fbnic_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
{
struct fbnic_net *fbn = netdev_priv(netdev);
@@ -281,6 +921,12 @@ static int fbnic_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
case ETHTOOL_SRXFH:
ret = fbnic_set_rss_hash_opts(fbn, cmd);
break;
+ case ETHTOOL_SRXCLSRLINS:
+ ret = fbnic_set_cls_rule_ins(fbn, cmd);
+ break;
+ case ETHTOOL_SRXCLSRLDEL:
+ ret = fbnic_set_cls_rule_del(fbn, cmd);
+ break;
}
return ret;
@@ -374,6 +1020,61 @@ fbnic_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh,
return 0;
}
+static int
+fbnic_modify_rxfh_context(struct net_device *netdev,
+ struct ethtool_rxfh_context *ctx,
+ const struct ethtool_rxfh_param *rxfh,
+ struct netlink_ext_ack *extack)
+{
+ struct fbnic_net *fbn = netdev_priv(netdev);
+ const u32 *indir = rxfh->indir;
+ unsigned int changes;
+
+ if (!indir)
+ indir = ethtool_rxfh_context_indir(ctx);
+
+ changes = fbnic_set_indir(fbn, rxfh->rss_context, indir);
+ if (changes && netif_running(netdev))
+ fbnic_rss_reinit_hw(fbn->fbd, fbn);
+
+ return 0;
+}
+
+static int
+fbnic_create_rxfh_context(struct net_device *netdev,
+ struct ethtool_rxfh_context *ctx,
+ const struct ethtool_rxfh_param *rxfh,
+ struct netlink_ext_ack *extack)
+{
+ struct fbnic_net *fbn = netdev_priv(netdev);
+
+ if (rxfh->hfunc && rxfh->hfunc != ETH_RSS_HASH_TOP) {
+ NL_SET_ERR_MSG_MOD(extack, "RSS hash function not supported");
+ return -EOPNOTSUPP;
+ }
+ ctx->hfunc = ETH_RSS_HASH_TOP;
+
+ if (!rxfh->indir) {
+ u32 *indir = ethtool_rxfh_context_indir(ctx);
+ unsigned int num_rx = fbn->num_rx_queues;
+ unsigned int i;
+
+ for (i = 0; i < FBNIC_RPC_RSS_TBL_SIZE; i++)
+ indir[i] = ethtool_rxfh_indir_default(i, num_rx);
+ }
+
+ return fbnic_modify_rxfh_context(netdev, ctx, rxfh, extack);
+}
+
+static int
+fbnic_remove_rxfh_context(struct net_device *netdev,
+ struct ethtool_rxfh_context *ctx, u32 rss_context,
+ struct netlink_ext_ack *extack)
+{
+ /* Nothing to do, contexts are allocated statically */
+ return 0;
+}
+
static void fbnic_get_channels(struct net_device *netdev,
struct ethtool_channels *ch)
{
@@ -523,14 +1224,14 @@ static void fbnic_get_ts_stats(struct net_device *netdev,
unsigned int start;
int i;
- ts_stats->pkts = fbn->tx_stats.ts_packets;
- ts_stats->lost = fbn->tx_stats.ts_lost;
+ ts_stats->pkts = fbn->tx_stats.twq.ts_packets;
+ ts_stats->lost = fbn->tx_stats.twq.ts_lost;
for (i = 0; i < fbn->num_tx_queues; i++) {
ring = fbn->tx[i];
do {
start = u64_stats_fetch_begin(&ring->stats.syncp);
- ts_packets = ring->stats.ts_packets;
- ts_lost = ring->stats.ts_lost;
+ ts_packets = ring->stats.twq.ts_packets;
+ ts_lost = ring->stats.twq.ts_lost;
} while (u64_stats_fetch_retry(&ring->stats.syncp, start));
ts_stats->pkts += ts_packets;
ts_stats->lost += ts_lost;
@@ -586,6 +1287,7 @@ fbnic_get_eth_mac_stats(struct net_device *netdev,
}
static const struct ethtool_ops fbnic_ethtool_ops = {
+ .rxfh_max_num_contexts = FBNIC_RPC_RSS_TBL_COUNT,
.get_drvinfo = fbnic_get_drvinfo,
.get_regs_len = fbnic_get_regs_len,
.get_regs = fbnic_get_regs,
@@ -598,6 +1300,9 @@ static const struct ethtool_ops fbnic_ethtool_ops = {
.get_rxfh_indir_size = fbnic_get_rxfh_indir_size,
.get_rxfh = fbnic_get_rxfh,
.set_rxfh = fbnic_set_rxfh,
+ .create_rxfh_context = fbnic_create_rxfh_context,
+ .modify_rxfh_context = fbnic_modify_rxfh_context,
+ .remove_rxfh_context = fbnic_remove_rxfh_context,
.get_channels = fbnic_get_channels,
.set_channels = fbnic_set_channels,
.get_ts_info = fbnic_get_ts_info,
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
index 7a96b6ee773f..c59f1ce8de32 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
@@ -487,8 +487,9 @@ static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx,
struct fbnic_net *fbn = netdev_priv(dev);
struct fbnic_ring *rxr = fbn->rx[idx];
struct fbnic_queue_stats *stats;
+ u64 bytes, packets, alloc_fail;
+ u64 csum_complete, csum_none;
unsigned int start;
- u64 bytes, packets;
if (!rxr)
return;
@@ -498,10 +499,16 @@ static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx,
start = u64_stats_fetch_begin(&stats->syncp);
bytes = stats->bytes;
packets = stats->packets;
+ alloc_fail = stats->rx.alloc_failed;
+ csum_complete = stats->rx.csum_complete;
+ csum_none = stats->rx.csum_none;
} while (u64_stats_fetch_retry(&stats->syncp, start));
rx->bytes = bytes;
rx->packets = packets;
+ rx->alloc_fail = alloc_fail;
+ rx->csum_complete = csum_complete;
+ rx->csum_none = csum_none;
}
static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx,
@@ -510,6 +517,7 @@ static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx,
struct fbnic_net *fbn = netdev_priv(dev);
struct fbnic_ring *txr = fbn->tx[idx];
struct fbnic_queue_stats *stats;
+ u64 stop, wake, csum, lso;
unsigned int start;
u64 bytes, packets;
@@ -521,10 +529,18 @@ static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx,
start = u64_stats_fetch_begin(&stats->syncp);
bytes = stats->bytes;
packets = stats->packets;
+ csum = stats->twq.csum_partial;
+ lso = stats->twq.lso;
+ stop = stats->twq.stop;
+ wake = stats->twq.wake;
} while (u64_stats_fetch_retry(&stats->syncp, start));
tx->bytes = bytes;
tx->packets = packets;
+ tx->needs_csum = csum + lso;
+ tx->hw_gso_wire_packets = lso;
+ tx->stop = stop;
+ tx->wake = wake;
}
static void fbnic_get_base_stats(struct net_device *dev,
@@ -535,9 +551,16 @@ static void fbnic_get_base_stats(struct net_device *dev,
tx->bytes = fbn->tx_stats.bytes;
tx->packets = fbn->tx_stats.packets;
+ tx->needs_csum = fbn->tx_stats.twq.csum_partial + fbn->tx_stats.twq.lso;
+ tx->hw_gso_wire_packets = fbn->tx_stats.twq.lso;
+ tx->stop = fbn->tx_stats.twq.stop;
+ tx->wake = fbn->tx_stats.twq.wake;
rx->bytes = fbn->rx_stats.bytes;
rx->packets = fbn->rx_stats.packets;
+ rx->alloc_fail = fbn->rx_stats.rx.alloc_failed;
+ rx->csum_complete = fbn->rx_stats.rx.csum_complete;
+ rx->csum_none = fbn->rx_stats.rx.csum_none;
}
static const struct netdev_stat_ops fbnic_stat_ops = {
@@ -628,15 +651,32 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd)
fbnic_rss_key_fill(fbn->rss_key);
fbnic_rss_init_en_mask(fbn);
+ netdev->priv_flags |= IFF_UNICAST_FLT;
+
+ netdev->gso_partial_features =
+ NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM |
+ NETIF_F_GSO_IPXIP4 |
+ NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
netdev->features |=
+ netdev->gso_partial_features |
+ FBNIC_TUN_GSO_FEATURES |
NETIF_F_RXHASH |
NETIF_F_SG |
NETIF_F_HW_CSUM |
- NETIF_F_RXCSUM;
+ NETIF_F_RXCSUM |
+ NETIF_F_TSO |
+ NETIF_F_TSO_ECN |
+ NETIF_F_TSO6 |
+ NETIF_F_GSO_PARTIAL |
+ NETIF_F_GSO_UDP_L4;
netdev->hw_features |= netdev->features;
netdev->vlan_features |= netdev->features;
netdev->hw_enc_features |= netdev->features;
+ netdev->features |= NETIF_F_NTUPLE;
netdev->min_mtu = IPV6_MIN_MTU;
netdev->max_mtu = FBNIC_MAX_JUMBO_FRAME_SIZE - ETH_HLEN;
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
index a392ac1cc4f2..b84b447a8d8a 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
@@ -13,6 +13,9 @@
#define FBNIC_MAX_NAPI_VECTORS 128u
+/* Natively supported tunnel GSO features (not thru GSO_PARTIAL) */
+#define FBNIC_TUN_GSO_FEATURES NETIF_F_GSO_IPXIP6
+
struct fbnic_net {
struct fbnic_ring *tx[FBNIC_MAX_TXQS];
struct fbnic_ring *rx[FBNIC_MAX_RXQS];
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c b/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c
index bb11fc83367d..860b02b22c15 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c
@@ -133,7 +133,6 @@ int fbnic_phylink_init(struct net_device *netdev)
struct fbnic_net *fbn = netdev_priv(netdev);
struct phylink *phylink;
- fbn->phylink_pcs.neg_mode = true;
fbn->phylink_pcs.ops = &fbnic_phylink_pcs_ops;
fbn->phylink_config.dev = &netdev->dev;
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c
index c25bd300b902..8ff07b5562e3 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c
@@ -3,6 +3,7 @@
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
+#include <net/ipv6.h>
#include "fbnic.h"
#include "fbnic_netdev.h"
@@ -60,7 +61,7 @@ void fbnic_rss_disable_hw(struct fbnic_dev *fbd)
#define FBNIC_FH_2_RSSEM_BIT(_fh, _rssem, _val) \
FIELD_PREP(FBNIC_RPC_ACT_TBL1_RSS_ENA_##_rssem, \
FIELD_GET(RXH_##_fh, _val))
-static u16 fbnic_flow_hash_2_rss_en_mask(struct fbnic_net *fbn, int flow_type)
+u16 fbnic_flow_hash_2_rss_en_mask(struct fbnic_net *fbn, int flow_type)
{
u32 flow_hash = fbn->rss_flow_hash[flow_type];
u32 rss_en_mask = 0;
@@ -698,6 +699,359 @@ void fbnic_write_tce_tcam(struct fbnic_dev *fbd)
__fbnic_write_tce_tcam(fbd);
}
+struct fbnic_ip_addr *__fbnic_ip4_sync(struct fbnic_dev *fbd,
+ struct fbnic_ip_addr *ip_addr,
+ const struct in_addr *addr,
+ const struct in_addr *mask)
+{
+ struct fbnic_ip_addr *avail_addr = NULL;
+ unsigned int i;
+
+ /* Scan from top of list to bottom, filling bottom up. */
+ for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES; i++, ip_addr++) {
+ struct in6_addr *m = &ip_addr->mask;
+
+ if (ip_addr->state == FBNIC_TCAM_S_DISABLED) {
+ avail_addr = ip_addr;
+ continue;
+ }
+
+ if (ip_addr->version != 4)
+ continue;
+
+ /* Drop avail_addr if mask is a subset of our current mask,
+ * This prevents us from inserting a longer prefix behind a
+ * shorter one.
+ *
+ * The mask is stored inverted value so as an example:
+ * m ffff ffff ffff ffff ffff ffff ffff 0000 0000
+ * mask 0000 0000 0000 0000 0000 0000 0000 ffff ffff
+ *
+ * "m" and "mask" represent typical IPv4 mask stored in
+ * the TCAM and those provided by the stack. The code below
+ * should return a non-zero result if there is a 0 stored
+ * anywhere in "m" where "mask" has a 0.
+ */
+ if (~m->s6_addr32[3] & ~mask->s_addr) {
+ avail_addr = NULL;
+ continue;
+ }
+
+ /* Check to see if the mask actually contains fewer bits than
+ * our new mask "m". The XOR below should only result in 0 if
+ * "m" is masking a bit that we are looking for in our new
+ * "mask", we eliminated the 0^0 case with the check above.
+ *
+ * If it contains fewer bits we need to stop here, otherwise
+ * we might be adding an unreachable rule.
+ */
+ if (~(m->s6_addr32[3] ^ mask->s_addr))
+ break;
+
+ if (ip_addr->value.s6_addr32[3] == addr->s_addr) {
+ avail_addr = ip_addr;
+ break;
+ }
+ }
+
+ if (avail_addr && avail_addr->state == FBNIC_TCAM_S_DISABLED) {
+ ipv6_addr_set(&avail_addr->value, 0, 0, 0, addr->s_addr);
+ ipv6_addr_set(&avail_addr->mask, htonl(~0), htonl(~0),
+ htonl(~0), ~mask->s_addr);
+ avail_addr->version = 4;
+
+ avail_addr->state = FBNIC_TCAM_S_ADD;
+ }
+
+ return avail_addr;
+}
+
+struct fbnic_ip_addr *__fbnic_ip6_sync(struct fbnic_dev *fbd,
+ struct fbnic_ip_addr *ip_addr,
+ const struct in6_addr *addr,
+ const struct in6_addr *mask)
+{
+ struct fbnic_ip_addr *avail_addr = NULL;
+ unsigned int i;
+
+ ip_addr = &ip_addr[FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES - 1];
+
+ /* Scan from bottom of list to top, filling top down. */
+ for (i = FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES; i--; ip_addr--) {
+ struct in6_addr *m = &ip_addr->mask;
+
+ if (ip_addr->state == FBNIC_TCAM_S_DISABLED) {
+ avail_addr = ip_addr;
+ continue;
+ }
+
+ if (ip_addr->version != 6)
+ continue;
+
+ /* Drop avail_addr if mask is a superset of our current mask.
+ * This prevents us from inserting a longer prefix behind a
+ * shorter one.
+ *
+ * The mask is stored inverted value so as an example:
+ * m 0000 0000 0000 0000 0000 0000 0000 0000 0000
+ * mask ffff ffff ffff ffff ffff ffff ffff ffff ffff
+ *
+ * "m" and "mask" represent typical IPv6 mask stored in
+ * the TCAM and those provided by the stack. The code below
+ * should return a non-zero result which will cause us
+ * to drop the avail_addr value that might be cached
+ * to prevent us from dropping a v6 address behind it.
+ */
+ if ((m->s6_addr32[0] & mask->s6_addr32[0]) |
+ (m->s6_addr32[1] & mask->s6_addr32[1]) |
+ (m->s6_addr32[2] & mask->s6_addr32[2]) |
+ (m->s6_addr32[3] & mask->s6_addr32[3])) {
+ avail_addr = NULL;
+ continue;
+ }
+
+ /* The previous test eliminated any overlap between the
+ * two values so now we need to check for gaps.
+ *
+ * If the mask is equal to our current mask then it should
+ * result with m ^ mask = ffff ffff, if however the value
+ * stored in m is bigger then we should see a 0 appear
+ * somewhere in the mask.
+ */
+ if (~(m->s6_addr32[0] ^ mask->s6_addr32[0]) |
+ ~(m->s6_addr32[1] ^ mask->s6_addr32[1]) |
+ ~(m->s6_addr32[2] ^ mask->s6_addr32[2]) |
+ ~(m->s6_addr32[3] ^ mask->s6_addr32[3]))
+ break;
+
+ if (ipv6_addr_cmp(&ip_addr->value, addr))
+ continue;
+
+ avail_addr = ip_addr;
+ break;
+ }
+
+ if (avail_addr && avail_addr->state == FBNIC_TCAM_S_DISABLED) {
+ memcpy(&avail_addr->value, addr, sizeof(*addr));
+ ipv6_addr_set(&avail_addr->mask,
+ ~mask->s6_addr32[0], ~mask->s6_addr32[1],
+ ~mask->s6_addr32[2], ~mask->s6_addr32[3]);
+ avail_addr->version = 6;
+
+ avail_addr->state = FBNIC_TCAM_S_ADD;
+ }
+
+ return avail_addr;
+}
+
+int __fbnic_ip_unsync(struct fbnic_ip_addr *ip_addr, unsigned int tcam_idx)
+{
+ if (!test_and_clear_bit(tcam_idx, ip_addr->act_tcam))
+ return -ENOENT;
+
+ if (bitmap_empty(ip_addr->act_tcam, FBNIC_RPC_TCAM_ACT_NUM_ENTRIES))
+ ip_addr->state = FBNIC_TCAM_S_DELETE;
+
+ return 0;
+}
+
+static void fbnic_clear_ip_src_entry(struct fbnic_dev *fbd, unsigned int idx)
+{
+ int i;
+
+ /* Invalidate entry and clear addr state info */
+ for (i = 0; i <= FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++)
+ wr32(fbd, FBNIC_RPC_TCAM_IPSRC(idx, i), 0);
+}
+
+static void fbnic_clear_ip_dst_entry(struct fbnic_dev *fbd, unsigned int idx)
+{
+ int i;
+
+ /* Invalidate entry and clear addr state info */
+ for (i = 0; i <= FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++)
+ wr32(fbd, FBNIC_RPC_TCAM_IPDST(idx, i), 0);
+}
+
+static void fbnic_clear_ip_outer_src_entry(struct fbnic_dev *fbd,
+ unsigned int idx)
+{
+ int i;
+
+ /* Invalidate entry and clear addr state info */
+ for (i = 0; i <= FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++)
+ wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPSRC(idx, i), 0);
+}
+
+static void fbnic_clear_ip_outer_dst_entry(struct fbnic_dev *fbd,
+ unsigned int idx)
+{
+ int i;
+
+ /* Invalidate entry and clear addr state info */
+ for (i = 0; i <= FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++)
+ wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPDST(idx, i), 0);
+}
+
+static void fbnic_write_ip_src_entry(struct fbnic_dev *fbd, unsigned int idx,
+ struct fbnic_ip_addr *ip_addr)
+{
+ __be16 *mask, *value;
+ int i;
+
+ mask = &ip_addr->mask.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1];
+ value = &ip_addr->value.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1];
+
+ for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++)
+ wr32(fbd, FBNIC_RPC_TCAM_IPSRC(idx, i),
+ FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_MASK, ntohs(*mask--)) |
+ FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_VALUE, ntohs(*value--)));
+ wrfl(fbd);
+
+ /* Bit 129 is used to flag for v4/v6 */
+ wr32(fbd, FBNIC_RPC_TCAM_IPSRC(idx, i),
+ (ip_addr->version == 6) | FBNIC_RPC_TCAM_VALIDATE);
+}
+
+static void fbnic_write_ip_dst_entry(struct fbnic_dev *fbd, unsigned int idx,
+ struct fbnic_ip_addr *ip_addr)
+{
+ __be16 *mask, *value;
+ int i;
+
+ mask = &ip_addr->mask.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1];
+ value = &ip_addr->value.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1];
+
+ for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++)
+ wr32(fbd, FBNIC_RPC_TCAM_IPDST(idx, i),
+ FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_MASK, ntohs(*mask--)) |
+ FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_VALUE, ntohs(*value--)));
+ wrfl(fbd);
+
+ /* Bit 129 is used to flag for v4/v6 */
+ wr32(fbd, FBNIC_RPC_TCAM_IPDST(idx, i),
+ (ip_addr->version == 6) | FBNIC_RPC_TCAM_VALIDATE);
+}
+
+static void fbnic_write_ip_outer_src_entry(struct fbnic_dev *fbd,
+ unsigned int idx,
+ struct fbnic_ip_addr *ip_addr)
+{
+ __be16 *mask, *value;
+ int i;
+
+ mask = &ip_addr->mask.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1];
+ value = &ip_addr->value.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1];
+
+ for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++)
+ wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPSRC(idx, i),
+ FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_MASK, ntohs(*mask--)) |
+ FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_VALUE, ntohs(*value--)));
+ wrfl(fbd);
+
+ wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPSRC(idx, i), FBNIC_RPC_TCAM_VALIDATE);
+}
+
+static void fbnic_write_ip_outer_dst_entry(struct fbnic_dev *fbd,
+ unsigned int idx,
+ struct fbnic_ip_addr *ip_addr)
+{
+ __be16 *mask, *value;
+ int i;
+
+ mask = &ip_addr->mask.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1];
+ value = &ip_addr->value.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1];
+
+ for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++)
+ wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPDST(idx, i),
+ FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_MASK, ntohs(*mask--)) |
+ FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_VALUE, ntohs(*value--)));
+ wrfl(fbd);
+
+ wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPDST(idx, i), FBNIC_RPC_TCAM_VALIDATE);
+}
+
+void fbnic_write_ip_addr(struct fbnic_dev *fbd)
+{
+ int idx;
+
+ for (idx = ARRAY_SIZE(fbd->ip_src); idx--;) {
+ struct fbnic_ip_addr *ip_addr = &fbd->ip_src[idx];
+
+ /* Check if update flag is set else skip. */
+ if (!(ip_addr->state & FBNIC_TCAM_S_UPDATE))
+ continue;
+
+ /* Clear by writing 0s. */
+ if (ip_addr->state == FBNIC_TCAM_S_DELETE) {
+ /* Invalidate entry and clear addr state info */
+ fbnic_clear_ip_src_entry(fbd, idx);
+ memset(ip_addr, 0, sizeof(*ip_addr));
+
+ continue;
+ }
+
+ fbnic_write_ip_src_entry(fbd, idx, ip_addr);
+
+ ip_addr->state = FBNIC_TCAM_S_VALID;
+ }
+
+ /* Repeat process for other IP TCAMs */
+ for (idx = ARRAY_SIZE(fbd->ip_dst); idx--;) {
+ struct fbnic_ip_addr *ip_addr = &fbd->ip_dst[idx];
+
+ if (!(ip_addr->state & FBNIC_TCAM_S_UPDATE))
+ continue;
+
+ if (ip_addr->state == FBNIC_TCAM_S_DELETE) {
+ fbnic_clear_ip_dst_entry(fbd, idx);
+ memset(ip_addr, 0, sizeof(*ip_addr));
+
+ continue;
+ }
+
+ fbnic_write_ip_dst_entry(fbd, idx, ip_addr);
+
+ ip_addr->state = FBNIC_TCAM_S_VALID;
+ }
+
+ for (idx = ARRAY_SIZE(fbd->ipo_src); idx--;) {
+ struct fbnic_ip_addr *ip_addr = &fbd->ipo_src[idx];
+
+ if (!(ip_addr->state & FBNIC_TCAM_S_UPDATE))
+ continue;
+
+ if (ip_addr->state == FBNIC_TCAM_S_DELETE) {
+ fbnic_clear_ip_outer_src_entry(fbd, idx);
+ memset(ip_addr, 0, sizeof(*ip_addr));
+
+ continue;
+ }
+
+ fbnic_write_ip_outer_src_entry(fbd, idx, ip_addr);
+
+ ip_addr->state = FBNIC_TCAM_S_VALID;
+ }
+
+ for (idx = ARRAY_SIZE(fbd->ipo_dst); idx--;) {
+ struct fbnic_ip_addr *ip_addr = &fbd->ipo_dst[idx];
+
+ if (!(ip_addr->state & FBNIC_TCAM_S_UPDATE))
+ continue;
+
+ if (ip_addr->state == FBNIC_TCAM_S_DELETE) {
+ fbnic_clear_ip_outer_dst_entry(fbd, idx);
+ memset(ip_addr, 0, sizeof(*ip_addr));
+
+ continue;
+ }
+
+ fbnic_write_ip_outer_dst_entry(fbd, idx, ip_addr);
+
+ ip_addr->state = FBNIC_TCAM_S_VALID;
+ }
+}
+
void fbnic_clear_rules(struct fbnic_dev *fbd)
{
u32 dest = FIELD_PREP(FBNIC_RPC_ACT_TBL0_DEST_MASK,
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h
index 0d8285fa5b45..6892414195c3 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h
@@ -7,6 +7,8 @@
#include <uapi/linux/in6.h>
#include <linux/bitfield.h>
+struct in_addr;
+
/* The TCAM state definitions follow an expected ordering.
* They start out disabled, then move through the following states:
* Disabled 0 -> Add 2
@@ -32,6 +34,12 @@ enum {
#define FBNIC_RPC_TCAM_MACDA_WORD_LEN 3
#define FBNIC_RPC_TCAM_MACDA_NUM_ENTRIES 32
+/* 8 IPSRC and IPDST TCAM Entries each
+ * 8 registers, Validate each
+ */
+#define FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN 8
+#define FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES 8
+
#define FBNIC_RPC_TCAM_ACT_WORD_LEN 11
#define FBNIC_RPC_TCAM_ACT_NUM_ENTRIES 64
@@ -47,6 +55,13 @@ struct fbnic_mac_addr {
DECLARE_BITMAP(act_tcam, FBNIC_RPC_TCAM_ACT_NUM_ENTRIES);
};
+struct fbnic_ip_addr {
+ struct in6_addr mask, value;
+ unsigned char version;
+ unsigned char state;
+ DECLARE_BITMAP(act_tcam, FBNIC_RPC_TCAM_ACT_NUM_ENTRIES);
+};
+
struct fbnic_act_tcam {
struct {
u16 tcam[FBNIC_RPC_TCAM_ACT_WORD_LEN];
@@ -81,6 +96,11 @@ enum {
#define FBNIC_RPC_ACT_TBL_BMC_OFFSET 0
#define FBNIC_RPC_ACT_TBL_BMC_ALL_MULTI_OFFSET 1
+/* This should leave us with 48 total entries in the TCAM that can be used
+ * for NFC after also deducting the 14 needed for RSS table programming.
+ */
+#define FBNIC_RPC_ACT_TBL_NFC_OFFSET 2
+
/* We reserve the last 14 entries for RSS rules on the host. The BMC
* unicast rule will need to be populated above these and is expected to
* use MACDA TCAM entry 23 to store the BMC MAC address.
@@ -88,6 +108,9 @@ enum {
#define FBNIC_RPC_ACT_TBL_RSS_OFFSET \
(FBNIC_RPC_ACT_TBL_NUM_ENTRIES - FBNIC_RSS_EN_NUM_ENTRIES)
+#define FBNIC_RPC_ACT_TBL_NFC_ENTRIES \
+ (FBNIC_RPC_ACT_TBL_RSS_OFFSET - FBNIC_RPC_ACT_TBL_NFC_OFFSET)
+
/* Flags used to identify the owner for this MAC filter. Note that any
* flags set for Broadcast thru Promisc indicate that the rule belongs
* to the RSS filters for the host.
@@ -168,6 +191,7 @@ void fbnic_rss_init_en_mask(struct fbnic_net *fbn);
void fbnic_rss_disable_hw(struct fbnic_dev *fbd);
void fbnic_rss_reinit_hw(struct fbnic_dev *fbd, struct fbnic_net *fbn);
void fbnic_rss_reinit(struct fbnic_dev *fbd, struct fbnic_net *fbn);
+u16 fbnic_flow_hash_2_rss_en_mask(struct fbnic_net *fbn, int flow_type);
int __fbnic_xc_unsync(struct fbnic_mac_addr *mac_addr, unsigned int tcam_idx);
struct fbnic_mac_addr *__fbnic_uc_sync(struct fbnic_dev *fbd,
@@ -177,6 +201,17 @@ struct fbnic_mac_addr *__fbnic_mc_sync(struct fbnic_dev *fbd,
void fbnic_sift_macda(struct fbnic_dev *fbd);
void fbnic_write_macda(struct fbnic_dev *fbd);
+struct fbnic_ip_addr *__fbnic_ip4_sync(struct fbnic_dev *fbd,
+ struct fbnic_ip_addr *ip_addr,
+ const struct in_addr *addr,
+ const struct in_addr *mask);
+struct fbnic_ip_addr *__fbnic_ip6_sync(struct fbnic_dev *fbd,
+ struct fbnic_ip_addr *ip_addr,
+ const struct in6_addr *addr,
+ const struct in6_addr *mask);
+int __fbnic_ip_unsync(struct fbnic_ip_addr *ip_addr, unsigned int tcam_idx);
+void fbnic_write_ip_addr(struct fbnic_dev *fbd);
+
static inline int __fbnic_uc_unsync(struct fbnic_mac_addr *mac_addr)
{
return __fbnic_xc_unsync(mac_addr, FBNIC_MAC_ADDR_T_UNICAST);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
index d4d7027df9a0..b2e544a66de3 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
@@ -6,6 +6,7 @@
#include <linux/pci.h>
#include <net/netdev_queues.h>
#include <net/page_pool/helpers.h>
+#include <net/tcp.h>
#include "fbnic.h"
#include "fbnic_csr.h"
@@ -18,6 +19,7 @@ enum {
struct fbnic_xmit_cb {
u32 bytecount;
+ u16 gso_segs;
u8 desc_count;
u8 flags;
int hw_head;
@@ -113,6 +115,11 @@ static int fbnic_maybe_stop_tx(const struct net_device *dev,
res = netif_txq_maybe_stop(txq, fbnic_desc_unused(ring), size,
FBNIC_TX_DESC_WAKEUP);
+ if (!res) {
+ u64_stats_update_begin(&ring->stats.syncp);
+ ring->stats.twq.stop++;
+ u64_stats_update_end(&ring->stats.syncp);
+ }
return !res;
}
@@ -174,8 +181,72 @@ static bool fbnic_tx_tstamp(struct sk_buff *skb)
}
static bool
+fbnic_tx_lso(struct fbnic_ring *ring, struct sk_buff *skb,
+ struct skb_shared_info *shinfo, __le64 *meta,
+ unsigned int *l2len, unsigned int *i3len)
+{
+ unsigned int l3_type, l4_type, l4len, hdrlen;
+ unsigned char *l4hdr;
+ __be16 payload_len;
+
+ if (unlikely(skb_cow_head(skb, 0)))
+ return true;
+
+ if (shinfo->gso_type & SKB_GSO_PARTIAL) {
+ l3_type = FBNIC_TWD_L3_TYPE_OTHER;
+ } else if (!skb->encapsulation) {
+ if (ip_hdr(skb)->version == 4)
+ l3_type = FBNIC_TWD_L3_TYPE_IPV4;
+ else
+ l3_type = FBNIC_TWD_L3_TYPE_IPV6;
+ } else {
+ unsigned int o3len;
+
+ o3len = skb_inner_network_header(skb) - skb_network_header(skb);
+ *i3len -= o3len;
+ *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L3_OHLEN_MASK,
+ o3len / 2));
+ l3_type = FBNIC_TWD_L3_TYPE_V6V6;
+ }
+
+ l4hdr = skb_checksum_start(skb);
+ payload_len = cpu_to_be16(skb->len - (l4hdr - skb->data));
+
+ if (shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) {
+ struct tcphdr *tcph = (struct tcphdr *)l4hdr;
+
+ l4_type = FBNIC_TWD_L4_TYPE_TCP;
+ l4len = __tcp_hdrlen((struct tcphdr *)l4hdr);
+ csum_replace_by_diff(&tcph->check, (__force __wsum)payload_len);
+ } else {
+ struct udphdr *udph = (struct udphdr *)l4hdr;
+
+ l4_type = FBNIC_TWD_L4_TYPE_UDP;
+ l4len = sizeof(struct udphdr);
+ csum_replace_by_diff(&udph->check, (__force __wsum)payload_len);
+ }
+
+ hdrlen = (l4hdr - skb->data) + l4len;
+ *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L3_TYPE_MASK, l3_type) |
+ FIELD_PREP(FBNIC_TWD_L4_TYPE_MASK, l4_type) |
+ FIELD_PREP(FBNIC_TWD_L4_HLEN_MASK, l4len / 4) |
+ FIELD_PREP(FBNIC_TWD_MSS_MASK, shinfo->gso_size) |
+ FBNIC_TWD_FLAG_REQ_LSO);
+
+ FBNIC_XMIT_CB(skb)->bytecount += (shinfo->gso_segs - 1) * hdrlen;
+ FBNIC_XMIT_CB(skb)->gso_segs = shinfo->gso_segs;
+
+ u64_stats_update_begin(&ring->stats.syncp);
+ ring->stats.twq.lso += shinfo->gso_segs;
+ u64_stats_update_end(&ring->stats.syncp);
+
+ return false;
+}
+
+static bool
fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
unsigned int l2len, i3len;
if (fbnic_tx_tstamp(skb))
@@ -190,7 +261,15 @@ fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_CSUM_OFFSET_MASK,
skb->csum_offset / 2));
- *meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_CSO);
+ if (shinfo->gso_size) {
+ if (fbnic_tx_lso(ring, skb, shinfo, meta, &l2len, &i3len))
+ return true;
+ } else {
+ *meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_CSO);
+ u64_stats_update_begin(&ring->stats.syncp);
+ ring->stats.twq.csum_partial++;
+ u64_stats_update_end(&ring->stats.syncp);
+ }
*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) |
FIELD_PREP(FBNIC_TWD_L3_IHLEN_MASK, i3len / 2));
@@ -198,12 +277,15 @@ fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
}
static void
-fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq)
+fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq,
+ u64 *csum_cmpl, u64 *csum_none)
{
skb_checksum_none_assert(skb);
- if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM)))
+ if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) {
+ (*csum_none)++;
return;
+ }
if (FIELD_GET(FBNIC_RCD_META_L4_CSUM_UNNECESSARY, rcd)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -212,6 +294,7 @@ fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq)
skb->ip_summed = CHECKSUM_COMPLETE;
skb->csum = (__force __wsum)csum;
+ (*csum_cmpl)++;
}
}
@@ -329,7 +412,9 @@ fbnic_xmit_frame_ring(struct sk_buff *skb, struct fbnic_ring *ring)
/* Write all members within DWORD to condense this into 2 4B writes */
FBNIC_XMIT_CB(skb)->bytecount = skb->len;
+ FBNIC_XMIT_CB(skb)->gso_segs = 1;
FBNIC_XMIT_CB(skb)->desc_count = 0;
+ FBNIC_XMIT_CB(skb)->flags = 0;
if (fbnic_tx_offloads(ring, skb, meta))
goto err_free;
@@ -356,6 +441,59 @@ netdev_tx_t fbnic_xmit_frame(struct sk_buff *skb, struct net_device *dev)
return fbnic_xmit_frame_ring(skb, fbn->tx[q_map]);
}
+static netdev_features_t
+fbnic_features_check_encap_gso(struct sk_buff *skb, struct net_device *dev,
+ netdev_features_t features, unsigned int l3len)
+{
+ netdev_features_t skb_gso_features;
+ struct ipv6hdr *ip6_hdr;
+ unsigned char l4_hdr;
+ unsigned int start;
+ __be16 frag_off;
+
+ /* Require MANGLEID for GSO_PARTIAL of IPv4.
+ * In theory we could support TSO with single, innermost v4 header
+ * by pretending everything before it is L2, but that needs to be
+ * parsed case by case.. so leaving it for when the need arises.
+ */
+ if (!(features & NETIF_F_TSO_MANGLEID))
+ features &= ~NETIF_F_TSO;
+
+ skb_gso_features = skb_shinfo(skb)->gso_type;
+ skb_gso_features <<= NETIF_F_GSO_SHIFT;
+
+ /* We'd only clear the native GSO features, so don't bother validating
+ * if the match can only be on those supported thru GSO_PARTIAL.
+ */
+ if (!(skb_gso_features & FBNIC_TUN_GSO_FEATURES))
+ return features;
+
+ /* We can only do IPv6-in-IPv6, not v4-in-v6. It'd be nice
+ * to fall back to partial for this, or any failure below.
+ * This is just an optimization, UDPv4 will be caught later on.
+ */
+ if (skb_gso_features & NETIF_F_TSO)
+ return features & ~FBNIC_TUN_GSO_FEATURES;
+
+ /* Inner headers multiple of 2 */
+ if ((skb_inner_network_header(skb) - skb_network_header(skb)) % 2)
+ return features & ~FBNIC_TUN_GSO_FEATURES;
+
+ /* Encapsulated GSO packet, make 100% sure it's IPv6-in-IPv6. */
+ ip6_hdr = ipv6_hdr(skb);
+ if (ip6_hdr->version != 6)
+ return features & ~FBNIC_TUN_GSO_FEATURES;
+
+ l4_hdr = ip6_hdr->nexthdr;
+ start = (unsigned char *)ip6_hdr - skb->data + sizeof(struct ipv6hdr);
+ start = ipv6_skip_exthdr(skb, start, &l4_hdr, &frag_off);
+ if (frag_off || l4_hdr != IPPROTO_IPV6 ||
+ skb->data + start != skb_inner_network_header(skb))
+ return features & ~FBNIC_TUN_GSO_FEATURES;
+
+ return features;
+}
+
netdev_features_t
fbnic_features_check(struct sk_buff *skb, struct net_device *dev,
netdev_features_t features)
@@ -376,9 +514,12 @@ fbnic_features_check(struct sk_buff *skb, struct net_device *dev,
!FIELD_FIT(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) ||
!FIELD_FIT(FBNIC_TWD_L3_IHLEN_MASK, l3len / 2) ||
!FIELD_FIT(FBNIC_TWD_CSUM_OFFSET_MASK, skb->csum_offset / 2))
- return features & ~NETIF_F_CSUM_MASK;
+ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
- return features;
+ if (likely(!skb->encapsulation) || !skb_is_gso(skb))
+ return features;
+
+ return fbnic_features_check_encap_gso(skb, dev, features, l3len);
}
static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
@@ -429,7 +570,7 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
}
total_bytes += FBNIC_XMIT_CB(skb)->bytecount;
- total_packets += 1;
+ total_packets += FBNIC_XMIT_CB(skb)->gso_segs;
napi_consume_skb(skb, napi_budget);
}
@@ -444,7 +585,7 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
if (unlikely(discard)) {
u64_stats_update_begin(&ring->stats.syncp);
ring->stats.dropped += total_packets;
- ring->stats.ts_lost += ts_lost;
+ ring->stats.twq.ts_lost += ts_lost;
u64_stats_update_end(&ring->stats.syncp);
netdev_tx_completed_queue(txq, total_packets, total_bytes);
@@ -456,9 +597,13 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
ring->stats.packets += total_packets;
u64_stats_update_end(&ring->stats.syncp);
- netif_txq_completed_wake(txq, total_packets, total_bytes,
- fbnic_desc_unused(ring),
- FBNIC_TX_DESC_WAKEUP);
+ if (!netif_txq_completed_wake(txq, total_packets, total_bytes,
+ fbnic_desc_unused(ring),
+ FBNIC_TX_DESC_WAKEUP)) {
+ u64_stats_update_begin(&ring->stats.syncp);
+ ring->stats.twq.wake++;
+ u64_stats_update_end(&ring->stats.syncp);
+ }
}
static void fbnic_clean_tsq(struct fbnic_napi_vector *nv,
@@ -507,7 +652,7 @@ static void fbnic_clean_tsq(struct fbnic_napi_vector *nv,
skb_tstamp_tx(skb, &hwtstamp);
u64_stats_update_begin(&ring->stats.syncp);
- ring->stats.ts_packets++;
+ ring->stats.twq.ts_packets++;
u64_stats_update_end(&ring->stats.syncp);
}
@@ -661,8 +806,13 @@ static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq)
struct page *page;
page = page_pool_dev_alloc_pages(nv->page_pool);
- if (!page)
+ if (!page) {
+ u64_stats_update_begin(&bdq->stats.syncp);
+ bdq->stats.rx.alloc_failed++;
+ u64_stats_update_end(&bdq->stats.syncp);
+
break;
+ }
fbnic_page_pool_init(bdq, i, page);
fbnic_bd_prep(bdq, i, page);
@@ -875,12 +1025,13 @@ static void fbnic_rx_tstamp(struct fbnic_napi_vector *nv, u64 rcd,
static void fbnic_populate_skb_fields(struct fbnic_napi_vector *nv,
u64 rcd, struct sk_buff *skb,
- struct fbnic_q_triad *qt)
+ struct fbnic_q_triad *qt,
+ u64 *csum_cmpl, u64 *csum_none)
{
struct net_device *netdev = nv->napi.dev;
struct fbnic_ring *rcq = &qt->cmpl;
- fbnic_rx_csum(rcd, skb, rcq);
+ fbnic_rx_csum(rcd, skb, rcq, csum_cmpl, csum_none);
if (netdev->features & NETIF_F_RXHASH)
skb_set_hash(skb,
@@ -898,7 +1049,8 @@ static bool fbnic_rcd_metadata_err(u64 rcd)
static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
struct fbnic_q_triad *qt, int budget)
{
- unsigned int packets = 0, bytes = 0, dropped = 0;
+ unsigned int packets = 0, bytes = 0, dropped = 0, alloc_failed = 0;
+ u64 csum_complete = 0, csum_none = 0;
struct fbnic_ring *rcq = &qt->cmpl;
struct fbnic_pkt_buff *pkt;
s32 head0 = -1, head1 = -1;
@@ -947,14 +1099,22 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
/* Populate skb and invalidate XDP */
if (!IS_ERR_OR_NULL(skb)) {
- fbnic_populate_skb_fields(nv, rcd, skb, qt);
+ fbnic_populate_skb_fields(nv, rcd, skb, qt,
+ &csum_complete,
+ &csum_none);
packets++;
bytes += skb->len;
napi_gro_receive(&nv->napi, skb);
} else {
- dropped++;
+ if (!skb) {
+ alloc_failed++;
+ dropped++;
+ } else {
+ dropped++;
+ }
+
fbnic_put_pkt_buff(nv, pkt, 1);
}
@@ -977,6 +1137,9 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
/* Re-add ethernet header length (removed in fbnic_build_skb) */
rcq->stats.bytes += ETH_HLEN * packets;
rcq->stats.dropped += dropped;
+ rcq->stats.rx.alloc_failed += alloc_failed;
+ rcq->stats.rx.csum_complete += csum_complete;
+ rcq->stats.rx.csum_none += csum_none;
u64_stats_update_end(&rcq->stats.syncp);
/* Unmap and free processed buffers */
@@ -1054,6 +1217,11 @@ void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn,
fbn->rx_stats.bytes += stats->bytes;
fbn->rx_stats.packets += stats->packets;
fbn->rx_stats.dropped += stats->dropped;
+ fbn->rx_stats.rx.alloc_failed += stats->rx.alloc_failed;
+ fbn->rx_stats.rx.csum_complete += stats->rx.csum_complete;
+ fbn->rx_stats.rx.csum_none += stats->rx.csum_none;
+ /* Remember to add new stats here */
+ BUILD_BUG_ON(sizeof(fbn->tx_stats.rx) / 8 != 3);
}
void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn,
@@ -1065,8 +1233,14 @@ void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn,
fbn->tx_stats.bytes += stats->bytes;
fbn->tx_stats.packets += stats->packets;
fbn->tx_stats.dropped += stats->dropped;
- fbn->tx_stats.ts_lost += stats->ts_lost;
- fbn->tx_stats.ts_packets += stats->ts_packets;
+ fbn->tx_stats.twq.csum_partial += stats->twq.csum_partial;
+ fbn->tx_stats.twq.lso += stats->twq.lso;
+ fbn->tx_stats.twq.ts_lost += stats->twq.ts_lost;
+ fbn->tx_stats.twq.ts_packets += stats->twq.ts_packets;
+ fbn->tx_stats.twq.stop += stats->twq.stop;
+ fbn->tx_stats.twq.wake += stats->twq.wake;
+ /* Remember to add new stats here */
+ BUILD_BUG_ON(sizeof(fbn->tx_stats.twq) / 8 != 6);
}
static void fbnic_remove_tx_ring(struct fbnic_net *fbn,
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
index c2a94f31f71b..89a5c394f846 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
@@ -56,9 +56,22 @@ struct fbnic_pkt_buff {
struct fbnic_queue_stats {
u64 packets;
u64 bytes;
+ union {
+ struct {
+ u64 csum_partial;
+ u64 lso;
+ u64 ts_packets;
+ u64 ts_lost;
+ u64 stop;
+ u64 wake;
+ } twq;
+ struct {
+ u64 alloc_failed;
+ u64 csum_complete;
+ u64 csum_none;
+ } rx;
+ };
u64 dropped;
- u64 ts_packets;
- u64 ts_lost;
struct u64_stats_sync syncp;
};
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 3234a960fcc3..0af143ec0f86 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -828,7 +828,6 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p,
port->phylink_config.type = PHYLINK_NETDEV;
port->phylink_pcs.poll = true;
port->phylink_pcs.ops = &lan966x_phylink_pcs_ops;
- port->phylink_pcs.neg_mode = true;
port->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
MAC_10 | MAC_100 | MAC_1000FD | MAC_2500FD;
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index 6a0e5b83ecd0..74ad1d73b465 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -338,7 +338,6 @@ static int sparx5_create_port(struct sparx5 *sparx5,
spx5_port->custom_etype = 0x8880; /* Vitesse */
spx5_port->phylink_pcs.poll = true;
spx5_port->phylink_pcs.ops = &sparx5_phylink_pcs_ops;
- spx5_port->phylink_pcs.neg_mode = true;
spx5_port->is_mrouter = false;
INIT_LIST_HEAD(&spx5_port->tc_templates);
sparx5->ports[config->portno] = spx5_port;
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index be95336ce089..c15a5ef4674e 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -666,8 +666,11 @@ int mana_gd_create_hwc_queue(struct gdma_dev *gd,
gmi = &queue->mem_info;
err = mana_gd_alloc_memory(gc, spec->queue_size, gmi);
- if (err)
+ if (err) {
+ dev_err(gc->dev, "GDMA queue type: %d, size: %u, gdma memory allocation err: %d\n",
+ spec->type, spec->queue_size, err);
goto free_q;
+ }
queue->head = 0;
queue->tail = 0;
@@ -688,6 +691,8 @@ int mana_gd_create_hwc_queue(struct gdma_dev *gd,
*queue_ptr = queue;
return 0;
out:
+ dev_err(gc->dev, "Failed to create queue type %d of size %u, err: %d\n",
+ spec->type, spec->queue_size, err);
mana_gd_free_memory(gmi);
free_q:
kfree(queue);
@@ -770,7 +775,13 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd,
}
gmi->dma_region_handle = resp.dma_region_handle;
+ dev_dbg(gc->dev, "Created DMA region handle 0x%llx\n",
+ gmi->dma_region_handle);
out:
+ if (err)
+ dev_dbg(gc->dev,
+ "Failed to create DMA region of length: %u, page_type: %d, status: 0x%x, err: %d\n",
+ length, req->gdma_page_type, resp.hdr.status, err);
kfree(req);
return err;
}
@@ -793,8 +804,11 @@ int mana_gd_create_mana_eq(struct gdma_dev *gd,
gmi = &queue->mem_info;
err = mana_gd_alloc_memory(gc, spec->queue_size, gmi);
- if (err)
+ if (err) {
+ dev_err(gc->dev, "GDMA queue type: %d, size: %u, gdma memory allocation err: %d\n",
+ spec->type, spec->queue_size, err);
goto free_q;
+ }
err = mana_gd_create_dma_region(gd, gmi);
if (err)
@@ -815,6 +829,8 @@ int mana_gd_create_mana_eq(struct gdma_dev *gd,
*queue_ptr = queue;
return 0;
out:
+ dev_err(gc->dev, "Failed to create queue type %d of size: %u, err: %d\n",
+ spec->type, spec->queue_size, err);
mana_gd_free_memory(gmi);
free_q:
kfree(queue);
@@ -841,8 +857,11 @@ int mana_gd_create_mana_wq_cq(struct gdma_dev *gd,
gmi = &queue->mem_info;
err = mana_gd_alloc_memory(gc, spec->queue_size, gmi);
- if (err)
+ if (err) {
+ dev_err(gc->dev, "GDMA queue type: %d, size: %u, memory allocation err: %d\n",
+ spec->type, spec->queue_size, err);
goto free_q;
+ }
err = mana_gd_create_dma_region(gd, gmi);
if (err)
@@ -862,6 +881,8 @@ int mana_gd_create_mana_wq_cq(struct gdma_dev *gd,
*queue_ptr = queue;
return 0;
out:
+ dev_err(gc->dev, "Failed to create queue type %d of size: %u, err: %d\n",
+ spec->type, spec->queue_size, err);
mana_gd_free_memory(gmi);
free_q:
kfree(queue);
@@ -1157,8 +1178,11 @@ int mana_gd_post_and_ring(struct gdma_queue *queue,
int err;
err = mana_gd_post_work_request(queue, wqe_req, wqe_info);
- if (err)
+ if (err) {
+ dev_err(gc->dev, "Failed to post work req from queue type %d of size %u (err=%d)\n",
+ queue->type, queue->queue_size, err);
return err;
+ }
mana_gd_wq_ring_doorbell(gc, queue);
@@ -1435,8 +1459,10 @@ static int mana_gd_setup(struct pci_dev *pdev)
mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base);
err = mana_gd_setup_irqs(pdev);
- if (err)
+ if (err) {
+ dev_err(gc->dev, "Failed to setup IRQs: %d\n", err);
return err;
+ }
err = mana_hwc_create_channel(gc);
if (err)
@@ -1454,12 +1480,14 @@ static int mana_gd_setup(struct pci_dev *pdev)
if (err)
goto destroy_hwc;
+ dev_dbg(&pdev->dev, "mana gdma setup successful\n");
return 0;
destroy_hwc:
mana_hwc_destroy_channel(gc);
remove_irq:
mana_gd_remove_irqs(pdev);
+ dev_err(&pdev->dev, "%s failed (error %d)\n", __func__, err);
return err;
}
@@ -1470,6 +1498,7 @@ static void mana_gd_cleanup(struct pci_dev *pdev)
mana_hwc_destroy_channel(gc);
mana_gd_remove_irqs(pdev);
+ dev_dbg(&pdev->dev, "mana gdma cleanup successful\n");
}
static bool mana_is_pf(unsigned short dev_id)
@@ -1488,8 +1517,10 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
BUILD_BUG_ON(2 * MAX_PORTS_IN_MANA_DEV * GDMA_EQE_SIZE > EQ_SIZE);
err = pci_enable_device(pdev);
- if (err)
+ if (err) {
+ dev_err(&pdev->dev, "Failed to enable pci device (err=%d)\n", err);
return -ENXIO;
+ }
pci_set_master(pdev);
@@ -1498,9 +1529,10 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto disable_dev;
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
- if (err)
+ if (err) {
+ dev_err(&pdev->dev, "DMA set mask failed: %d\n", err);
goto release_region;
-
+ }
dma_set_max_seg_size(&pdev->dev, UINT_MAX);
err = -ENOMEM;
@@ -1575,6 +1607,8 @@ static void mana_gd_remove(struct pci_dev *pdev)
pci_release_regions(pdev);
pci_disable_device(pdev);
+
+ dev_dbg(&pdev->dev, "mana gdma remove successful\n");
}
/* The 'state' parameter is not used. */
diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
index a00f915c5188..1ba49602089b 100644
--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
@@ -440,7 +440,8 @@ static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth,
gmi = &dma_buf->mem_info;
err = mana_gd_alloc_memory(gc, buf_size, gmi);
if (err) {
- dev_err(hwc->dev, "Failed to allocate DMA buffer: %d\n", err);
+ dev_err(hwc->dev, "Failed to allocate DMA buffer size: %u, err %d\n",
+ buf_size, err);
goto out;
}
@@ -529,6 +530,9 @@ static int mana_hwc_create_wq(struct hw_channel_context *hwc,
out:
if (err)
mana_hwc_destroy_wq(hwc, hwc_wq);
+
+ dev_err(hwc->dev, "Failed to create HWC queue size= %u type= %d err= %d\n",
+ queue_size, q_type, err);
return err;
}
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index aa1e47233fe5..0411a1897f57 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -52,10 +52,12 @@ static int mana_open(struct net_device *ndev)
{
struct mana_port_context *apc = netdev_priv(ndev);
int err;
-
err = mana_alloc_queues(ndev);
- if (err)
+
+ if (err) {
+ netdev_err(ndev, "%s failed to allocate queues: %d\n", __func__, err);
return err;
+ }
apc->port_is_up = true;
@@ -64,7 +66,7 @@ static int mana_open(struct net_device *ndev)
netif_carrier_on(ndev);
netif_tx_wake_all_queues(ndev);
-
+ netdev_dbg(ndev, "%s successful\n", __func__);
return 0;
}
@@ -176,6 +178,9 @@ static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc,
return 0;
frag_err:
+ if (net_ratelimit())
+ netdev_err(apc->ndev, "Failed to map skb of size %u to DMA\n",
+ skb->len);
for (i = sg_i - 1; i >= hsg; i--)
dma_unmap_page(dev, ash->dma_handle[i], ash->size[i],
DMA_TO_DEVICE);
@@ -256,6 +261,9 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
if (skb_cow_head(skb, MANA_HEADROOM))
goto tx_drop_count;
+ if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
+ goto tx_drop_count;
+
txq = &apc->tx_qp[txq_idx].txq;
gdma_sq = txq->gdma_sq;
cq = &apc->tx_qp[txq_idx].tx_cq;
@@ -687,6 +695,7 @@ int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_qu
return 0;
error:
+ netdev_err(mpc->ndev, "Failed to pre-allocate RX buffers for %d queues\n", num_queues);
mana_pre_dealloc_rxbufs(mpc);
return -ENOMEM;
}
@@ -1304,8 +1313,10 @@ static int mana_create_eq(struct mana_context *ac)
for (i = 0; i < gc->max_num_queues; i++) {
spec.eq.msix_index = (i + 1) % gc->num_msix_usable;
err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq);
- if (err)
+ if (err) {
+ dev_err(gc->dev, "Failed to create EQ %d : %d\n", i, err);
goto out;
+ }
mana_create_eq_debugfs(ac, i);
}
@@ -2080,6 +2091,8 @@ static int mana_create_txq(struct mana_port_context *apc,
return 0;
out:
+ netdev_err(net, "Failed to create %d TX queues, %d\n",
+ apc->num_queues, err);
mana_destroy_txq(apc);
return err;
}
@@ -2415,6 +2428,7 @@ static int mana_add_rx_queues(struct mana_port_context *apc,
rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev);
if (!rxq) {
err = -ENOMEM;
+ netdev_err(ndev, "Failed to create rxq %d : %d\n", i, err);
goto out;
}
@@ -2661,12 +2675,18 @@ int mana_alloc_queues(struct net_device *ndev)
int err;
err = mana_create_vport(apc, ndev);
- if (err)
+ if (err) {
+ netdev_err(ndev, "Failed to create vPort %u : %d\n", apc->port_idx, err);
return err;
+ }
err = netif_set_real_num_tx_queues(ndev, apc->num_queues);
- if (err)
+ if (err) {
+ netdev_err(ndev,
+ "netif_set_real_num_tx_queues () failed for ndev with num_queues %u : %d\n",
+ apc->num_queues, err);
goto destroy_vport;
+ }
err = mana_add_rx_queues(apc, ndev);
if (err)
@@ -2675,14 +2695,20 @@ int mana_alloc_queues(struct net_device *ndev)
apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE;
err = netif_set_real_num_rx_queues(ndev, apc->num_queues);
- if (err)
+ if (err) {
+ netdev_err(ndev,
+ "netif_set_real_num_rx_queues () failed for ndev with num_queues %u : %d\n",
+ apc->num_queues, err);
goto destroy_vport;
+ }
mana_rss_table_init(apc);
err = mana_config_rss(apc, TRI_STATE_TRUE, true, true);
- if (err)
+ if (err) {
+ netdev_err(ndev, "Failed to configure RSS table: %d\n", err);
goto destroy_vport;
+ }
if (gd->gdma_context->is_pf) {
err = mana_pf_register_filter(apc);
@@ -2823,8 +2849,10 @@ int mana_detach(struct net_device *ndev, bool from_close)
if (apc->port_st_save) {
err = mana_dealloc_queues(ndev);
- if (err)
+ if (err) {
+ netdev_err(ndev, "%s failed to deallocate queues: %d\n", __func__, err);
return err;
+ }
}
if (!from_close) {
@@ -2873,6 +2901,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
ndev->dev_port = port_idx;
SET_NETDEV_DEV(ndev, gc->dev);
+ netif_set_tso_max_size(ndev, GSO_MAX_SIZE);
+
netif_carrier_off(ndev);
netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE);
@@ -2968,6 +2998,8 @@ static int add_adev(struct gdma_dev *gd)
goto add_fail;
gd->adev = adev;
+ dev_dbg(gd->gdma_context->dev,
+ "Auxiliary device added successfully\n");
return 0;
add_fail:
@@ -3009,8 +3041,10 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
}
err = mana_create_eq(ac);
- if (err)
+ if (err) {
+ dev_err(dev, "Failed to create EQs: %d\n", err);
goto out;
+ }
err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
MANA_MICRO_VERSION, &num_ports);
@@ -3066,8 +3100,14 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
err = add_adev(gd);
out:
- if (err)
+ if (err) {
mana_remove(gd, false);
+ } else {
+ dev_dbg(dev, "gd=%p, id=%u, num_ports=%d, type=%u, instance=%u\n",
+ gd, gd->dev_id.as_uint32, ac->num_ports,
+ gd->dev_id.type, gd->dev_id.instance);
+ dev_dbg(dev, "%s succeeded\n", __func__);
+ }
return err;
}
@@ -3129,6 +3169,7 @@ out:
gd->driver_data = NULL;
gd->gdma_context = NULL;
kfree(ac);
+ dev_dbg(dev, "%s succeeded\n", __func__);
}
struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c b/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c
index 0d6c59d6d4ae..ea6a288c0d5e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c
@@ -83,42 +83,12 @@ nfp_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr,
return 0;
}
-static u32 nfp_chip_config[] = {
- HWMON_C_REGISTER_TZ,
- 0
-};
-
-static const struct hwmon_channel_info nfp_chip = {
- .type = hwmon_chip,
- .config = nfp_chip_config,
-};
-
-static u32 nfp_temp_config[] = {
- HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT,
- 0
-};
-
-static const struct hwmon_channel_info nfp_temp = {
- .type = hwmon_temp,
- .config = nfp_temp_config,
-};
-
-static u32 nfp_power_config[] = {
- HWMON_P_INPUT | HWMON_P_MAX,
- HWMON_P_INPUT,
- HWMON_P_INPUT,
- 0
-};
-
-static const struct hwmon_channel_info nfp_power = {
- .type = hwmon_power,
- .config = nfp_power_config,
-};
-
static const struct hwmon_channel_info * const nfp_hwmon_info[] = {
- &nfp_chip,
- &nfp_temp,
- &nfp_power,
+ HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ),
+ HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT),
+ HWMON_CHANNEL_INFO(power, HWMON_P_INPUT | HWMON_P_MAX,
+ HWMON_P_INPUT,
+ HWMON_P_INPUT),
NULL
};
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index fa167b1aa019..5222a035fd19 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -3033,7 +3033,7 @@ static void qed_iov_vf_mbx_vport_update(struct qed_hwfn *p_hwfn,
u16 length;
int rc;
- /* Valiate PF can send such a request */
+ /* Validate PF can send such a request */
if (!vf->vport_instance) {
DP_VERBOSE(p_hwfn,
QED_MSG_IOV,
@@ -3312,7 +3312,7 @@ static void qed_iov_vf_mbx_ucast_filter(struct qed_hwfn *p_hwfn,
goto out;
}
- /* Determine if the unicast filtering is acceptible by PF */
+ /* Determine if the unicast filtering is acceptable by PF */
if ((p_bulletin->valid_bitmap & BIT(VLAN_ADDR_FORCED)) &&
(params.type == QED_FILTER_VLAN ||
params.type == QED_FILTER_MAC_VLAN)) {
@@ -3729,7 +3729,7 @@ qed_iov_execute_vf_flr_cleanup(struct qed_hwfn *p_hwfn,
rc = qed_iov_enable_vf_access(p_hwfn, p_ptt, p_vf);
if (rc) {
- DP_ERR(p_hwfn, "Failed to re-enable VF[%d] acces\n",
+ DP_ERR(p_hwfn, "Failed to re-enable VF[%d] access\n",
vfid);
return rc;
}
@@ -4480,7 +4480,7 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
/* Failure to acquire the ptt in 100g creates an odd error
- * where the first engine has already relased IOV.
+ * where the first engine has already released IOV.
*/
if (!ptt) {
DP_ERR(hwfn, "Failed to acquire ptt\n");
diff --git a/drivers/net/ethernet/realtek/Kconfig b/drivers/net/ethernet/realtek/Kconfig
index 8a8ea51c639e..fe136f61586f 100644
--- a/drivers/net/ethernet/realtek/Kconfig
+++ b/drivers/net/ethernet/realtek/Kconfig
@@ -114,7 +114,8 @@ config R8169
will be called r8169. This is recommended.
config R8169_LEDS
- def_bool R8169 && LEDS_TRIGGER_NETDEV
+ bool "Support for controlling the NIC LEDs"
+ depends on R8169 && LEDS_TRIGGER_NETDEV
depends on !(R8169=y && LEDS_CLASS=m)
help
Optional support for controlling the NIC LED's with the netdev
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 5a5eba49c651..fa339bd8c775 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -169,6 +169,7 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
{ PCI_VDEVICE(REALTEK, 0x8125) },
{ PCI_VDEVICE(REALTEK, 0x8126) },
{ PCI_VDEVICE(REALTEK, 0x3000) },
+ { PCI_VDEVICE(REALTEK, 0x5000) },
{}
};
@@ -5199,6 +5200,33 @@ static int r8169_mdio_write_reg(struct mii_bus *mii_bus, int phyaddr,
return 0;
}
+static int r8169_mdio_read_reg_c45(struct mii_bus *mii_bus, int addr,
+ int devnum, int regnum)
+{
+ struct rtl8169_private *tp = mii_bus->priv;
+
+ if (addr > 0)
+ return -ENODEV;
+
+ if (devnum == MDIO_MMD_VEND2 && regnum > MDIO_STAT2)
+ return r8168_phy_ocp_read(tp, regnum);
+
+ return 0;
+}
+
+static int r8169_mdio_write_reg_c45(struct mii_bus *mii_bus, int addr,
+ int devnum, int regnum, u16 val)
+{
+ struct rtl8169_private *tp = mii_bus->priv;
+
+ if (addr > 0 || devnum != MDIO_MMD_VEND2 || regnum <= MDIO_STAT2)
+ return -ENODEV;
+
+ r8168_phy_ocp_write(tp, regnum, val);
+
+ return 0;
+}
+
static int r8169_mdio_register(struct rtl8169_private *tp)
{
struct pci_dev *pdev = tp->pci_dev;
@@ -5222,12 +5250,18 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
new_bus->priv = tp;
new_bus->parent = &pdev->dev;
new_bus->irq[0] = PHY_MAC_INTERRUPT;
+ new_bus->phy_mask = GENMASK(31, 1);
snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x-%x",
pci_domain_nr(pdev->bus), pci_dev_id(pdev));
new_bus->read = r8169_mdio_read_reg;
new_bus->write = r8169_mdio_write_reg;
+ if (tp->mac_version >= RTL_GIGA_MAC_VER_40) {
+ new_bus->read_c45 = r8169_mdio_read_reg_c45;
+ new_bus->write_c45 = r8169_mdio_write_reg_c45;
+ }
+
ret = devm_mdiobus_register(&pdev->dev, new_bus);
if (ret)
return ret;
@@ -5251,9 +5285,9 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
/* mimic behavior of r8125/r8126 vendor drivers */
if (tp->mac_version == RTL_GIGA_MAC_VER_61)
- phy_set_eee_broken(tp->phydev,
- ETHTOOL_LINK_MODE_2500baseT_Full_BIT);
- phy_set_eee_broken(tp->phydev, ETHTOOL_LINK_MODE_5000baseT_Full_BIT);
+ phy_disable_eee_mode(tp->phydev,
+ ETHTOOL_LINK_MODE_2500baseT_Full_BIT);
+ phy_disable_eee_mode(tp->phydev, ETHTOOL_LINK_MODE_5000baseT_Full_BIT);
/* PHY will be woken up in rtl_open() */
phy_suspend(tp->phydev);
diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index 84d09a8973b7..aba772e14555 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -1287,17 +1287,14 @@ static struct device_node *rswitch_get_port_node(struct rswitch_device *rdev)
if (!ports)
return NULL;
- for_each_child_of_node(ports, port) {
+ for_each_available_child_of_node(ports, port) {
err = of_property_read_u32(port, "reg", &index);
if (err < 0) {
port = NULL;
goto out;
}
- if (index == rdev->etha->index) {
- if (!of_device_is_available(port))
- port = NULL;
+ if (index == rdev->etha->index)
break;
- }
}
out:
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index 3eb55dcfa8a6..c4c43434f314 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -38,8 +38,9 @@ config SFC_MTD
default y
help
This exposes the on-board flash and/or EEPROM as MTD devices
- (e.g. /dev/mtd1). This is required to update the firmware or
- the boot configuration under Linux.
+ (e.g. /dev/mtd1). This is required to update the boot
+ configuration under Linux, or use some older userland tools to
+ update the firmware.
config SFC_MCDI_MON
bool "Solarflare SFC9100-family hwmon support"
depends on SFC && HWMON && !(SFC=y && HWMON=m)
diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile
index 8f446b9bd5ee..d99039ec468d 100644
--- a/drivers/net/ethernet/sfc/Makefile
+++ b/drivers/net/ethernet/sfc/Makefile
@@ -7,7 +7,7 @@ sfc-y += efx.o efx_common.o efx_channels.o nic.o \
mcdi_functions.o mcdi_filters.o mcdi_mon.o \
ef100.o ef100_nic.o ef100_netdev.o \
ef100_ethtool.o ef100_rx.o ef100_tx.o \
- efx_devlink.o
+ efx_devlink.o efx_reflash.o
sfc-$(CONFIG_SFC_MTD) += mtd.o
sfc-$(CONFIG_SFC_SRIOV) += sriov.o ef10_sriov.o ef100_sriov.o ef100_rep.o \
mae.o tc.o tc_bindings.o tc_counters.o \
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 452009ed7a43..47d78abecf30 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -3501,7 +3501,7 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx,
MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_METADATA_IN_LEN);
MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_METADATA_OUT_LENMAX);
const struct efx_ef10_nvram_type_info *info;
- size_t size, erase_size, outlen;
+ size_t size, erase_size, write_size, outlen;
int type_idx = 0;
bool protected;
int rc;
@@ -3516,7 +3516,8 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx,
if (info->port != efx_port_num(efx))
return -ENODEV;
- rc = efx_mcdi_nvram_info(efx, type, &size, &erase_size, &protected);
+ rc = efx_mcdi_nvram_info(efx, type, &size, &erase_size, &write_size,
+ &protected);
if (rc)
return rc;
if (protected &&
@@ -3561,6 +3562,8 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx,
if (!erase_size)
part->common.mtd.flags |= MTD_NO_ERASE;
+ part->common.mtd.writesize = write_size;
+
return 0;
}
diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c
index c88ec3e24836..5a14d94163b1 100644
--- a/drivers/net/ethernet/sfc/efx_common.c
+++ b/drivers/net/ethernet/sfc/efx_common.c
@@ -1003,6 +1003,7 @@ int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev)
INIT_LIST_HEAD(&efx->vf_reps);
INIT_WORK(&efx->mac_work, efx_mac_work);
init_waitqueue_head(&efx->flush_wq);
+ mutex_init(&efx->reflash_mutex);
efx->tx_queues_per_channel = 1;
efx->rxq_entries = EFX_DEFAULT_DMAQ_SIZE;
diff --git a/drivers/net/ethernet/sfc/efx_devlink.c b/drivers/net/ethernet/sfc/efx_devlink.c
index 3cd750820fdd..d842c60dfc10 100644
--- a/drivers/net/ethernet/sfc/efx_devlink.c
+++ b/drivers/net/ethernet/sfc/efx_devlink.c
@@ -19,6 +19,7 @@
#include "mae.h"
#include "ef100_rep.h"
#endif
+#include "efx_reflash.h"
struct efx_devlink {
struct efx_nic *efx;
@@ -615,7 +616,19 @@ static int efx_devlink_info_get(struct devlink *devlink,
return 0;
}
+static int efx_devlink_flash_update(struct devlink *devlink,
+ struct devlink_flash_update_params *params,
+ struct netlink_ext_ack *extack)
+{
+ struct efx_devlink *devlink_private = devlink_priv(devlink);
+ struct efx_nic *efx = devlink_private->efx;
+
+ return efx_reflash_flash_firmware(efx, params->fw, extack);
+}
+
static const struct devlink_ops sfc_devlink_ops = {
+ .supported_flash_update_params = 0,
+ .flash_update = efx_devlink_flash_update,
.info_get = efx_devlink_info_get,
};
diff --git a/drivers/net/ethernet/sfc/efx_reflash.c b/drivers/net/ethernet/sfc/efx_reflash.c
new file mode 100644
index 000000000000..ddc53740f098
--- /dev/null
+++ b/drivers/net/ethernet/sfc/efx_reflash.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for AMD network controllers and boards
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/crc32.h>
+#include <net/devlink.h>
+#include "efx_reflash.h"
+#include "net_driver.h"
+#include "fw_formats.h"
+#include "mcdi_pcol.h"
+#include "mcdi.h"
+
+/* Try to parse a Reflash header at the specified offset */
+static bool efx_reflash_parse_reflash_header(const struct firmware *fw,
+ size_t header_offset, u32 *type,
+ u32 *subtype, const u8 **data,
+ size_t *data_size)
+{
+ size_t header_end, trailer_offset, trailer_end;
+ u32 magic, version, payload_size, header_len;
+ const u8 *header, *trailer;
+ u32 expected_crc, crc;
+
+ if (check_add_overflow(header_offset, EFX_REFLASH_HEADER_LENGTH_OFST +
+ EFX_REFLASH_HEADER_LENGTH_LEN,
+ &header_end))
+ return false;
+ if (fw->size < header_end)
+ return false;
+
+ header = fw->data + header_offset;
+ magic = get_unaligned_le32(header + EFX_REFLASH_HEADER_MAGIC_OFST);
+ if (magic != EFX_REFLASH_HEADER_MAGIC_VALUE)
+ return false;
+
+ version = get_unaligned_le32(header + EFX_REFLASH_HEADER_VERSION_OFST);
+ if (version != EFX_REFLASH_HEADER_VERSION_VALUE)
+ return false;
+
+ payload_size = get_unaligned_le32(header + EFX_REFLASH_HEADER_PAYLOAD_SIZE_OFST);
+ header_len = get_unaligned_le32(header + EFX_REFLASH_HEADER_LENGTH_OFST);
+ if (check_add_overflow(header_offset, header_len, &trailer_offset) ||
+ check_add_overflow(trailer_offset, payload_size, &trailer_offset) ||
+ check_add_overflow(trailer_offset, EFX_REFLASH_TRAILER_LEN,
+ &trailer_end))
+ return false;
+ if (fw->size < trailer_end)
+ return false;
+
+ trailer = fw->data + trailer_offset;
+ expected_crc = get_unaligned_le32(trailer + EFX_REFLASH_TRAILER_CRC_OFST);
+ /* Addition could overflow u32, but not size_t since we already
+ * checked trailer_offset didn't overflow. So cast to size_t first.
+ */
+ crc = crc32_le(0, header, (size_t)header_len + payload_size);
+ if (crc != expected_crc)
+ return false;
+
+ *type = get_unaligned_le32(header + EFX_REFLASH_HEADER_FIRMWARE_TYPE_OFST);
+ *subtype = get_unaligned_le32(header + EFX_REFLASH_HEADER_FIRMWARE_SUBTYPE_OFST);
+ if (*type == EFX_REFLASH_FIRMWARE_TYPE_BUNDLE) {
+ /* All the bundle data is written verbatim to NVRAM */
+ *data = fw->data;
+ *data_size = fw->size;
+ } else {
+ /* Other payload types strip the reflash header and trailer
+ * from the data written to NVRAM
+ */
+ *data = header + header_len;
+ *data_size = payload_size;
+ }
+
+ return true;
+}
+
+/* Map from FIRMWARE_TYPE to NVRAM_PARTITION_TYPE */
+static int efx_reflash_partition_type(u32 type, u32 subtype,
+ u32 *partition_type,
+ u32 *partition_subtype)
+{
+ int rc = 0;
+
+ switch (type) {
+ case EFX_REFLASH_FIRMWARE_TYPE_BOOTROM:
+ *partition_type = NVRAM_PARTITION_TYPE_EXPANSION_ROM;
+ *partition_subtype = subtype;
+ break;
+ case EFX_REFLASH_FIRMWARE_TYPE_BUNDLE:
+ *partition_type = NVRAM_PARTITION_TYPE_BUNDLE;
+ *partition_subtype = subtype;
+ break;
+ default:
+ /* Not supported */
+ rc = -EINVAL;
+ }
+
+ return rc;
+}
+
+/* Try to parse a SmartNIC image header at the specified offset */
+static bool efx_reflash_parse_snic_header(const struct firmware *fw,
+ size_t header_offset,
+ u32 *partition_type,
+ u32 *partition_subtype,
+ const u8 **data, size_t *data_size)
+{
+ u32 magic, version, payload_size, header_len, expected_crc, crc;
+ size_t header_end, payload_end;
+ const u8 *header;
+
+ if (check_add_overflow(header_offset, EFX_SNICIMAGE_HEADER_MINLEN,
+ &header_end) ||
+ fw->size < header_end)
+ return false;
+
+ header = fw->data + header_offset;
+ magic = get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_MAGIC_OFST);
+ if (magic != EFX_SNICIMAGE_HEADER_MAGIC_VALUE)
+ return false;
+
+ version = get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_VERSION_OFST);
+ if (version != EFX_SNICIMAGE_HEADER_VERSION_VALUE)
+ return false;
+
+ header_len = get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_LENGTH_OFST);
+ if (check_add_overflow(header_offset, header_len, &header_end))
+ return false;
+ payload_size = get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_PAYLOAD_SIZE_OFST);
+ if (check_add_overflow(header_end, payload_size, &payload_end) ||
+ fw->size < payload_end)
+ return false;
+
+ expected_crc = get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_CRC_OFST);
+
+ /* Calculate CRC omitting the expected CRC field itself */
+ crc = crc32_le(~0, header, EFX_SNICIMAGE_HEADER_CRC_OFST);
+ crc = ~crc32_le(crc,
+ header + EFX_SNICIMAGE_HEADER_CRC_OFST +
+ EFX_SNICIMAGE_HEADER_CRC_LEN,
+ header_len + payload_size - EFX_SNICIMAGE_HEADER_CRC_OFST -
+ EFX_SNICIMAGE_HEADER_CRC_LEN);
+ if (crc != expected_crc)
+ return false;
+
+ *partition_type =
+ get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_PARTITION_TYPE_OFST);
+ *partition_subtype =
+ get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_PARTITION_SUBTYPE_OFST);
+ *data = fw->data;
+ *data_size = fw->size;
+ return true;
+}
+
+/* Try to parse a SmartNIC bundle header at the specified offset */
+static bool efx_reflash_parse_snic_bundle_header(const struct firmware *fw,
+ size_t header_offset,
+ u32 *partition_type,
+ u32 *partition_subtype,
+ const u8 **data,
+ size_t *data_size)
+{
+ u32 magic, version, bundle_type, header_len, expected_crc, crc;
+ size_t header_end;
+ const u8 *header;
+
+ if (check_add_overflow(header_offset, EFX_SNICBUNDLE_HEADER_LEN,
+ &header_end))
+ return false;
+ if (fw->size < header_end)
+ return false;
+
+ header = fw->data + header_offset;
+ magic = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_MAGIC_OFST);
+ if (magic != EFX_SNICBUNDLE_HEADER_MAGIC_VALUE)
+ return false;
+
+ version = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_VERSION_OFST);
+ if (version != EFX_SNICBUNDLE_HEADER_VERSION_VALUE)
+ return false;
+
+ bundle_type = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_BUNDLE_TYPE_OFST);
+ if (bundle_type != NVRAM_PARTITION_TYPE_BUNDLE)
+ return false;
+
+ header_len = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_LENGTH_OFST);
+ if (header_len != EFX_SNICBUNDLE_HEADER_LEN)
+ return false;
+
+ expected_crc = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_CRC_OFST);
+ crc = ~crc32_le(~0, header, EFX_SNICBUNDLE_HEADER_CRC_OFST);
+ if (crc != expected_crc)
+ return false;
+
+ *partition_type = NVRAM_PARTITION_TYPE_BUNDLE;
+ *partition_subtype = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_BUNDLE_SUBTYPE_OFST);
+ *data = fw->data;
+ *data_size = fw->size;
+ return true;
+}
+
+/* Try to find a valid firmware payload in the firmware data.
+ * When we recognise a valid header, we parse it for the partition type
+ * (so we know where to ask the MC to write it to) and the location of
+ * the data blob to write.
+ */
+static int efx_reflash_parse_firmware_data(const struct firmware *fw,
+ u32 *partition_type,
+ u32 *partition_subtype,
+ const u8 **data, size_t *data_size)
+{
+ size_t header_offset;
+ u32 type, subtype;
+
+ /* Some packaging formats (such as CMS/PKCS#7 signed images)
+ * prepend a header for which finding the size is a non-trivial
+ * task, so step through the firmware data until we find a valid
+ * header.
+ *
+ * The checks are intended to reject firmware data that is clearly not
+ * in the expected format. They do not need to be exhaustive as the
+ * running firmware will perform its own comprehensive validity and
+ * compatibility checks during the update procedure.
+ *
+ * Firmware packages may contain multiple reflash images, e.g. a
+ * bundle containing one or more other images. Only check the
+ * outermost container by stopping after the first candidate image
+ * found even it is for an unsupported partition type.
+ */
+ for (header_offset = 0; header_offset < fw->size; header_offset++) {
+ if (efx_reflash_parse_snic_bundle_header(fw, header_offset,
+ partition_type,
+ partition_subtype,
+ data, data_size))
+ return 0;
+
+ if (efx_reflash_parse_snic_header(fw, header_offset,
+ partition_type,
+ partition_subtype, data,
+ data_size))
+ return 0;
+
+ if (efx_reflash_parse_reflash_header(fw, header_offset, &type,
+ &subtype, data, data_size))
+ return efx_reflash_partition_type(type, subtype,
+ partition_type,
+ partition_subtype);
+ }
+
+ return -EINVAL;
+}
+
+/* Limit the number of status updates during the erase or write phases */
+#define EFX_DEVLINK_STATUS_UPDATE_COUNT 50
+
+/* Expected timeout for the efx_mcdi_nvram_update_finish_polled() */
+#define EFX_DEVLINK_UPDATE_FINISH_TIMEOUT 900
+
+/* Ideal erase chunk size. This is a balance between minimising the number of
+ * MCDI requests to erase an entire partition whilst avoiding tripping the MCDI
+ * RPC timeout.
+ */
+#define EFX_NVRAM_ERASE_IDEAL_CHUNK_SIZE (64 * 1024)
+
+static int efx_reflash_erase_partition(struct efx_nic *efx,
+ struct netlink_ext_ack *extack,
+ struct devlink *devlink, u32 type,
+ size_t partition_size,
+ size_t align)
+{
+ size_t chunk, offset, next_update;
+ int rc;
+
+ /* Partitions that cannot be erased or do not require erase before
+ * write are advertised with a erase alignment/sector size of zero.
+ */
+ if (align == 0)
+ /* Nothing to do */
+ return 0;
+
+ if (partition_size % align)
+ return -EINVAL;
+
+ /* Erase the entire NVRAM partition a chunk at a time to avoid
+ * potentially tripping the MCDI RPC timeout.
+ */
+ if (align >= EFX_NVRAM_ERASE_IDEAL_CHUNK_SIZE)
+ chunk = align;
+ else
+ chunk = rounddown(EFX_NVRAM_ERASE_IDEAL_CHUNK_SIZE, align);
+
+ for (offset = 0, next_update = 0; offset < partition_size; offset += chunk) {
+ if (offset >= next_update) {
+ devlink_flash_update_status_notify(devlink, "Erasing",
+ NULL, offset,
+ partition_size);
+ next_update += partition_size / EFX_DEVLINK_STATUS_UPDATE_COUNT;
+ }
+
+ chunk = min_t(size_t, partition_size - offset, chunk);
+ rc = efx_mcdi_nvram_erase(efx, type, offset, chunk);
+ if (rc) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Erase failed for NVRAM partition %#x at %#zx-%#zx",
+ type, offset, offset + chunk - 1);
+ return rc;
+ }
+ }
+
+ devlink_flash_update_status_notify(devlink, "Erasing", NULL,
+ partition_size, partition_size);
+
+ return 0;
+}
+
+static int efx_reflash_write_partition(struct efx_nic *efx,
+ struct netlink_ext_ack *extack,
+ struct devlink *devlink, u32 type,
+ const u8 *data, size_t data_size,
+ size_t align)
+{
+ size_t write_max, chunk, offset, next_update;
+ int rc;
+
+ if (align == 0)
+ return -EINVAL;
+
+ /* Write the NVRAM partition in chunks that are the largest multiple
+ * of the partition's required write alignment that will fit into the
+ * MCDI NVRAM_WRITE RPC payload.
+ */
+ if (efx->type->mcdi_max_ver < 2)
+ write_max = MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_LEN *
+ MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_MAXNUM;
+ else
+ write_max = MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_LEN *
+ MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_MAXNUM_MCDI2;
+ chunk = rounddown(write_max, align);
+
+ for (offset = 0, next_update = 0; offset + chunk <= data_size; offset += chunk) {
+ if (offset >= next_update) {
+ devlink_flash_update_status_notify(devlink, "Writing",
+ NULL, offset,
+ data_size);
+ next_update += data_size / EFX_DEVLINK_STATUS_UPDATE_COUNT;
+ }
+
+ rc = efx_mcdi_nvram_write(efx, type, offset, data + offset, chunk);
+ if (rc) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Write failed for NVRAM partition %#x at %#zx-%#zx",
+ type, offset, offset + chunk - 1);
+ return rc;
+ }
+ }
+
+ /* Round up left over data to satisfy write alignment */
+ if (offset < data_size) {
+ size_t remaining = data_size - offset;
+ u8 *buf;
+
+ if (offset >= next_update)
+ devlink_flash_update_status_notify(devlink, "Writing",
+ NULL, offset,
+ data_size);
+
+ chunk = roundup(remaining, align);
+ buf = kmalloc(chunk, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ memcpy(buf, data + offset, remaining);
+ memset(buf + remaining, 0xFF, chunk - remaining);
+ rc = efx_mcdi_nvram_write(efx, type, offset, buf, chunk);
+ kfree(buf);
+ if (rc) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Write failed for NVRAM partition %#x at %#zx-%#zx",
+ type, offset, offset + chunk - 1);
+ return rc;
+ }
+ }
+
+ devlink_flash_update_status_notify(devlink, "Writing", NULL, data_size,
+ data_size);
+
+ return 0;
+}
+
+int efx_reflash_flash_firmware(struct efx_nic *efx, const struct firmware *fw,
+ struct netlink_ext_ack *extack)
+{
+ size_t data_size, size, erase_align, write_align;
+ struct devlink *devlink = efx->devlink;
+ u32 type, data_subtype, subtype;
+ const u8 *data;
+ bool protected;
+ int rc;
+
+ if (!efx_has_cap(efx, BUNDLE_UPDATE)) {
+ NL_SET_ERR_MSG_MOD(extack, "NVRAM bundle updates are not supported by the firmware");
+ return -EOPNOTSUPP;
+ }
+
+ devlink_flash_update_status_notify(devlink, "Checking update", NULL, 0, 0);
+
+ rc = efx_reflash_parse_firmware_data(fw, &type, &data_subtype, &data,
+ &data_size);
+ if (rc) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Firmware image validation check failed");
+ goto out;
+ }
+
+ mutex_lock(&efx->reflash_mutex);
+
+ rc = efx_mcdi_nvram_metadata(efx, type, &subtype, NULL, NULL, 0);
+ if (rc) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Metadata query for NVRAM partition %#x failed",
+ type);
+ goto out_unlock;
+ }
+
+ if (subtype != data_subtype) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Firmware image is not appropriate for this adapter");
+ rc = -EINVAL;
+ goto out_unlock;
+ }
+
+ rc = efx_mcdi_nvram_info(efx, type, &size, &erase_align, &write_align,
+ &protected);
+ if (rc) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Info query for NVRAM partition %#x failed",
+ type);
+ goto out_unlock;
+ }
+
+ if (protected) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "NVRAM partition %#x is protected",
+ type);
+ rc = -EPERM;
+ goto out_unlock;
+ }
+
+ if (write_align == 0) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "NVRAM partition %#x is not writable",
+ type);
+ rc = -EACCES;
+ goto out_unlock;
+ }
+
+ if (erase_align != 0 && size % erase_align) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "NVRAM partition %#x has a bad partition table entry, can't erase it",
+ type);
+ rc = -EACCES;
+ goto out_unlock;
+ }
+
+ if (data_size > size) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Firmware image is too big for NVRAM partition %#x",
+ type);
+ rc = -EFBIG;
+ goto out_unlock;
+ }
+
+ devlink_flash_update_status_notify(devlink, "Starting update", NULL, 0, 0);
+
+ rc = efx_mcdi_nvram_update_start(efx, type);
+ if (rc) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Update start request for NVRAM partition %#x failed",
+ type);
+ goto out_unlock;
+ }
+
+ rc = efx_reflash_erase_partition(efx, extack, devlink, type, size,
+ erase_align);
+ if (rc)
+ goto out_update_finish;
+
+ rc = efx_reflash_write_partition(efx, extack, devlink, type, data,
+ data_size, write_align);
+ if (rc)
+ goto out_update_finish;
+
+ devlink_flash_update_timeout_notify(devlink, "Finishing update", NULL,
+ EFX_DEVLINK_UPDATE_FINISH_TIMEOUT);
+
+out_update_finish:
+ if (rc)
+ /* Don't obscure the return code from an earlier failure */
+ efx_mcdi_nvram_update_finish(efx, type, EFX_UPDATE_FINISH_ABORT);
+ else
+ rc = efx_mcdi_nvram_update_finish_polled(efx, type);
+out_unlock:
+ mutex_unlock(&efx->reflash_mutex);
+out:
+ devlink_flash_update_status_notify(devlink, rc ? "Update failed" :
+ "Update complete",
+ NULL, 0, 0);
+ return rc;
+}
diff --git a/drivers/net/ethernet/sfc/efx_reflash.h b/drivers/net/ethernet/sfc/efx_reflash.h
new file mode 100644
index 000000000000..3dffac565161
--- /dev/null
+++ b/drivers/net/ethernet/sfc/efx_reflash.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for AMD network controllers and boards
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef _EFX_REFLASH_H
+#define _EFX_REFLASH_H
+
+#include "net_driver.h"
+#include <linux/firmware.h>
+
+int efx_reflash_flash_firmware(struct efx_nic *efx, const struct firmware *fw,
+ struct netlink_ext_ack *extack);
+
+#endif /* _EFX_REFLASH_H */
diff --git a/drivers/net/ethernet/sfc/fw_formats.h b/drivers/net/ethernet/sfc/fw_formats.h
new file mode 100644
index 000000000000..cbc350c96013
--- /dev/null
+++ b/drivers/net/ethernet/sfc/fw_formats.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for AMD network controllers and boards
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef _EFX_FW_FORMATS_H
+#define _EFX_FW_FORMATS_H
+
+/* Header layouts of firmware update images recognised by Efx NICs.
+ * The sources-of-truth for these layouts are AMD internal documents
+ * and sfregistry headers, neither of which are available externally
+ * nor usable directly by the driver.
+ *
+ * While each format includes a 'magic number', these are at different
+ * offsets in the various formats, and a legal header for one format
+ * could have the right value in whichever field occupies that offset
+ * to match another format's magic.
+ * Besides, some packaging formats (such as CMS/PKCS#7 signed images)
+ * prepend a header for which finding the size is a non-trivial task;
+ * rather than trying to parse those headers, we search byte-by-byte
+ * through the provided firmware image looking for a valid header.
+ * Thus, format recognition has to include validation of the checksum
+ * field, even though the firmware will validate that itself before
+ * applying the image.
+ */
+
+/* EF10 (Medford2, X2) "reflash" header format. Defined in SF-121352-AN */
+#define EFX_REFLASH_HEADER_MAGIC_OFST 0
+#define EFX_REFLASH_HEADER_MAGIC_LEN 4
+#define EFX_REFLASH_HEADER_MAGIC_VALUE 0x106F1A5
+
+#define EFX_REFLASH_HEADER_VERSION_OFST 4
+#define EFX_REFLASH_HEADER_VERSION_LEN 4
+#define EFX_REFLASH_HEADER_VERSION_VALUE 4
+
+#define EFX_REFLASH_HEADER_FIRMWARE_TYPE_OFST 8
+#define EFX_REFLASH_HEADER_FIRMWARE_TYPE_LEN 4
+#define EFX_REFLASH_FIRMWARE_TYPE_BOOTROM 0x2
+#define EFX_REFLASH_FIRMWARE_TYPE_BUNDLE 0xd
+
+#define EFX_REFLASH_HEADER_FIRMWARE_SUBTYPE_OFST 12
+#define EFX_REFLASH_HEADER_FIRMWARE_SUBTYPE_LEN 4
+
+#define EFX_REFLASH_HEADER_PAYLOAD_SIZE_OFST 16
+#define EFX_REFLASH_HEADER_PAYLOAD_SIZE_LEN 4
+
+#define EFX_REFLASH_HEADER_LENGTH_OFST 20
+#define EFX_REFLASH_HEADER_LENGTH_LEN 4
+
+/* Reflash trailer */
+#define EFX_REFLASH_TRAILER_CRC_OFST 0
+#define EFX_REFLASH_TRAILER_CRC_LEN 4
+
+#define EFX_REFLASH_TRAILER_LEN \
+ (EFX_REFLASH_TRAILER_CRC_OFST + EFX_REFLASH_TRAILER_CRC_LEN)
+
+/* EF100 "SmartNIC image" header format.
+ * Defined in sfregistry "src/layout/snic_image_hdr.h".
+ */
+#define EFX_SNICIMAGE_HEADER_MAGIC_OFST 16
+#define EFX_SNICIMAGE_HEADER_MAGIC_LEN 4
+#define EFX_SNICIMAGE_HEADER_MAGIC_VALUE 0x541C057A
+
+#define EFX_SNICIMAGE_HEADER_VERSION_OFST 20
+#define EFX_SNICIMAGE_HEADER_VERSION_LEN 4
+#define EFX_SNICIMAGE_HEADER_VERSION_VALUE 1
+
+#define EFX_SNICIMAGE_HEADER_LENGTH_OFST 24
+#define EFX_SNICIMAGE_HEADER_LENGTH_LEN 4
+
+#define EFX_SNICIMAGE_HEADER_PARTITION_TYPE_OFST 36
+#define EFX_SNICIMAGE_HEADER_PARTITION_TYPE_LEN 4
+
+#define EFX_SNICIMAGE_HEADER_PARTITION_SUBTYPE_OFST 40
+#define EFX_SNICIMAGE_HEADER_PARTITION_SUBTYPE_LEN 4
+
+#define EFX_SNICIMAGE_HEADER_PAYLOAD_SIZE_OFST 60
+#define EFX_SNICIMAGE_HEADER_PAYLOAD_SIZE_LEN 4
+
+#define EFX_SNICIMAGE_HEADER_CRC_OFST 64
+#define EFX_SNICIMAGE_HEADER_CRC_LEN 4
+
+#define EFX_SNICIMAGE_HEADER_MINLEN 256
+
+/* EF100 "SmartNIC bundle" header format. Defined in SF-122606-TC */
+#define EFX_SNICBUNDLE_HEADER_MAGIC_OFST 0
+#define EFX_SNICBUNDLE_HEADER_MAGIC_LEN 4
+#define EFX_SNICBUNDLE_HEADER_MAGIC_VALUE 0xB1001001
+
+#define EFX_SNICBUNDLE_HEADER_VERSION_OFST 4
+#define EFX_SNICBUNDLE_HEADER_VERSION_LEN 4
+#define EFX_SNICBUNDLE_HEADER_VERSION_VALUE 1
+
+#define EFX_SNICBUNDLE_HEADER_BUNDLE_TYPE_OFST 8
+#define EFX_SNICBUNDLE_HEADER_BUNDLE_TYPE_LEN 4
+
+#define EFX_SNICBUNDLE_HEADER_BUNDLE_SUBTYPE_OFST 12
+#define EFX_SNICBUNDLE_HEADER_BUNDLE_SUBTYPE_LEN 4
+
+#define EFX_SNICBUNDLE_HEADER_LENGTH_OFST 20
+#define EFX_SNICBUNDLE_HEADER_LENGTH_LEN 4
+
+#define EFX_SNICBUNDLE_HEADER_CRC_OFST 224
+#define EFX_SNICBUNDLE_HEADER_CRC_LEN 4
+
+#define EFX_SNICBUNDLE_HEADER_LEN \
+ (EFX_SNICBUNDLE_HEADER_CRC_OFST + EFX_SNICBUNDLE_HEADER_CRC_LEN)
+
+#endif /* _EFX_FW_FORMATS_H */
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index d461b1a6ce81..dbd2ee915838 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -1625,12 +1625,15 @@ fail:
return rc;
}
+#define EFX_MCDI_NVRAM_DEFAULT_WRITE_LEN 128
+
int efx_mcdi_nvram_info(struct efx_nic *efx, unsigned int type,
size_t *size_out, size_t *erase_size_out,
- bool *protected_out)
+ size_t *write_size_out, bool *protected_out)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_INFO_IN_LEN);
- MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_INFO_OUT_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_INFO_V2_OUT_LEN);
+ size_t write_size = 0;
size_t outlen;
int rc;
@@ -1645,6 +1648,12 @@ int efx_mcdi_nvram_info(struct efx_nic *efx, unsigned int type,
goto fail;
}
+ if (outlen >= MC_CMD_NVRAM_INFO_V2_OUT_LEN)
+ write_size = MCDI_DWORD(outbuf, NVRAM_INFO_V2_OUT_WRITESIZE);
+ else
+ write_size = EFX_MCDI_NVRAM_DEFAULT_WRITE_LEN;
+
+ *write_size_out = write_size;
*size_out = MCDI_DWORD(outbuf, NVRAM_INFO_OUT_SIZE);
*erase_size_out = MCDI_DWORD(outbuf, NVRAM_INFO_OUT_ERASESIZE);
*protected_out = !!(MCDI_DWORD(outbuf, NVRAM_INFO_OUT_FLAGS) &
@@ -2163,11 +2172,9 @@ out_free:
return rc;
}
-#ifdef CONFIG_SFC_MTD
-
#define EFX_MCDI_NVRAM_LEN_MAX 128
-static int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type)
+int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_START_V2_IN_LEN);
int rc;
@@ -2185,6 +2192,8 @@ static int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type)
return rc;
}
+#ifdef CONFIG_SFC_MTD
+
static int efx_mcdi_nvram_read(struct efx_nic *efx, unsigned int type,
loff_t offset, u8 *buffer, size_t length)
{
@@ -2209,13 +2218,20 @@ static int efx_mcdi_nvram_read(struct efx_nic *efx, unsigned int type,
return 0;
}
-static int efx_mcdi_nvram_write(struct efx_nic *efx, unsigned int type,
- loff_t offset, const u8 *buffer, size_t length)
+#endif /* CONFIG_SFC_MTD */
+
+int efx_mcdi_nvram_write(struct efx_nic *efx, unsigned int type,
+ loff_t offset, const u8 *buffer, size_t length)
{
- MCDI_DECLARE_BUF(inbuf,
- MC_CMD_NVRAM_WRITE_IN_LEN(EFX_MCDI_NVRAM_LEN_MAX));
+ efx_dword_t *inbuf;
+ size_t inlen;
int rc;
+ inlen = ALIGN(MC_CMD_NVRAM_WRITE_IN_LEN(length), 4);
+ inbuf = kzalloc(inlen, GFP_KERNEL);
+ if (!inbuf)
+ return -ENOMEM;
+
MCDI_SET_DWORD(inbuf, NVRAM_WRITE_IN_TYPE, type);
MCDI_SET_DWORD(inbuf, NVRAM_WRITE_IN_OFFSET, offset);
MCDI_SET_DWORD(inbuf, NVRAM_WRITE_IN_LENGTH, length);
@@ -2223,14 +2239,14 @@ static int efx_mcdi_nvram_write(struct efx_nic *efx, unsigned int type,
BUILD_BUG_ON(MC_CMD_NVRAM_WRITE_OUT_LEN != 0);
- rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_WRITE, inbuf,
- ALIGN(MC_CMD_NVRAM_WRITE_IN_LEN(length), 4),
- NULL, 0, NULL);
+ rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_WRITE, inbuf, inlen, NULL, 0, NULL);
+ kfree(inbuf);
+
return rc;
}
-static int efx_mcdi_nvram_erase(struct efx_nic *efx, unsigned int type,
- loff_t offset, size_t length)
+int efx_mcdi_nvram_erase(struct efx_nic *efx, unsigned int type, loff_t offset,
+ size_t length)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_ERASE_IN_LEN);
int rc;
@@ -2246,7 +2262,8 @@ static int efx_mcdi_nvram_erase(struct efx_nic *efx, unsigned int type,
return rc;
}
-static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type)
+int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type,
+ enum efx_update_finish_mode mode)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_LEN);
MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN);
@@ -2254,22 +2271,41 @@ static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type)
int rc, rc2;
MCDI_SET_DWORD(inbuf, NVRAM_UPDATE_FINISH_IN_TYPE, type);
- /* Always set this flag. Old firmware ignores it */
- MCDI_POPULATE_DWORD_1(inbuf, NVRAM_UPDATE_FINISH_V2_IN_FLAGS,
+
+ /* Old firmware doesn't support background update finish and abort
+ * operations. Fallback to waiting if the requested mode is not
+ * supported.
+ */
+ if (!efx_has_cap(efx, NVRAM_UPDATE_POLL_VERIFY_RESULT) ||
+ (!efx_has_cap(efx, NVRAM_UPDATE_ABORT_SUPPORTED) &&
+ mode == EFX_UPDATE_FINISH_ABORT))
+ mode = EFX_UPDATE_FINISH_WAIT;
+
+ MCDI_POPULATE_DWORD_4(inbuf, NVRAM_UPDATE_FINISH_V2_IN_FLAGS,
NVRAM_UPDATE_FINISH_V2_IN_FLAG_REPORT_VERIFY_RESULT,
- 1);
+ (mode != EFX_UPDATE_FINISH_ABORT),
+ NVRAM_UPDATE_FINISH_V2_IN_FLAG_RUN_IN_BACKGROUND,
+ (mode == EFX_UPDATE_FINISH_BACKGROUND),
+ NVRAM_UPDATE_FINISH_V2_IN_FLAG_POLL_VERIFY_RESULT,
+ (mode == EFX_UPDATE_FINISH_POLL),
+ NVRAM_UPDATE_FINISH_V2_IN_FLAG_ABORT,
+ (mode == EFX_UPDATE_FINISH_ABORT));
rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_UPDATE_FINISH, inbuf, sizeof(inbuf),
outbuf, sizeof(outbuf), &outlen);
if (!rc && outlen >= MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN) {
rc2 = MCDI_DWORD(outbuf, NVRAM_UPDATE_FINISH_V2_OUT_RESULT_CODE);
- if (rc2 != MC_CMD_NVRAM_VERIFY_RC_SUCCESS)
+ if (rc2 != MC_CMD_NVRAM_VERIFY_RC_SUCCESS &&
+ rc2 != MC_CMD_NVRAM_VERIFY_RC_PENDING)
netif_err(efx, drv, efx->net_dev,
"NVRAM update failed verification with code 0x%x\n",
rc2);
switch (rc2) {
case MC_CMD_NVRAM_VERIFY_RC_SUCCESS:
break;
+ case MC_CMD_NVRAM_VERIFY_RC_PENDING:
+ rc = -EAGAIN;
+ break;
case MC_CMD_NVRAM_VERIFY_RC_CMS_CHECK_FAILED:
case MC_CMD_NVRAM_VERIFY_RC_MESSAGE_DIGEST_CHECK_FAILED:
case MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHECK_FAILED:
@@ -2284,6 +2320,8 @@ static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type)
case MC_CMD_NVRAM_VERIFY_RC_NO_VALID_SIGNATURES:
case MC_CMD_NVRAM_VERIFY_RC_NO_TRUSTED_APPROVERS:
case MC_CMD_NVRAM_VERIFY_RC_NO_SIGNATURE_MATCH:
+ case MC_CMD_NVRAM_VERIFY_RC_REJECT_TEST_SIGNED:
+ case MC_CMD_NVRAM_VERIFY_RC_SECURITY_LEVEL_DOWNGRADE:
rc = -EPERM;
break;
default:
@@ -2296,6 +2334,42 @@ static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type)
return rc;
}
+#define EFX_MCDI_NVRAM_UPDATE_FINISH_INITIAL_POLL_DELAY_MS 5
+#define EFX_MCDI_NVRAM_UPDATE_FINISH_MAX_POLL_DELAY_MS 5000
+#define EFX_MCDI_NVRAM_UPDATE_FINISH_RETRIES 185
+
+int efx_mcdi_nvram_update_finish_polled(struct efx_nic *efx, unsigned int type)
+{
+ unsigned int delay = EFX_MCDI_NVRAM_UPDATE_FINISH_INITIAL_POLL_DELAY_MS;
+ unsigned int retry = 0;
+ int rc;
+
+ /* NVRAM updates can take a long time (e.g. up to 1 minute for bundle
+ * images). Polling for NVRAM update completion ensures that other MCDI
+ * commands can be issued before the background NVRAM update completes.
+ *
+ * The initial call either completes the update synchronously, or
+ * returns -EAGAIN to indicate processing is continuing. In the latter
+ * case, we poll for at least 900 seconds, at increasing intervals
+ * (5ms, 50ms, 500ms, 5s).
+ */
+ rc = efx_mcdi_nvram_update_finish(efx, type, EFX_UPDATE_FINISH_BACKGROUND);
+ while (rc == -EAGAIN) {
+ if (retry > EFX_MCDI_NVRAM_UPDATE_FINISH_RETRIES)
+ return -ETIMEDOUT;
+ retry++;
+
+ msleep(delay);
+ if (delay < EFX_MCDI_NVRAM_UPDATE_FINISH_MAX_POLL_DELAY_MS)
+ delay *= 10;
+
+ rc = efx_mcdi_nvram_update_finish(efx, type, EFX_UPDATE_FINISH_POLL);
+ }
+ return rc;
+}
+
+#ifdef CONFIG_SFC_MTD
+
int efx_mcdi_mtd_read(struct mtd_info *mtd, loff_t start,
size_t len, size_t *retlen, u8 *buffer)
{
@@ -2389,7 +2463,8 @@ int efx_mcdi_mtd_sync(struct mtd_info *mtd)
if (part->updating) {
part->updating = false;
- rc = efx_mcdi_nvram_update_finish(efx, part->nvram_type);
+ rc = efx_mcdi_nvram_update_finish(efx, part->nvram_type,
+ EFX_UPDATE_FINISH_WAIT);
}
return rc;
diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h
index cdb17d7c147f..3755cd3fe1e6 100644
--- a/drivers/net/ethernet/sfc/mcdi.h
+++ b/drivers/net/ethernet/sfc/mcdi.h
@@ -392,7 +392,7 @@ int efx_mcdi_log_ctrl(struct efx_nic *efx, bool evq, bool uart, u32 dest_evq);
int efx_mcdi_nvram_types(struct efx_nic *efx, u32 *nvram_types_out);
int efx_mcdi_nvram_info(struct efx_nic *efx, unsigned int type,
size_t *size_out, size_t *erase_size_out,
- bool *protected_out);
+ size_t *write_size_out, bool *protected_out);
int efx_new_mcdi_nvram_test_all(struct efx_nic *efx);
int efx_mcdi_nvram_metadata(struct efx_nic *efx, unsigned int type,
u32 *subtype, u16 version[4], char *desc,
@@ -424,6 +424,26 @@ static inline int efx_mcdi_mon_probe(struct efx_nic *efx) { return 0; }
static inline void efx_mcdi_mon_remove(struct efx_nic *efx) {}
#endif
+int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type);
+int efx_mcdi_nvram_write(struct efx_nic *efx, unsigned int type,
+ loff_t offset, const u8 *buffer, size_t length);
+int efx_mcdi_nvram_erase(struct efx_nic *efx, unsigned int type,
+ loff_t offset, size_t length);
+int efx_mcdi_nvram_metadata(struct efx_nic *efx, unsigned int type,
+ u32 *subtype, u16 version[4], char *desc,
+ size_t descsize);
+
+enum efx_update_finish_mode {
+ EFX_UPDATE_FINISH_WAIT,
+ EFX_UPDATE_FINISH_BACKGROUND,
+ EFX_UPDATE_FINISH_POLL,
+ EFX_UPDATE_FINISH_ABORT,
+};
+
+int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type,
+ enum efx_update_finish_mode mode);
+int efx_mcdi_nvram_update_finish_polled(struct efx_nic *efx, unsigned int type);
+
#ifdef CONFIG_SFC_MTD
int efx_mcdi_mtd_read(struct mtd_info *mtd, loff_t start, size_t len,
size_t *retlen, u8 *buffer);
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index f70a7b7d6345..8b0689f749b5 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -1006,6 +1006,7 @@ struct efx_mae;
* @dl_port: devlink port associated with the PF
* @mem_bar: The BAR that is mapped into membase.
* @reg_base: Offset from the start of the bar to the function control window.
+ * @reflash_mutex: Mutex for serialising firmware reflash operations.
* @monitor_work: Hardware monitor workitem
* @biu_lock: BIU (bus interface unit) lock
* @last_irq_cpu: Last CPU to handle a possible test interrupt. This
@@ -1191,6 +1192,7 @@ struct efx_nic {
struct devlink_port *dl_port;
unsigned int mem_bar;
u32 reg_base;
+ struct mutex reflash_mutex;
/* The following fields may be written more often */
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index e25db747a81a..55053528e498 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -530,6 +530,20 @@ struct dma_features {
#define STMMAC_DEFAULT_TWT_LS 0x1E
#define STMMAC_ET_MAX 0xFFFFF
+/* Common LPI register bits */
+#define LPI_CTRL_STATUS_LPITCSE BIT(21) /* LPI Tx Clock Stop Enable, gmac4, xgmac2 only */
+#define LPI_CTRL_STATUS_LPIATE BIT(20) /* LPI Timer Enable, gmac4 only */
+#define LPI_CTRL_STATUS_LPITXA BIT(19) /* Enable LPI TX Automate */
+#define LPI_CTRL_STATUS_PLSEN BIT(18) /* Enable PHY Link Status */
+#define LPI_CTRL_STATUS_PLS BIT(17) /* PHY Link Status */
+#define LPI_CTRL_STATUS_LPIEN BIT(16) /* LPI Enable */
+#define LPI_CTRL_STATUS_RLPIST BIT(9) /* Receive LPI state, gmac1000 only? */
+#define LPI_CTRL_STATUS_TLPIST BIT(8) /* Transmit LPI state, gmac1000 only? */
+#define LPI_CTRL_STATUS_RLPIEX BIT(3) /* Receive LPI Exit */
+#define LPI_CTRL_STATUS_RLPIEN BIT(2) /* Receive LPI Entry */
+#define LPI_CTRL_STATUS_TLPIEX BIT(1) /* Transmit LPI Exit */
+#define LPI_CTRL_STATUS_TLPIEN BIT(0) /* Transmit LPI Entry */
+
#define STMMAC_CHAIN_MODE 0x1
#define STMMAC_RING_MODE 0x2
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
index bd4eb187f8c6..392574bdd4a4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -29,10 +29,8 @@ struct tegra_eqos {
void __iomem *regs;
struct reset_control *rst;
- struct clk *clk_master;
struct clk *clk_slave;
struct clk *clk_tx;
- struct clk *clk_rx;
struct gpio_desc *reset;
};
@@ -123,49 +121,14 @@ static int dwc_qos_probe(struct platform_device *pdev,
struct plat_stmmacenet_data *plat_dat,
struct stmmac_resources *stmmac_res)
{
- int err;
-
- plat_dat->stmmac_clk = devm_clk_get(&pdev->dev, "apb_pclk");
- if (IS_ERR(plat_dat->stmmac_clk)) {
- dev_err(&pdev->dev, "apb_pclk clock not found.\n");
- return PTR_ERR(plat_dat->stmmac_clk);
- }
-
- err = clk_prepare_enable(plat_dat->stmmac_clk);
- if (err < 0) {
- dev_err(&pdev->dev, "failed to enable apb_pclk clock: %d\n",
- err);
- return err;
- }
-
- plat_dat->pclk = devm_clk_get(&pdev->dev, "phy_ref_clk");
- if (IS_ERR(plat_dat->pclk)) {
- dev_err(&pdev->dev, "phy_ref_clk clock not found.\n");
- err = PTR_ERR(plat_dat->pclk);
- goto disable;
- }
-
- err = clk_prepare_enable(plat_dat->pclk);
- if (err < 0) {
- dev_err(&pdev->dev, "failed to enable phy_ref clock: %d\n",
- err);
- goto disable;
+ for (int i = 0; i < plat_dat->num_clks; i++) {
+ if (strcmp(plat_dat->clks[i].id, "apb_pclk") == 0)
+ plat_dat->stmmac_clk = plat_dat->clks[i].clk;
+ else if (strcmp(plat_dat->clks[i].id, "phy_ref_clk") == 0)
+ plat_dat->pclk = plat_dat->clks[i].clk;
}
return 0;
-
-disable:
- clk_disable_unprepare(plat_dat->stmmac_clk);
- return err;
-}
-
-static void dwc_qos_remove(struct platform_device *pdev)
-{
- struct net_device *ndev = platform_get_drvdata(pdev);
- struct stmmac_priv *priv = netdev_priv(ndev);
-
- clk_disable_unprepare(priv->plat->pclk);
- clk_disable_unprepare(priv->plat->stmmac_clk);
}
#define SDMEMCOMPPADCTRL 0x8800
@@ -178,7 +141,7 @@ static void dwc_qos_remove(struct platform_device *pdev)
#define AUTO_CAL_STATUS 0x880c
#define AUTO_CAL_STATUS_ACTIVE BIT(31)
-static void tegra_eqos_fix_speed(void *priv, unsigned int speed, unsigned int mode)
+static void tegra_eqos_fix_speed(void *priv, int speed, unsigned int mode)
{
struct tegra_eqos *eqos = priv;
bool needs_calibration = false;
@@ -197,7 +160,7 @@ static void tegra_eqos_fix_speed(void *priv, unsigned int speed, unsigned int mo
break;
default:
- dev_err(eqos->dev, "invalid speed %u\n", speed);
+ dev_err(eqos->dev, "invalid speed %d\n", speed);
break;
}
@@ -278,52 +241,19 @@ static int tegra_eqos_probe(struct platform_device *pdev,
if (!is_of_node(dev->fwnode))
goto bypass_clk_reset_gpio;
- eqos->clk_master = devm_clk_get(&pdev->dev, "master_bus");
- if (IS_ERR(eqos->clk_master)) {
- err = PTR_ERR(eqos->clk_master);
- goto error;
- }
-
- err = clk_prepare_enable(eqos->clk_master);
- if (err < 0)
- goto error;
-
- eqos->clk_slave = devm_clk_get(&pdev->dev, "slave_bus");
- if (IS_ERR(eqos->clk_slave)) {
- err = PTR_ERR(eqos->clk_slave);
- goto disable_master;
- }
-
- data->stmmac_clk = eqos->clk_slave;
-
- err = clk_prepare_enable(eqos->clk_slave);
- if (err < 0)
- goto disable_master;
-
- eqos->clk_rx = devm_clk_get(&pdev->dev, "rx");
- if (IS_ERR(eqos->clk_rx)) {
- err = PTR_ERR(eqos->clk_rx);
- goto disable_slave;
- }
-
- err = clk_prepare_enable(eqos->clk_rx);
- if (err < 0)
- goto disable_slave;
-
- eqos->clk_tx = devm_clk_get(&pdev->dev, "tx");
- if (IS_ERR(eqos->clk_tx)) {
- err = PTR_ERR(eqos->clk_tx);
- goto disable_rx;
+ for (int i = 0; i < data->num_clks; i++) {
+ if (strcmp(data->clks[i].id, "slave_bus") == 0) {
+ eqos->clk_slave = data->clks[i].clk;
+ data->stmmac_clk = eqos->clk_slave;
+ } else if (strcmp(data->clks[i].id, "tx") == 0) {
+ eqos->clk_tx = data->clks[i].clk;
+ }
}
- err = clk_prepare_enable(eqos->clk_tx);
- if (err < 0)
- goto disable_rx;
-
eqos->reset = devm_gpiod_get(&pdev->dev, "phy-reset", GPIOD_OUT_HIGH);
if (IS_ERR(eqos->reset)) {
err = PTR_ERR(eqos->reset);
- goto disable_tx;
+ return err;
}
usleep_range(2000, 4000);
@@ -365,15 +295,7 @@ reset:
reset_control_assert(eqos->rst);
reset_phy:
gpiod_set_value(eqos->reset, 1);
-disable_tx:
- clk_disable_unprepare(eqos->clk_tx);
-disable_rx:
- clk_disable_unprepare(eqos->clk_rx);
-disable_slave:
- clk_disable_unprepare(eqos->clk_slave);
-disable_master:
- clk_disable_unprepare(eqos->clk_master);
-error:
+
return err;
}
@@ -383,10 +305,6 @@ static void tegra_eqos_remove(struct platform_device *pdev)
reset_control_assert(eqos->rst);
gpiod_set_value(eqos->reset, 1);
- clk_disable_unprepare(eqos->clk_tx);
- clk_disable_unprepare(eqos->clk_rx);
- clk_disable_unprepare(eqos->clk_slave);
- clk_disable_unprepare(eqos->clk_master);
}
struct dwc_eth_dwmac_data {
@@ -398,7 +316,6 @@ struct dwc_eth_dwmac_data {
static const struct dwc_eth_dwmac_data dwc_qos_data = {
.probe = dwc_qos_probe,
- .remove = dwc_qos_remove,
};
static const struct dwc_eth_dwmac_data tegra_eqos_data = {
@@ -434,9 +351,19 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev)
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
+ ret = devm_clk_bulk_get_all(&pdev->dev, &plat_dat->clks);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret, "Failed to retrieve all required clocks\n");
+ plat_dat->num_clks = ret;
+
+ ret = clk_bulk_prepare_enable(plat_dat->num_clks, plat_dat->clks);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "Failed to enable clocks\n");
+
ret = data->probe(pdev, plat_dat, &stmmac_res);
if (ret < 0) {
dev_err_probe(&pdev->dev, ret, "failed to probe subdriver\n");
+ clk_bulk_disable_unprepare(plat_dat->num_clks, plat_dat->clks);
return ret;
}
@@ -451,7 +378,8 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev)
return ret;
remove:
- data->remove(pdev);
+ if (data->remove)
+ data->remove(pdev);
return ret;
}
@@ -459,10 +387,15 @@ remove:
static void dwc_eth_dwmac_remove(struct platform_device *pdev)
{
const struct dwc_eth_dwmac_data *data = device_get_match_data(&pdev->dev);
+ struct plat_stmmacenet_data *plat_data = dev_get_platdata(&pdev->dev);
stmmac_dvr_remove(&pdev->dev);
- data->remove(pdev);
+ if (data->remove)
+ data->remove(pdev);
+
+ if (plat_data)
+ clk_bulk_disable_unprepare(plat_data->num_clks, plat_data->clks);
}
static const struct of_device_id dwc_eth_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
index 20d3a202bb8d..610204b51e3f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -51,7 +51,7 @@ struct imx_dwmac_ops {
int (*fix_soc_reset)(void *priv, void __iomem *ioaddr);
int (*set_intf_mode)(struct plat_stmmacenet_data *plat_dat);
- void (*fix_mac_speed)(void *priv, unsigned int speed, unsigned int mode);
+ void (*fix_mac_speed)(void *priv, int speed, unsigned int mode);
};
struct imx_priv_data {
@@ -192,7 +192,7 @@ static void imx_dwmac_exit(struct platform_device *pdev, void *priv)
/* nothing to do now */
}
-static void imx_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mode)
+static void imx_dwmac_fix_speed(void *priv, int speed, unsigned int mode)
{
struct plat_stmmacenet_data *plat_dat;
struct imx_priv_data *dwmac = priv;
@@ -208,7 +208,7 @@ static void imx_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mod
rate = rgmii_clock(speed);
if (rate < 0) {
- dev_err(dwmac->dev, "invalid speed %u\n", speed);
+ dev_err(dwmac->dev, "invalid speed %d\n", speed);
return;
}
@@ -217,7 +217,7 @@ static void imx_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mod
dev_err(dwmac->dev, "failed to set tx rate %lu\n", rate);
}
-static void imx93_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mode)
+static void imx93_dwmac_fix_speed(void *priv, int speed, unsigned int mode)
{
struct imx_priv_data *dwmac = priv;
unsigned int iface;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
index ddee6154d40b..0591756a2100 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
@@ -22,13 +22,13 @@ struct intel_dwmac {
};
struct intel_dwmac_data {
- void (*fix_mac_speed)(void *priv, unsigned int speed, unsigned int mode);
+ void (*fix_mac_speed)(void *priv, int speed, unsigned int mode);
unsigned long ptp_ref_clk_rate;
unsigned long tx_clk_rate;
bool tx_clk_en;
};
-static void kmb_eth_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode)
+static void kmb_eth_fix_mac_speed(void *priv, int speed, unsigned int mode)
{
struct intel_dwmac *dwmac = priv;
long rate;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index 61227dcf56dc..7f4b9c1cc32b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -112,7 +112,7 @@ struct ipq806x_gmac {
phy_interface_t phy_mode;
};
-static int get_clk_div_sgmii(struct ipq806x_gmac *gmac, unsigned int speed)
+static int get_clk_div_sgmii(struct ipq806x_gmac *gmac, int speed)
{
struct device *dev = &gmac->pdev->dev;
int div;
@@ -138,7 +138,7 @@ static int get_clk_div_sgmii(struct ipq806x_gmac *gmac, unsigned int speed)
return div;
}
-static int get_clk_div_rgmii(struct ipq806x_gmac *gmac, unsigned int speed)
+static int get_clk_div_rgmii(struct ipq806x_gmac *gmac, int speed)
{
struct device *dev = &gmac->pdev->dev;
int div;
@@ -164,7 +164,7 @@ static int get_clk_div_rgmii(struct ipq806x_gmac *gmac, unsigned int speed)
return div;
}
-static int ipq806x_gmac_set_speed(struct ipq806x_gmac *gmac, unsigned int speed)
+static int ipq806x_gmac_set_speed(struct ipq806x_gmac *gmac, int speed)
{
uint32_t clk_bits, val;
int div;
@@ -260,7 +260,7 @@ static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac)
return PTR_ERR_OR_ZERO(gmac->qsgmii_csr);
}
-static void ipq806x_gmac_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode)
+static void ipq806x_gmac_fix_mac_speed(void *priv, int speed, unsigned int mode)
{
struct ipq806x_gmac *gmac = priv;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
index bfe6e2d631bd..60a4e3330ccd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
@@ -149,8 +149,7 @@ static struct stmmac_pci_info loongson_gmac_pci_info = {
.setup = loongson_gmac_data,
};
-static void loongson_gnet_fix_speed(void *priv, unsigned int speed,
- unsigned int mode)
+static void loongson_gnet_fix_speed(void *priv, int speed, unsigned int mode)
{
struct loongson_data *ld = (struct loongson_data *)priv;
struct net_device *ndev = dev_get_drvdata(ld->dev);
@@ -574,6 +573,9 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id
if (ret)
goto err_disable_device;
+ plat->tx_fifo_size = SZ_16K * plat->tx_queues_to_use;
+ plat->rx_fifo_size = SZ_16K * plat->rx_queues_to_use;
+
if (dev_of_node(&pdev->dev))
ret = loongson_dwmac_dt_config(pdev, plat, &res);
else
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
index 5469fa1b429e..b115b7873cef 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
@@ -22,7 +22,7 @@ struct meson_dwmac {
void __iomem *reg;
};
-static void meson6_dwmac_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode)
+static void meson6_dwmac_fix_mac_speed(void *priv, int speed, unsigned int mode)
{
struct meson_dwmac *dwmac = priv;
unsigned int val;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index 2a5b38723635..192f270197c8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -111,7 +111,7 @@ struct qcom_ethqos {
unsigned int link_clk_rate;
struct clk *link_clk;
struct phy *serdes_phy;
- unsigned int speed;
+ int speed;
int serdes_speed;
phy_interface_t phy_mode;
@@ -175,7 +175,7 @@ static void rgmii_dump(void *priv)
#define RGMII_ID_MODE_10_LOW_SVS_CLK_FREQ (5 * 1000 * 1000UL)
static void
-ethqos_update_link_clk(struct qcom_ethqos *ethqos, unsigned int speed)
+ethqos_update_link_clk(struct qcom_ethqos *ethqos, int speed)
{
if (!phy_interface_mode_is_rgmii(ethqos->phy_mode))
return;
@@ -699,7 +699,7 @@ static int ethqos_configure(struct qcom_ethqos *ethqos)
return ethqos->configure_func(ethqos);
}
-static void ethqos_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode)
+static void ethqos_fix_mac_speed(void *priv, int speed, unsigned int mode)
{
struct qcom_ethqos *ethqos = priv;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index a4dc89e23a68..83d104a274c5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -1920,7 +1920,7 @@ static void rk_gmac_powerdown(struct rk_priv_data *gmac)
gmac_clk_enable(gmac, false);
}
-static void rk_fix_speed(void *priv, unsigned int speed, unsigned int mode)
+static void rk_fix_speed(void *priv, int speed, unsigned int mode)
{
struct rk_priv_data *bsp_priv = priv;
struct device *dev = &bsp_priv->pdev->dev;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c
index 9cc0e5817416..6a498833b8ed 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c
@@ -100,7 +100,7 @@ static void s32_gmac_exit(struct platform_device *pdev, void *priv)
clk_disable_unprepare(gmac->rx_clk);
}
-static void s32_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode)
+static void s32_fix_mac_speed(void *priv, int speed, unsigned int mode)
{
struct s32_priv_data *gmac = priv;
long tx_clk_rate;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index 16020b72dec8..6b78ae730466 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -61,7 +61,7 @@ struct socfpga_dwmac {
struct mdio_device *pcs_mdiodev;
};
-static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode)
+static void socfpga_dwmac_fix_mac_speed(void *priv, int speed, unsigned int mode)
{
struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv;
void __iomem *splitter_base = dwmac->splitter_base;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
index 0a0a363d3730..282c846dad0b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
@@ -31,7 +31,7 @@ struct starfive_dwmac {
const struct starfive_dwmac_data *data;
};
-static void starfive_dwmac_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode)
+static void starfive_dwmac_fix_mac_speed(void *priv, int speed, unsigned int mode)
{
struct starfive_dwmac *dwmac = priv;
long rate;
@@ -39,7 +39,7 @@ static void starfive_dwmac_fix_mac_speed(void *priv, unsigned int speed, unsigne
rate = rgmii_clock(speed);
if (rate < 0) {
- dev_err(dwmac->dev, "invalid speed %u\n", speed);
+ dev_err(dwmac->dev, "invalid speed %d\n", speed);
return;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index f25461c292fe..13b9c2a51fce 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -99,12 +99,12 @@ struct sti_dwmac {
int clk_sel_reg; /* GMAC ext clk selection register */
struct regmap *regmap;
bool gmac_en;
- u32 speed;
- void (*fix_retime_src)(void *priv, unsigned int speed, unsigned int mode);
+ int speed;
+ void (*fix_retime_src)(void *priv, int speed, unsigned int mode);
};
struct sti_dwmac_of_data {
- void (*fix_retime_src)(void *priv, unsigned int speed, unsigned int mode);
+ void (*fix_retime_src)(void *priv, int speed, unsigned int mode);
};
static u32 phy_intf_sels[] = {
@@ -132,7 +132,7 @@ static u32 stih4xx_tx_retime_val[] = {
| STIH4XX_ETH_SEL_INTERNAL_NOTEXT_PHYCLK,
};
-static void stih4xx_fix_retime_src(void *priv, u32 spd, unsigned int mode)
+static void stih4xx_fix_retime_src(void *priv, int spd, unsigned int mode)
{
struct sti_dwmac *dwmac = priv;
u32 src = dwmac->tx_retime_src;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
index 9ae318436c4a..1b1ce2888b2e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
@@ -72,7 +72,7 @@ static void sun7i_gmac_exit(struct platform_device *pdev, void *priv)
regulator_disable(gmac->regulator);
}
-static void sun7i_fix_speed(void *priv, unsigned int speed, unsigned int mode)
+static void sun7i_fix_speed(void *priv, int speed, unsigned int mode)
{
struct sunxi_priv_data *gmac = priv;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c
index dce84ed184e9..ddb1d8aba321 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c
@@ -104,7 +104,7 @@ static int thead_dwmac_set_txclk_dir(struct plat_stmmacenet_data *plat)
return 0;
}
-static void thead_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mode)
+static void thead_dwmac_fix_speed(void *priv, int speed, unsigned int mode)
{
struct plat_stmmacenet_data *plat;
struct thead_dwmac *dwmac = priv;
@@ -142,7 +142,7 @@ static void thead_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int m
div = rate * 10 / GMAC_MII_RATE;
break;
default:
- dev_err(dwmac->dev, "invalid speed %u\n", speed);
+ dev_err(dwmac->dev, "invalid speed %d\n", speed);
return;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
index eccf7f537467..33cf99797df5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
@@ -54,7 +54,7 @@ struct visconti_eth {
spinlock_t lock; /* lock to protect register update */
};
-static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode)
+static void visconti_eth_fix_mac_speed(void *priv, int speed, unsigned int mode)
{
struct visconti_eth *dwmac = priv;
struct net_device *netdev = dev_get_drvdata(dwmac->dev);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index 600fea8f712f..967a16212faf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -59,22 +59,11 @@ enum power_event {
/* Energy Efficient Ethernet (EEE)
*
* LPI status, timer and control register offset
+ * For LPI control and status bit definitions, see common.h.
*/
#define LPI_CTRL_STATUS 0x0030
#define LPI_TIMER_CTRL 0x0034
-/* LPI control and status defines */
-#define LPI_CTRL_STATUS_LPITXA 0x00080000 /* Enable LPI TX Automate */
-#define LPI_CTRL_STATUS_PLSEN 0x00040000 /* Enable PHY Link Status */
-#define LPI_CTRL_STATUS_PLS 0x00020000 /* PHY Link Status */
-#define LPI_CTRL_STATUS_LPIEN 0x00010000 /* LPI Enable */
-#define LPI_CTRL_STATUS_RLPIST 0x00000200 /* Receive LPI state */
-#define LPI_CTRL_STATUS_TLPIST 0x00000100 /* Transmit LPI state */
-#define LPI_CTRL_STATUS_RLPIEX 0x00000008 /* Receive LPI Exit */
-#define LPI_CTRL_STATUS_RLPIEN 0x00000004 /* Receive LPI Entry */
-#define LPI_CTRL_STATUS_TLPIEX 0x00000002 /* Transmit LPI Exit */
-#define LPI_CTRL_STATUS_TLPIEN 0x00000001 /* Transmit LPI Entry */
-
/* GMAC HW ADDR regs */
#define GMAC_ADDR_HIGH(reg) ((reg > 15) ? 0x00000800 + (reg - 16) * 8 : \
0x00000040 + (reg * 8))
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index 96bcda0856ec..a8b901cdf5cb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/ethtool.h>
#include <linux/io.h>
+#include <linux/string_choices.h>
#include "stmmac.h"
#include "stmmac_pcs.h"
#include "stmmac_ptp.h"
@@ -342,31 +343,24 @@ static int dwmac1000_irq_status(struct mac_device_info *hw,
return ret;
}
-static void dwmac1000_set_eee_mode(struct mac_device_info *hw,
- bool en_tx_lpi_clockgating)
+static int dwmac1000_set_lpi_mode(struct mac_device_info *hw,
+ enum stmmac_lpi_mode mode,
+ bool en_tx_lpi_clockgating, u32 et)
{
void __iomem *ioaddr = hw->pcsr;
u32 value;
- /*TODO - en_tx_lpi_clockgating treatment */
+ if (mode == STMMAC_LPI_TIMER)
+ return -EOPNOTSUPP;
- /* Enable the link status receive on RGMII, SGMII ore SMII
- * receive path and instruct the transmit to enter in LPI
- * state.
- */
value = readl(ioaddr + LPI_CTRL_STATUS);
- value |= LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA;
+ if (mode == STMMAC_LPI_FORCED)
+ value |= LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA;
+ else
+ value &= ~(LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA);
writel(value, ioaddr + LPI_CTRL_STATUS);
-}
-
-static void dwmac1000_reset_eee_mode(struct mac_device_info *hw)
-{
- void __iomem *ioaddr = hw->pcsr;
- u32 value;
- value = readl(ioaddr + LPI_CTRL_STATUS);
- value &= ~(LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA);
- writel(value, ioaddr + LPI_CTRL_STATUS);
+ return 0;
}
static void dwmac1000_set_eee_pls(struct mac_device_info *hw, int link)
@@ -509,8 +503,7 @@ const struct stmmac_ops dwmac1000_ops = {
.pmt = dwmac1000_pmt,
.set_umac_addr = dwmac1000_set_umac_addr,
.get_umac_addr = dwmac1000_get_umac_addr,
- .set_eee_mode = dwmac1000_set_eee_mode,
- .reset_eee_mode = dwmac1000_reset_eee_mode,
+ .set_lpi_mode = dwmac1000_set_lpi_mode,
.set_eee_timer = dwmac1000_set_eee_timer,
.set_eee_pls = dwmac1000_set_eee_pls,
.debug = dwmac1000_debug,
@@ -633,7 +626,7 @@ int dwmac1000_ptp_enable(struct ptp_clock_info *ptp,
}
netdev_dbg(priv->dev, "Auxiliary Snapshot %s.\n",
- on ? "enabled" : "disabled");
+ str_enabled_disabled(on));
writel(tcr_val, ptpaddr + PTP_TCR);
/* wait for auxts fifo clear to finish */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index 184d41a306af..42fe29a4e300 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -177,23 +177,13 @@ enum power_event {
/* Energy Efficient Ethernet (EEE) for GMAC4
*
* LPI status, timer and control register offset
+ * For LPI control and status bit definitions, see common.h.
*/
#define GMAC4_LPI_CTRL_STATUS 0xd0
#define GMAC4_LPI_TIMER_CTRL 0xd4
#define GMAC4_LPI_ENTRY_TIMER 0xd8
#define GMAC4_MAC_ONEUS_TIC_COUNTER 0xdc
-/* LPI control and status defines */
-#define GMAC4_LPI_CTRL_STATUS_LPITCSE BIT(21) /* LPI Tx Clock Stop Enable */
-#define GMAC4_LPI_CTRL_STATUS_LPIATE BIT(20) /* LPI Timer Enable */
-#define GMAC4_LPI_CTRL_STATUS_LPITXA BIT(19) /* Enable LPI TX Automate */
-#define GMAC4_LPI_CTRL_STATUS_PLS BIT(17) /* PHY Link Status */
-#define GMAC4_LPI_CTRL_STATUS_LPIEN BIT(16) /* LPI Enable */
-#define GMAC4_LPI_CTRL_STATUS_RLPIEX BIT(3) /* Receive LPI Exit */
-#define GMAC4_LPI_CTRL_STATUS_RLPIEN BIT(2) /* Receive LPI Entry */
-#define GMAC4_LPI_CTRL_STATUS_TLPIEX BIT(1) /* Transmit LPI Exit */
-#define GMAC4_LPI_CTRL_STATUS_TLPIEN BIT(0) /* Transmit LPI Entry */
-
/* MAC Debug bitmap */
#define GMAC_DEBUG_TFCSTS_MASK GENMASK(18, 17)
#define GMAC_DEBUG_TFCSTS_SHIFT 17
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 9ed8620580a8..cc4ddf608652 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -376,33 +376,46 @@ static void dwmac4_get_umac_addr(struct mac_device_info *hw,
GMAC_ADDR_LOW(reg_n));
}
-static void dwmac4_set_eee_mode(struct mac_device_info *hw,
- bool en_tx_lpi_clockgating)
+static int dwmac4_set_lpi_mode(struct mac_device_info *hw,
+ enum stmmac_lpi_mode mode,
+ bool en_tx_lpi_clockgating, u32 et)
{
void __iomem *ioaddr = hw->pcsr;
- u32 value;
+ u32 value, mask;
- /* Enable the link status receive on RGMII, SGMII ore SMII
- * receive path and instruct the transmit to enter in LPI
- * state.
- */
- value = readl(ioaddr + GMAC4_LPI_CTRL_STATUS);
- value |= GMAC4_LPI_CTRL_STATUS_LPIEN | GMAC4_LPI_CTRL_STATUS_LPITXA;
+ if (mode == STMMAC_LPI_DISABLE) {
+ value = 0;
+ } else {
+ value = LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA;
- if (en_tx_lpi_clockgating)
- value |= GMAC4_LPI_CTRL_STATUS_LPITCSE;
+ if (mode == STMMAC_LPI_TIMER) {
+ /* Return ERANGE if the timer is larger than the
+ * register field.
+ */
+ if (et > STMMAC_ET_MAX)
+ return -ERANGE;
- writel(value, ioaddr + GMAC4_LPI_CTRL_STATUS);
-}
+ /* Set the hardware LPI entry timer */
+ writel(et, ioaddr + GMAC4_LPI_ENTRY_TIMER);
-static void dwmac4_reset_eee_mode(struct mac_device_info *hw)
-{
- void __iomem *ioaddr = hw->pcsr;
- u32 value;
+ /* Interpret a zero LPI entry timer to mean
+ * immediate entry into LPI mode.
+ */
+ if (et)
+ value |= LPI_CTRL_STATUS_LPIATE;
+ }
- value = readl(ioaddr + GMAC4_LPI_CTRL_STATUS);
- value &= ~(GMAC4_LPI_CTRL_STATUS_LPIEN | GMAC4_LPI_CTRL_STATUS_LPITXA);
+ if (en_tx_lpi_clockgating)
+ value |= LPI_CTRL_STATUS_LPITCSE;
+ }
+
+ mask = LPI_CTRL_STATUS_LPIATE | LPI_CTRL_STATUS_LPIEN |
+ LPI_CTRL_STATUS_LPITXA | LPI_CTRL_STATUS_LPITCSE;
+
+ value |= readl(ioaddr + GMAC4_LPI_CTRL_STATUS) & ~mask;
writel(value, ioaddr + GMAC4_LPI_CTRL_STATUS);
+
+ return 0;
}
static void dwmac4_set_eee_pls(struct mac_device_info *hw, int link)
@@ -413,34 +426,13 @@ static void dwmac4_set_eee_pls(struct mac_device_info *hw, int link)
value = readl(ioaddr + GMAC4_LPI_CTRL_STATUS);
if (link)
- value |= GMAC4_LPI_CTRL_STATUS_PLS;
+ value |= LPI_CTRL_STATUS_PLS;
else
- value &= ~GMAC4_LPI_CTRL_STATUS_PLS;
+ value &= ~LPI_CTRL_STATUS_PLS;
writel(value, ioaddr + GMAC4_LPI_CTRL_STATUS);
}
-static void dwmac4_set_eee_lpi_entry_timer(struct mac_device_info *hw, u32 et)
-{
- void __iomem *ioaddr = hw->pcsr;
- u32 value = et & STMMAC_ET_MAX;
- int regval;
-
- /* Program LPI entry timer value into register */
- writel(value, ioaddr + GMAC4_LPI_ENTRY_TIMER);
-
- /* Enable/disable LPI entry timer */
- regval = readl(ioaddr + GMAC4_LPI_CTRL_STATUS);
- regval |= GMAC4_LPI_CTRL_STATUS_LPIEN | GMAC4_LPI_CTRL_STATUS_LPITXA;
-
- if (et)
- regval |= GMAC4_LPI_CTRL_STATUS_LPIATE;
- else
- regval &= ~GMAC4_LPI_CTRL_STATUS_LPIATE;
-
- writel(regval, ioaddr + GMAC4_LPI_CTRL_STATUS);
-}
-
static void dwmac4_set_eee_timer(struct mac_device_info *hw, int ls, int tw)
{
void __iomem *ioaddr = hw->pcsr;
@@ -849,17 +841,17 @@ static int dwmac4_irq_status(struct mac_device_info *hw,
/* Clear LPI interrupt by reading MAC_LPI_Control_Status */
u32 status = readl(ioaddr + GMAC4_LPI_CTRL_STATUS);
- if (status & GMAC4_LPI_CTRL_STATUS_TLPIEN) {
+ if (status & LPI_CTRL_STATUS_TLPIEN) {
ret |= CORE_IRQ_TX_PATH_IN_LPI_MODE;
x->irq_tx_path_in_lpi_mode_n++;
}
- if (status & GMAC4_LPI_CTRL_STATUS_TLPIEX) {
+ if (status & LPI_CTRL_STATUS_TLPIEX) {
ret |= CORE_IRQ_TX_PATH_EXIT_LPI_MODE;
x->irq_tx_path_exit_lpi_mode_n++;
}
- if (status & GMAC4_LPI_CTRL_STATUS_RLPIEN)
+ if (status & LPI_CTRL_STATUS_RLPIEN)
x->irq_rx_path_in_lpi_mode_n++;
- if (status & GMAC4_LPI_CTRL_STATUS_RLPIEX)
+ if (status & LPI_CTRL_STATUS_RLPIEX)
x->irq_rx_path_exit_lpi_mode_n++;
}
@@ -1201,9 +1193,7 @@ const struct stmmac_ops dwmac4_ops = {
.pmt = dwmac4_pmt,
.set_umac_addr = dwmac4_set_umac_addr,
.get_umac_addr = dwmac4_get_umac_addr,
- .set_eee_mode = dwmac4_set_eee_mode,
- .reset_eee_mode = dwmac4_reset_eee_mode,
- .set_eee_lpi_entry_timer = dwmac4_set_eee_lpi_entry_timer,
+ .set_lpi_mode = dwmac4_set_lpi_mode,
.set_eee_timer = dwmac4_set_eee_timer,
.set_eee_pls = dwmac4_set_eee_pls,
.pcs_ctrl_ane = dwmac4_ctrl_ane,
@@ -1245,9 +1235,7 @@ const struct stmmac_ops dwmac410_ops = {
.pmt = dwmac4_pmt,
.set_umac_addr = dwmac4_set_umac_addr,
.get_umac_addr = dwmac4_get_umac_addr,
- .set_eee_mode = dwmac4_set_eee_mode,
- .reset_eee_mode = dwmac4_reset_eee_mode,
- .set_eee_lpi_entry_timer = dwmac4_set_eee_lpi_entry_timer,
+ .set_lpi_mode = dwmac4_set_lpi_mode,
.set_eee_timer = dwmac4_set_eee_timer,
.set_eee_pls = dwmac4_set_eee_pls,
.pcs_ctrl_ane = dwmac4_ctrl_ane,
@@ -1291,9 +1279,7 @@ const struct stmmac_ops dwmac510_ops = {
.pmt = dwmac4_pmt,
.set_umac_addr = dwmac4_set_umac_addr,
.get_umac_addr = dwmac4_get_umac_addr,
- .set_eee_mode = dwmac4_set_eee_mode,
- .reset_eee_mode = dwmac4_reset_eee_mode,
- .set_eee_lpi_entry_timer = dwmac4_set_eee_lpi_entry_timer,
+ .set_lpi_mode = dwmac4_set_lpi_mode,
.set_eee_timer = dwmac4_set_eee_timer,
.set_eee_pls = dwmac4_set_eee_pls,
.pcs_ctrl_ane = dwmac4_ctrl_ane,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 20027d3c25a7..a03f5d771566 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -112,14 +112,7 @@
#define XGMAC_MGKPKTEN BIT(1)
#define XGMAC_PWRDWN BIT(0)
#define XGMAC_LPI_CTRL 0x000000d0
-#define XGMAC_TXCGE BIT(21)
-#define XGMAC_LPITXA BIT(19)
-#define XGMAC_PLS BIT(17)
-#define XGMAC_LPITXEN BIT(16)
-#define XGMAC_RLPIEX BIT(3)
-#define XGMAC_RLPIEN BIT(2)
-#define XGMAC_TLPIEX BIT(1)
-#define XGMAC_TLPIEN BIT(0)
+/* For definitions, see LPI_CTRL_STATUS_xxx in common.h */
#define XGMAC_LPI_TIMER_CTRL 0x000000d4
#define XGMAC_HW_FEATURE0 0x0000011c
#define XGMAC_HWFEAT_EDMA BIT(31)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index 9a60a6e8f633..a6d395c6bacd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -316,17 +316,17 @@ static int dwxgmac2_host_irq_status(struct mac_device_info *hw,
if (stat & XGMAC_LPIIS) {
u32 lpi = readl(ioaddr + XGMAC_LPI_CTRL);
- if (lpi & XGMAC_TLPIEN) {
+ if (lpi & LPI_CTRL_STATUS_TLPIEN) {
ret |= CORE_IRQ_TX_PATH_IN_LPI_MODE;
x->irq_tx_path_in_lpi_mode_n++;
}
- if (lpi & XGMAC_TLPIEX) {
+ if (lpi & LPI_CTRL_STATUS_TLPIEX) {
ret |= CORE_IRQ_TX_PATH_EXIT_LPI_MODE;
x->irq_tx_path_exit_lpi_mode_n++;
}
- if (lpi & XGMAC_RLPIEN)
+ if (lpi & LPI_CTRL_STATUS_RLPIEN)
x->irq_rx_path_in_lpi_mode_n++;
- if (lpi & XGMAC_RLPIEX)
+ if (lpi & LPI_CTRL_STATUS_RLPIEX)
x->irq_rx_path_exit_lpi_mode_n++;
}
@@ -425,29 +425,28 @@ static void dwxgmac2_get_umac_addr(struct mac_device_info *hw,
addr[5] = (hi_addr >> 8) & 0xff;
}
-static void dwxgmac2_set_eee_mode(struct mac_device_info *hw,
- bool en_tx_lpi_clockgating)
+static int dwxgmac2_set_lpi_mode(struct mac_device_info *hw,
+ enum stmmac_lpi_mode mode,
+ bool en_tx_lpi_clockgating, u32 et)
{
void __iomem *ioaddr = hw->pcsr;
u32 value;
- value = readl(ioaddr + XGMAC_LPI_CTRL);
-
- value |= XGMAC_LPITXEN | XGMAC_LPITXA;
- if (en_tx_lpi_clockgating)
- value |= XGMAC_TXCGE;
-
- writel(value, ioaddr + XGMAC_LPI_CTRL);
-}
-
-static void dwxgmac2_reset_eee_mode(struct mac_device_info *hw)
-{
- void __iomem *ioaddr = hw->pcsr;
- u32 value;
+ if (mode == STMMAC_LPI_TIMER)
+ return -EOPNOTSUPP;
value = readl(ioaddr + XGMAC_LPI_CTRL);
- value &= ~(XGMAC_LPITXEN | XGMAC_LPITXA | XGMAC_TXCGE);
+ if (mode == STMMAC_LPI_FORCED) {
+ value |= LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA;
+ if (en_tx_lpi_clockgating)
+ value |= LPI_CTRL_STATUS_LPITCSE;
+ } else {
+ value &= ~(LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA |
+ LPI_CTRL_STATUS_LPITCSE);
+ }
writel(value, ioaddr + XGMAC_LPI_CTRL);
+
+ return 0;
}
static void dwxgmac2_set_eee_pls(struct mac_device_info *hw, int link)
@@ -457,9 +456,9 @@ static void dwxgmac2_set_eee_pls(struct mac_device_info *hw, int link)
value = readl(ioaddr + XGMAC_LPI_CTRL);
if (link)
- value |= XGMAC_PLS;
+ value |= LPI_CTRL_STATUS_PLS;
else
- value &= ~XGMAC_PLS;
+ value &= ~LPI_CTRL_STATUS_PLS;
writel(value, ioaddr + XGMAC_LPI_CTRL);
}
@@ -1525,8 +1524,7 @@ const struct stmmac_ops dwxgmac210_ops = {
.pmt = dwxgmac2_pmt,
.set_umac_addr = dwxgmac2_set_umac_addr,
.get_umac_addr = dwxgmac2_get_umac_addr,
- .set_eee_mode = dwxgmac2_set_eee_mode,
- .reset_eee_mode = dwxgmac2_reset_eee_mode,
+ .set_lpi_mode = dwxgmac2_set_lpi_mode,
.set_eee_timer = dwxgmac2_set_eee_timer,
.set_eee_pls = dwxgmac2_set_eee_pls,
.debug = NULL,
@@ -1582,8 +1580,7 @@ const struct stmmac_ops dwxlgmac2_ops = {
.pmt = dwxgmac2_pmt,
.set_umac_addr = dwxgmac2_set_umac_addr,
.get_umac_addr = dwxgmac2_get_umac_addr,
- .set_eee_mode = dwxgmac2_set_eee_mode,
- .reset_eee_mode = dwxgmac2_reset_eee_mode,
+ .set_lpi_mode = dwxgmac2_set_lpi_mode,
.set_eee_timer = dwxgmac2_set_eee_timer,
.set_eee_pls = dwxgmac2_set_eee_pls,
.debug = NULL,
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 0f200b72c225..27c63a9fc163 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -306,6 +306,12 @@ struct stmmac_pps_cfg;
struct stmmac_rss;
struct stmmac_est;
+enum stmmac_lpi_mode {
+ STMMAC_LPI_DISABLE,
+ STMMAC_LPI_FORCED,
+ STMMAC_LPI_TIMER,
+};
+
/* Helpers to program the MAC core */
struct stmmac_ops {
/* MAC core initialization */
@@ -360,10 +366,9 @@ struct stmmac_ops {
unsigned int reg_n);
void (*get_umac_addr)(struct mac_device_info *hw, unsigned char *addr,
unsigned int reg_n);
- void (*set_eee_mode)(struct mac_device_info *hw,
- bool en_tx_lpi_clockgating);
- void (*reset_eee_mode)(struct mac_device_info *hw);
- void (*set_eee_lpi_entry_timer)(struct mac_device_info *hw, u32 et);
+ int (*set_lpi_mode)(struct mac_device_info *hw,
+ enum stmmac_lpi_mode mode,
+ bool en_tx_lpi_clockgating, u32 et);
void (*set_eee_timer)(struct mac_device_info *hw, int ls, int tw);
void (*set_eee_pls)(struct mac_device_info *hw, int link);
void (*debug)(struct stmmac_priv *priv, void __iomem *ioaddr,
@@ -467,12 +472,8 @@ struct stmmac_ops {
stmmac_do_void_callback(__priv, mac, set_umac_addr, __args)
#define stmmac_get_umac_addr(__priv, __args...) \
stmmac_do_void_callback(__priv, mac, get_umac_addr, __args)
-#define stmmac_set_eee_mode(__priv, __args...) \
- stmmac_do_void_callback(__priv, mac, set_eee_mode, __args)
-#define stmmac_reset_eee_mode(__priv, __args...) \
- stmmac_do_void_callback(__priv, mac, reset_eee_mode, __args)
-#define stmmac_set_eee_lpi_timer(__priv, __args...) \
- stmmac_do_void_callback(__priv, mac, set_eee_lpi_entry_timer, __args)
+#define stmmac_set_lpi_mode(__priv, __args...) \
+ stmmac_do_callback(__priv, mac, set_lpi_mode, __args)
#define stmmac_set_eee_timer(__priv, __args...) \
stmmac_do_void_callback(__priv, mac, set_eee_timer, __args)
#define stmmac_set_eee_pls(__priv, __args...) \
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index f05cae103d83..3395188c198a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -282,8 +282,7 @@ struct stmmac_priv {
struct stmmac_channel channel[STMMAC_CH_MAX];
int speed;
- unsigned int flow_ctrl;
- unsigned int pause;
+ unsigned int pause_time;
struct mii_bus *mii;
struct phylink_config phylink_config;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c0ae7db96f46..4d542f482ecb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -88,13 +88,13 @@ MODULE_PARM_DESC(phyaddr, "Physical device address");
#define STMMAC_XDP_TX BIT(1)
#define STMMAC_XDP_REDIRECT BIT(2)
-static int flow_ctrl = FLOW_AUTO;
+static int flow_ctrl = 0xdead;
module_param(flow_ctrl, int, 0644);
-MODULE_PARM_DESC(flow_ctrl, "Flow control ability [on/off]");
+MODULE_PARM_DESC(flow_ctrl, "Flow control ability [on/off] (obsolete)");
static int pause = PAUSE_TIME;
module_param(pause, int, 0644);
-MODULE_PARM_DESC(pause, "Flow Control Pause Time");
+MODULE_PARM_DESC(pause, "Flow Control Pause Time (units of 512 bit times)");
#define TC_DEFAULT 64
static int tc = TC_DEFAULT;
@@ -188,12 +188,11 @@ static void stmmac_verify_args(void)
watchdog = TX_TIMEO;
if (unlikely((buf_sz < DEFAULT_BUFSIZE) || (buf_sz > BUF_SIZE_16KiB)))
buf_sz = DEFAULT_BUFSIZE;
- if (unlikely(flow_ctrl > 1))
- flow_ctrl = FLOW_AUTO;
- else if (likely(flow_ctrl < 0))
- flow_ctrl = FLOW_OFF;
if (unlikely((pause < 0) || (pause > 0xffff)))
pause = PAUSE_TIME;
+
+ if (flow_ctrl != 0xdead)
+ pr_warn("stmmac: module parameter 'flow_ctrl' is obsolete - please remove from your module configuration\n");
}
static void __stmmac_disable_all_queues(struct stmmac_priv *priv)
@@ -390,16 +389,6 @@ static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv, u32 queue)
return dirty;
}
-static void stmmac_disable_hw_lpi_timer(struct stmmac_priv *priv)
-{
- stmmac_set_eee_lpi_timer(priv, priv->hw, 0);
-}
-
-static void stmmac_enable_hw_lpi_timer(struct stmmac_priv *priv)
-{
- stmmac_set_eee_lpi_timer(priv, priv->hw, priv->tx_lpi_timer);
-}
-
static bool stmmac_eee_tx_busy(struct stmmac_priv *priv)
{
u32 tx_cnt = priv->plat->tx_queues_to_use;
@@ -436,8 +425,9 @@ static void stmmac_try_to_start_sw_lpi(struct stmmac_priv *priv)
/* Check and enter in LPI mode */
if (!priv->tx_path_in_lpi_mode)
- stmmac_set_eee_mode(priv, priv->hw,
- priv->plat->flags & STMMAC_FLAG_EN_TX_LPI_CLOCKGATING);
+ stmmac_set_lpi_mode(priv, priv->hw, STMMAC_LPI_FORCED,
+ priv->plat->flags & STMMAC_FLAG_EN_TX_LPI_CLOCKGATING,
+ 0);
}
/**
@@ -447,8 +437,8 @@ static void stmmac_try_to_start_sw_lpi(struct stmmac_priv *priv)
*/
static void stmmac_stop_sw_lpi(struct stmmac_priv *priv)
{
- stmmac_reset_eee_mode(priv, priv->hw);
del_timer_sync(&priv->eee_ctrl_timer);
+ stmmac_set_lpi_mode(priv, priv->hw, STMMAC_LPI_DISABLE, false, 0);
priv->tx_path_in_lpi_mode = false;
}
@@ -466,74 +456,6 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t)
stmmac_try_to_start_sw_lpi(priv);
}
-/**
- * stmmac_eee_init - init EEE
- * @priv: driver private structure
- * @active: indicates whether EEE should be enabled.
- * Description:
- * if the GMAC supports the EEE (from the HW cap reg) and the phy device
- * can also manage EEE, this function enable the LPI state and start related
- * timer.
- */
-static void stmmac_eee_init(struct stmmac_priv *priv, bool active)
-{
- priv->eee_active = active;
-
- /* Check if MAC core supports the EEE feature. */
- if (!priv->dma_cap.eee) {
- priv->eee_enabled = false;
- return;
- }
-
- mutex_lock(&priv->lock);
-
- /* Check if it needs to be deactivated */
- if (!priv->eee_active) {
- if (priv->eee_enabled) {
- netdev_dbg(priv->dev, "disable EEE\n");
- priv->eee_sw_timer_en = false;
- stmmac_disable_hw_lpi_timer(priv);
- del_timer_sync(&priv->eee_ctrl_timer);
- stmmac_set_eee_timer(priv, priv->hw, 0,
- STMMAC_DEFAULT_TWT_LS);
- if (priv->hw->xpcs)
- xpcs_config_eee(priv->hw->xpcs,
- priv->plat->mult_fact_100ns,
- false);
- }
- priv->eee_enabled = false;
- mutex_unlock(&priv->lock);
- return;
- }
-
- if (priv->eee_active && !priv->eee_enabled) {
- stmmac_set_eee_timer(priv, priv->hw, STMMAC_DEFAULT_LIT_LS,
- STMMAC_DEFAULT_TWT_LS);
- if (priv->hw->xpcs)
- xpcs_config_eee(priv->hw->xpcs,
- priv->plat->mult_fact_100ns,
- true);
- }
-
- if (priv->plat->has_gmac4 && priv->tx_lpi_timer <= STMMAC_ET_MAX) {
- /* Use hardware LPI mode */
- del_timer_sync(&priv->eee_ctrl_timer);
- priv->tx_path_in_lpi_mode = false;
- priv->eee_sw_timer_en = false;
- stmmac_enable_hw_lpi_timer(priv);
- } else {
- /* Use software LPI mode */
- priv->eee_sw_timer_en = true;
- stmmac_disable_hw_lpi_timer(priv);
- stmmac_restart_sw_lpi_timer(priv);
- }
-
- priv->eee_enabled = true;
-
- mutex_unlock(&priv->lock);
- netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n");
-}
-
/* stmmac_get_tx_hwtstamp - get HW TX timestamps
* @priv: driver private structure
* @p : descriptor pointer
@@ -935,14 +857,16 @@ static void stmmac_release_ptp(struct stmmac_priv *priv)
* stmmac_mac_flow_ctrl - Configure flow control in all queues
* @priv: driver private structure
* @duplex: duplex passed to the next function
+ * @flow_ctrl: desired flow control modes
* Description: It is used for configuring the flow control in all queues
*/
-static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex)
+static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex,
+ unsigned int flow_ctrl)
{
u32 tx_cnt = priv->plat->tx_queues_to_use;
- stmmac_flow_ctrl(priv, priv->hw, duplex, priv->flow_ctrl,
- priv->pause, tx_cnt);
+ stmmac_flow_ctrl(priv, priv->hw, duplex, flow_ctrl, priv->pause_time,
+ tx_cnt);
}
static unsigned long stmmac_mac_get_caps(struct phylink_config *config,
@@ -1002,6 +926,7 @@ static void stmmac_mac_link_up(struct phylink_config *config,
bool tx_pause, bool rx_pause)
{
struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
+ unsigned int flow_ctrl;
u32 old_ctrl, ctrl;
if ((priv->plat->flags & STMMAC_FLAG_SERDES_UP_AFTER_PHY_LINKUP) &&
@@ -1082,15 +1007,15 @@ static void stmmac_mac_link_up(struct phylink_config *config,
/* Flow Control operation */
if (rx_pause && tx_pause)
- priv->flow_ctrl = FLOW_AUTO;
+ flow_ctrl = FLOW_AUTO;
else if (rx_pause && !tx_pause)
- priv->flow_ctrl = FLOW_RX;
+ flow_ctrl = FLOW_RX;
else if (!rx_pause && tx_pause)
- priv->flow_ctrl = FLOW_TX;
+ flow_ctrl = FLOW_TX;
else
- priv->flow_ctrl = FLOW_OFF;
+ flow_ctrl = FLOW_OFF;
- stmmac_mac_flow_ctrl(priv, duplex);
+ stmmac_mac_flow_ctrl(priv, duplex, flow_ctrl);
if (ctrl != old_ctrl)
writel(ctrl, priv->ioaddr + MAC_CTRL_REG);
@@ -1110,16 +1035,53 @@ static void stmmac_mac_disable_tx_lpi(struct phylink_config *config)
{
struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
- stmmac_eee_init(priv, false);
+ priv->eee_active = false;
+
+ mutex_lock(&priv->lock);
+
+ priv->eee_enabled = false;
+
+ netdev_dbg(priv->dev, "disable EEE\n");
+ priv->eee_sw_timer_en = false;
+ del_timer_sync(&priv->eee_ctrl_timer);
+ stmmac_set_lpi_mode(priv, priv->hw, STMMAC_LPI_DISABLE, false, 0);
+ priv->tx_path_in_lpi_mode = false;
+
+ stmmac_set_eee_timer(priv, priv->hw, 0, STMMAC_DEFAULT_TWT_LS);
+ mutex_unlock(&priv->lock);
}
static int stmmac_mac_enable_tx_lpi(struct phylink_config *config, u32 timer,
bool tx_clk_stop)
{
struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
+ int ret;
priv->tx_lpi_timer = timer;
- stmmac_eee_init(priv, true);
+ priv->eee_active = true;
+
+ mutex_lock(&priv->lock);
+
+ priv->eee_enabled = true;
+
+ stmmac_set_eee_timer(priv, priv->hw, STMMAC_DEFAULT_LIT_LS,
+ STMMAC_DEFAULT_TWT_LS);
+
+ /* Try to cnfigure the hardware timer. */
+ ret = stmmac_set_lpi_mode(priv, priv->hw, STMMAC_LPI_TIMER,
+ priv->plat->flags & STMMAC_FLAG_EN_TX_LPI_CLOCKGATING,
+ priv->tx_lpi_timer);
+
+ if (ret) {
+ /* Hardware timer mode not supported, or value out of range.
+ * Fall back to using software LPI mode
+ */
+ priv->eee_sw_timer_en = true;
+ stmmac_restart_sw_lpi_timer(priv);
+ }
+
+ mutex_unlock(&priv->lock);
+ netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n");
return 0;
}
@@ -7444,7 +7406,7 @@ int stmmac_dvr_probe(struct device *device,
return -ENOMEM;
stmmac_set_ethtool_ops(ndev);
- priv->pause = pause;
+ priv->pause_time = pause;
priv->plat = plat_dat;
priv->ioaddr = res->addr;
priv->dev->base_addr = (unsigned long)res->addr;
@@ -7640,9 +7602,6 @@ int stmmac_dvr_probe(struct device *device,
"%s: warning: maxmtu having invalid value (%d)\n",
__func__, priv->plat->maxmtu);
- if (flow_ctrl)
- priv->flow_ctrl = FLOW_AUTO; /* RX/TX pause on */
-
ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
/* Setup channels NAPI */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 0c7d81ddd440..7c0a4046bbe3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -524,6 +524,8 @@ int stmmac_pcs_setup(struct net_device *ndev)
if (ret)
return dev_err_probe(priv->device, ret, "No xPCS found\n");
+ xpcs_config_eee_mult_fact(xpcs, priv->plat->mult_fact_100ns);
+
priv->hw->xpcs = xpcs;
return 0;
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 2806238629f8..3e671be95d6f 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -164,6 +164,7 @@
#define AM65_CPSW_CPPI_TX_PKT_TYPE 0x7
/* XDP */
+#define AM65_CPSW_XDP_TX BIT(2)
#define AM65_CPSW_XDP_CONSUMED BIT(1)
#define AM65_CPSW_XDP_REDIRECT BIT(0)
#define AM65_CPSW_XDP_PASS 0
@@ -829,19 +830,19 @@ static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma)
{
struct am65_cpsw_tx_chn *tx_chn = data;
enum am65_cpsw_tx_buf_type buf_type;
+ struct am65_cpsw_tx_swdata *swdata;
struct cppi5_host_desc_t *desc_tx;
struct xdp_frame *xdpf;
struct sk_buff *skb;
- void **swdata;
desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma);
swdata = cppi5_hdesc_get_swdata(desc_tx);
buf_type = am65_cpsw_nuss_buf_type(tx_chn, desc_dma);
if (buf_type == AM65_CPSW_TX_BUF_TYPE_SKB) {
- skb = *(swdata);
+ skb = swdata->skb;
dev_kfree_skb_any(skb);
} else {
- xdpf = *(swdata);
+ xdpf = swdata->xdpf;
xdp_return_frame(xdpf);
}
@@ -1098,10 +1099,10 @@ static int am65_cpsw_xdp_tx_frame(struct net_device *ndev,
struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
struct cppi5_host_desc_t *host_desc;
+ struct am65_cpsw_tx_swdata *swdata;
struct netdev_queue *netif_txq;
dma_addr_t dma_desc, dma_buf;
u32 pkt_len = xdpf->len;
- void **swdata;
int ret;
host_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool);
@@ -1131,7 +1132,8 @@ static int am65_cpsw_xdp_tx_frame(struct net_device *ndev,
cppi5_hdesc_attach_buf(host_desc, dma_buf, pkt_len, dma_buf, pkt_len);
swdata = cppi5_hdesc_get_swdata(host_desc);
- *(swdata) = xdpf;
+ swdata->ndev = ndev;
+ swdata->xdpf = xdpf;
/* Report BQL before sending the packet */
netif_txq = netdev_get_tx_queue(ndev, tx_chn->id);
@@ -1167,17 +1169,16 @@ pool_free:
static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow,
struct am65_cpsw_port *port,
- struct xdp_buff *xdp,
- int cpu, int *len)
+ struct xdp_buff *xdp, int *len)
{
struct am65_cpsw_common *common = flow->common;
struct net_device *ndev = port->ndev;
int ret = AM65_CPSW_XDP_CONSUMED;
struct am65_cpsw_tx_chn *tx_chn;
struct netdev_queue *netif_txq;
+ int cpu = smp_processor_id();
struct xdp_frame *xdpf;
struct bpf_prog *prog;
- struct page *page;
int pkt_len;
u32 act;
int err;
@@ -1193,8 +1194,7 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow,
switch (act) {
case XDP_PASS:
- ret = AM65_CPSW_XDP_PASS;
- goto out;
+ return AM65_CPSW_XDP_PASS;
case XDP_TX:
tx_chn = &common->tx_chns[cpu % AM65_CPSW_MAX_QUEUES];
netif_txq = netdev_get_tx_queue(ndev, tx_chn->id);
@@ -1213,15 +1213,13 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow,
goto drop;
dev_sw_netstats_rx_add(ndev, pkt_len);
- ret = AM65_CPSW_XDP_CONSUMED;
- goto out;
+ return AM65_CPSW_XDP_TX;
case XDP_REDIRECT:
if (unlikely(xdp_do_redirect(ndev, xdp, prog)))
goto drop;
dev_sw_netstats_rx_add(ndev, pkt_len);
- ret = AM65_CPSW_XDP_REDIRECT;
- goto out;
+ return AM65_CPSW_XDP_REDIRECT;
default:
bpf_warn_invalid_xdp_action(ndev, prog, act);
fallthrough;
@@ -1233,10 +1231,6 @@ drop:
ndev->stats.rx_dropped++;
}
- page = virt_to_head_page(xdp->data);
- am65_cpsw_put_page(flow, page, true);
-
-out:
return ret;
}
@@ -1274,7 +1268,7 @@ static void am65_cpsw_nuss_rx_csum(struct sk_buff *skb, u32 csum_info)
}
static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
- int cpu, int *xdp_state)
+ int *xdp_state)
{
struct am65_cpsw_rx_chn *rx_chn = &flow->common->rx_chns;
u32 buf_dma_len, pkt_len, port_id = 0, csum_info;
@@ -1334,8 +1328,13 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
xdp_init_buff(&xdp, PAGE_SIZE, &port->xdp_rxq[flow->id]);
xdp_prepare_buff(&xdp, page_addr, AM65_CPSW_HEADROOM,
pkt_len, false);
- *xdp_state = am65_cpsw_run_xdp(flow, port, &xdp,
- cpu, &pkt_len);
+ *xdp_state = am65_cpsw_run_xdp(flow, port, &xdp, &pkt_len);
+ if (*xdp_state == AM65_CPSW_XDP_CONSUMED) {
+ page = virt_to_head_page(xdp.data);
+ am65_cpsw_put_page(flow, page, true);
+ goto allocate;
+ }
+
if (*xdp_state != AM65_CPSW_XDP_PASS)
goto allocate;
@@ -1401,7 +1400,6 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget)
{
struct am65_cpsw_rx_flow *flow = am65_cpsw_napi_to_rx_flow(napi_rx);
struct am65_cpsw_common *common = flow->common;
- int cpu = smp_processor_id();
int xdp_state_or = 0;
int cur_budget, ret;
int xdp_state;
@@ -1410,7 +1408,7 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget)
/* process only this flow */
cur_budget = budget;
while (cur_budget--) {
- ret = am65_cpsw_nuss_rx_packets(flow, cpu, &xdp_state);
+ ret = am65_cpsw_nuss_rx_packets(flow, &xdp_state);
xdp_state_or |= xdp_state;
if (ret)
break;
@@ -1438,52 +1436,6 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget)
return num_rx;
}
-static struct sk_buff *
-am65_cpsw_nuss_tx_compl_packet_skb(struct am65_cpsw_tx_chn *tx_chn,
- dma_addr_t desc_dma)
-{
- struct cppi5_host_desc_t *desc_tx;
- struct sk_buff *skb;
- void **swdata;
-
- desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool,
- desc_dma);
- swdata = cppi5_hdesc_get_swdata(desc_tx);
- skb = *(swdata);
- am65_cpsw_nuss_xmit_free(tx_chn, desc_tx);
-
- am65_cpts_tx_timestamp(tx_chn->common->cpts, skb);
-
- dev_sw_netstats_tx_add(skb->dev, 1, skb->len);
-
- return skb;
-}
-
-static struct xdp_frame *
-am65_cpsw_nuss_tx_compl_packet_xdp(struct am65_cpsw_common *common,
- struct am65_cpsw_tx_chn *tx_chn,
- dma_addr_t desc_dma,
- struct net_device **ndev)
-{
- struct cppi5_host_desc_t *desc_tx;
- struct am65_cpsw_port *port;
- struct xdp_frame *xdpf;
- u32 port_id = 0;
- void **swdata;
-
- desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma);
- cppi5_desc_get_tags_ids(&desc_tx->hdr, NULL, &port_id);
- swdata = cppi5_hdesc_get_swdata(desc_tx);
- xdpf = *(swdata);
- am65_cpsw_nuss_xmit_free(tx_chn, desc_tx);
-
- port = am65_common_get_port(common, port_id);
- dev_sw_netstats_tx_add(port->ndev, 1, xdpf->len);
- *ndev = port->ndev;
-
- return xdpf;
-}
-
static void am65_cpsw_nuss_tx_wake(struct am65_cpsw_tx_chn *tx_chn, struct net_device *ndev,
struct netdev_queue *netif_txq)
{
@@ -1504,13 +1456,17 @@ static void am65_cpsw_nuss_tx_wake(struct am65_cpsw_tx_chn *tx_chn, struct net_d
static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common,
int chn, unsigned int budget, bool *tdown)
{
+ bool single_port = AM65_CPSW_IS_CPSW2G(common);
enum am65_cpsw_tx_buf_type buf_type;
+ struct am65_cpsw_tx_swdata *swdata;
+ struct cppi5_host_desc_t *desc_tx;
struct device *dev = common->dev;
struct am65_cpsw_tx_chn *tx_chn;
struct netdev_queue *netif_txq;
unsigned int total_bytes = 0;
struct net_device *ndev;
struct xdp_frame *xdpf;
+ unsigned int pkt_len;
struct sk_buff *skb;
dma_addr_t desc_dma;
int res, num_tx = 0;
@@ -1518,9 +1474,12 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common,
tx_chn = &common->tx_chns[chn];
while (true) {
- spin_lock(&tx_chn->lock);
+ if (!single_port)
+ spin_lock(&tx_chn->lock);
res = k3_udma_glue_pop_tx_chn(tx_chn->tx_chn, &desc_dma);
- spin_unlock(&tx_chn->lock);
+ if (!single_port)
+ spin_unlock(&tx_chn->lock);
+
if (res == -ENODATA)
break;
@@ -1531,27 +1490,43 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common,
break;
}
+ desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool,
+ desc_dma);
+ swdata = cppi5_hdesc_get_swdata(desc_tx);
+ ndev = swdata->ndev;
buf_type = am65_cpsw_nuss_buf_type(tx_chn, desc_dma);
if (buf_type == AM65_CPSW_TX_BUF_TYPE_SKB) {
- skb = am65_cpsw_nuss_tx_compl_packet_skb(tx_chn, desc_dma);
- ndev = skb->dev;
- total_bytes = skb->len;
+ skb = swdata->skb;
+ am65_cpts_tx_timestamp(tx_chn->common->cpts, skb);
+ pkt_len = skb->len;
napi_consume_skb(skb, budget);
} else {
- xdpf = am65_cpsw_nuss_tx_compl_packet_xdp(common, tx_chn,
- desc_dma, &ndev);
- total_bytes = xdpf->len;
+ xdpf = swdata->xdpf;
+ pkt_len = xdpf->len;
if (buf_type == AM65_CPSW_TX_BUF_TYPE_XDP_TX)
xdp_return_frame_rx_napi(xdpf);
else
xdp_return_frame(xdpf);
}
+
+ total_bytes += pkt_len;
num_tx++;
+ am65_cpsw_nuss_xmit_free(tx_chn, desc_tx);
+ dev_sw_netstats_tx_add(ndev, 1, pkt_len);
+ if (!single_port) {
+ /* as packets from multi ports can be interleaved
+ * on the same channel, we have to figure out the
+ * port/queue at every packet and report it/wake queue.
+ */
+ netif_txq = netdev_get_tx_queue(ndev, chn);
+ netdev_tx_completed_queue(netif_txq, 1, pkt_len);
+ am65_cpsw_nuss_tx_wake(tx_chn, ndev, netif_txq);
+ }
+ }
+ if (single_port) {
netif_txq = netdev_get_tx_queue(ndev, chn);
-
netdev_tx_completed_queue(netif_txq, num_tx, total_bytes);
-
am65_cpsw_nuss_tx_wake(tx_chn, ndev, netif_txq);
}
@@ -1560,66 +1535,6 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common,
return num_tx;
}
-static int am65_cpsw_nuss_tx_compl_packets_2g(struct am65_cpsw_common *common,
- int chn, unsigned int budget, bool *tdown)
-{
- enum am65_cpsw_tx_buf_type buf_type;
- struct device *dev = common->dev;
- struct am65_cpsw_tx_chn *tx_chn;
- struct netdev_queue *netif_txq;
- unsigned int total_bytes = 0;
- struct net_device *ndev;
- struct xdp_frame *xdpf;
- struct sk_buff *skb;
- dma_addr_t desc_dma;
- int res, num_tx = 0;
-
- tx_chn = &common->tx_chns[chn];
-
- while (true) {
- res = k3_udma_glue_pop_tx_chn(tx_chn->tx_chn, &desc_dma);
- if (res == -ENODATA)
- break;
-
- if (cppi5_desc_is_tdcm(desc_dma)) {
- if (atomic_dec_and_test(&common->tdown_cnt))
- complete(&common->tdown_complete);
- *tdown = true;
- break;
- }
-
- buf_type = am65_cpsw_nuss_buf_type(tx_chn, desc_dma);
- if (buf_type == AM65_CPSW_TX_BUF_TYPE_SKB) {
- skb = am65_cpsw_nuss_tx_compl_packet_skb(tx_chn, desc_dma);
- ndev = skb->dev;
- total_bytes += skb->len;
- napi_consume_skb(skb, budget);
- } else {
- xdpf = am65_cpsw_nuss_tx_compl_packet_xdp(common, tx_chn,
- desc_dma, &ndev);
- total_bytes += xdpf->len;
- if (buf_type == AM65_CPSW_TX_BUF_TYPE_XDP_TX)
- xdp_return_frame_rx_napi(xdpf);
- else
- xdp_return_frame(xdpf);
- }
- num_tx++;
- }
-
- if (!num_tx)
- return 0;
-
- netif_txq = netdev_get_tx_queue(ndev, chn);
-
- netdev_tx_completed_queue(netif_txq, num_tx, total_bytes);
-
- am65_cpsw_nuss_tx_wake(tx_chn, ndev, netif_txq);
-
- dev_dbg(dev, "%s:%u pkt:%d\n", __func__, chn, num_tx);
-
- return num_tx;
-}
-
static enum hrtimer_restart am65_cpsw_nuss_tx_timer_callback(struct hrtimer *timer)
{
struct am65_cpsw_tx_chn *tx_chns =
@@ -1635,13 +1550,8 @@ static int am65_cpsw_nuss_tx_poll(struct napi_struct *napi_tx, int budget)
bool tdown = false;
int num_tx;
- if (AM65_CPSW_IS_CPSW2G(tx_chn->common))
- num_tx = am65_cpsw_nuss_tx_compl_packets_2g(tx_chn->common, tx_chn->id,
- budget, &tdown);
- else
- num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common,
- tx_chn->id, budget, &tdown);
-
+ num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common,
+ tx_chn->id, budget, &tdown);
if (num_tx >= budget)
return budget;
@@ -1685,12 +1595,12 @@ static netdev_tx_t am65_cpsw_nuss_ndo_slave_xmit(struct sk_buff *skb,
struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
struct cppi5_host_desc_t *first_desc, *next_desc, *cur_desc;
struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+ struct am65_cpsw_tx_swdata *swdata;
struct device *dev = common->dev;
struct am65_cpsw_tx_chn *tx_chn;
struct netdev_queue *netif_txq;
dma_addr_t desc_dma, buf_dma;
int ret, q_idx, i;
- void **swdata;
u32 *psdata;
u32 pkt_len;
@@ -1736,7 +1646,8 @@ static netdev_tx_t am65_cpsw_nuss_ndo_slave_xmit(struct sk_buff *skb,
k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma);
cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len);
swdata = cppi5_hdesc_get_swdata(first_desc);
- *(swdata) = skb;
+ swdata->ndev = ndev;
+ swdata->skb = skb;
psdata = cppi5_hdesc_get_psdata(first_desc);
/* HW csum offload if enabled */
@@ -3578,6 +3489,10 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev)
__be64 id_temp;
int ret, i;
+ BUILD_BUG_ON_MSG(sizeof(struct am65_cpsw_tx_swdata) > AM65_CPSW_NAV_SW_DATA_SIZE,
+ "TX SW_DATA size exceeds AM65_CPSW_NAV_SW_DATA_SIZE");
+ BUILD_BUG_ON_MSG(sizeof(struct am65_cpsw_swdata) > AM65_CPSW_NAV_SW_DATA_SIZE,
+ "SW_DATA size exceeds AM65_CPSW_NAV_SW_DATA_SIZE");
common = devm_kzalloc(dev, sizeof(struct am65_cpsw_common), GFP_KERNEL);
if (!common)
return -ENOMEM;
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
index e7832a5cf3cc..917c37e4e89b 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
@@ -104,6 +104,14 @@ struct am65_cpsw_rx_flow {
char name[32];
};
+struct am65_cpsw_tx_swdata {
+ struct net_device *ndev;
+ union {
+ struct sk_buff *skb;
+ struct xdp_frame *xdpf;
+ };
+};
+
struct am65_cpsw_swdata {
u32 flow_id;
struct page *page;
diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig
index 35d96c633a33..7502214cc7d5 100644
--- a/drivers/net/ethernet/xilinx/Kconfig
+++ b/drivers/net/ethernet/xilinx/Kconfig
@@ -28,6 +28,7 @@ config XILINX_AXI_EMAC
depends on HAS_IOMEM
depends on XILINX_DMA
select PHYLINK
+ select DIMLIB
help
This driver supports the 10/100/1000 Ethernet from Xilinx for the
AXI bus interface used in Xilinx Virtex FPGAs and Soc's.
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h
index a3f4f3e42587..5ff742103beb 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
@@ -9,6 +9,7 @@
#ifndef XILINX_AXIENET_H
#define XILINX_AXIENET_H
+#include <linux/dim.h>
#include <linux/netdevice.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
@@ -112,9 +113,6 @@
#define XAXIDMA_DELAY_MASK 0xFF000000 /* Delay timeout counter */
#define XAXIDMA_COALESCE_MASK 0x00FF0000 /* Coalesce counter */
-#define XAXIDMA_DELAY_SHIFT 24
-#define XAXIDMA_COALESCE_SHIFT 16
-
#define XAXIDMA_IRQ_IOC_MASK 0x00001000 /* Completion intr */
#define XAXIDMA_IRQ_DELAY_MASK 0x00002000 /* Delay interrupt */
#define XAXIDMA_IRQ_ERROR_MASK 0x00004000 /* Error interrupt */
@@ -126,8 +124,7 @@
/* Default TX/RX Threshold and delay timer values for SGDMA mode */
#define XAXIDMA_DFT_TX_THRESHOLD 24
#define XAXIDMA_DFT_TX_USEC 50
-#define XAXIDMA_DFT_RX_THRESHOLD 1
-#define XAXIDMA_DFT_RX_USEC 50
+#define XAXIDMA_DFT_RX_USEC 16
#define XAXIDMA_BD_CTRL_TXSOF_MASK 0x08000000 /* First tx packet */
#define XAXIDMA_BD_CTRL_TXEOF_MASK 0x04000000 /* Last tx packet */
@@ -487,7 +484,12 @@ struct skbuf_dma_descriptor {
* @regs: Base address for the axienet_local device address space
* @dma_regs: Base address for the axidma device address space
* @napi_rx: NAPI RX control structure
+ * @rx_dim: DIM state for the receive queue
+ * @rx_dim_enabled: Whether DIM is enabled or not
+ * @rx_irqs: Number of interrupts
+ * @rx_cr_lock: Lock protecting @rx_dma_cr, its register, and @rx_dma_started
* @rx_dma_cr: Nominal content of RX DMA control register
+ * @rx_dma_started: Set when RX DMA is started
* @rx_bd_v: Virtual address of the RX buffer descriptor ring
* @rx_bd_p: Physical address(start address) of the RX buffer descr. ring
* @rx_bd_num: Size of RX buffer descriptor ring
@@ -497,7 +499,9 @@ struct skbuf_dma_descriptor {
* @rx_bytes: RX byte count for statistics
* @rx_stat_sync: Synchronization object for RX stats
* @napi_tx: NAPI TX control structure
+ * @tx_cr_lock: Lock protecting @tx_dma_cr, its register, and @tx_dma_started
* @tx_dma_cr: Nominal content of TX DMA control register
+ * @tx_dma_started: Set when TX DMA is started
* @tx_bd_v: Virtual address of the TX buffer descriptor ring
* @tx_bd_p: Physical address(start address) of the TX buffer descr. ring
* @tx_bd_num: Size of TX buffer descriptor ring
@@ -532,10 +536,6 @@ struct skbuf_dma_descriptor {
* supported, the maximum frame size would be 9k. Else it is
* 1522 bytes (assuming support for basic VLAN)
* @rxmem: Stores rx memory size for jumbo frame handling.
- * @coalesce_count_rx: Store the irq coalesce on RX side.
- * @coalesce_usec_rx: IRQ coalesce delay for RX
- * @coalesce_count_tx: Store the irq coalesce on TX side.
- * @coalesce_usec_tx: IRQ coalesce delay for TX
* @use_dmaengine: flag to check dmaengine framework usage.
* @tx_chan: TX DMA channel.
* @rx_chan: RX DMA channel.
@@ -569,7 +569,12 @@ struct axienet_local {
void __iomem *dma_regs;
struct napi_struct napi_rx;
+ struct dim rx_dim;
+ bool rx_dim_enabled;
+ u16 rx_irqs;
+ spinlock_t rx_cr_lock;
u32 rx_dma_cr;
+ bool rx_dma_started;
struct axidma_bd *rx_bd_v;
dma_addr_t rx_bd_p;
u32 rx_bd_num;
@@ -579,7 +584,9 @@ struct axienet_local {
struct u64_stats_sync rx_stat_sync;
struct napi_struct napi_tx;
+ spinlock_t tx_cr_lock;
u32 tx_dma_cr;
+ bool tx_dma_started;
struct axidma_bd *tx_bd_v;
dma_addr_t tx_bd_p;
u32 tx_bd_num;
@@ -610,10 +617,6 @@ struct axienet_local {
u32 max_frm_size;
u32 rxmem;
- u32 coalesce_count_rx;
- u32 coalesce_usec_rx;
- u32 coalesce_count_tx;
- u32 coalesce_usec_tx;
u8 use_dmaengine;
struct dma_chan *tx_chan;
struct dma_chan *rx_chan;
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index f33178f90c42..054abf283ab3 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -223,23 +223,62 @@ static void axienet_dma_bd_release(struct net_device *ndev)
lp->rx_bd_p);
}
+static u64 axienet_dma_rate(struct axienet_local *lp)
+{
+ if (lp->axi_clk)
+ return clk_get_rate(lp->axi_clk);
+ return 125000000; /* arbitrary guess if no clock rate set */
+}
+
/**
- * axienet_usec_to_timer - Calculate IRQ delay timer value
- * @lp: Pointer to the axienet_local structure
- * @coalesce_usec: Microseconds to convert into timer value
+ * axienet_calc_cr() - Calculate control register value
+ * @lp: Device private data
+ * @count: Number of completions before an interrupt
+ * @usec: Microseconds after the last completion before an interrupt
+ *
+ * Calculate a control register value based on the coalescing settings. The
+ * run/stop bit is not set.
*/
-static u32 axienet_usec_to_timer(struct axienet_local *lp, u32 coalesce_usec)
+static u32 axienet_calc_cr(struct axienet_local *lp, u32 count, u32 usec)
{
- u32 result;
- u64 clk_rate = 125000000; /* arbitrary guess if no clock rate set */
+ u32 cr;
- if (lp->axi_clk)
- clk_rate = clk_get_rate(lp->axi_clk);
+ cr = FIELD_PREP(XAXIDMA_COALESCE_MASK, count) | XAXIDMA_IRQ_IOC_MASK |
+ XAXIDMA_IRQ_ERROR_MASK;
+ /* Only set interrupt delay timer if not generating an interrupt on
+ * the first packet. Otherwise leave at 0 to disable delay interrupt.
+ */
+ if (count > 1) {
+ u64 clk_rate = axienet_dma_rate(lp);
+ u32 timer;
+
+ /* 1 Timeout Interval = 125 * (clock period of SG clock) */
+ timer = DIV64_U64_ROUND_CLOSEST((u64)usec * clk_rate,
+ XAXIDMA_DELAY_SCALE);
- /* 1 Timeout Interval = 125 * (clock period of SG clock) */
- result = DIV64_U64_ROUND_CLOSEST((u64)coalesce_usec * clk_rate,
- XAXIDMA_DELAY_SCALE);
- return min(result, FIELD_MAX(XAXIDMA_DELAY_MASK));
+ timer = min(timer, FIELD_MAX(XAXIDMA_DELAY_MASK));
+ cr |= FIELD_PREP(XAXIDMA_DELAY_MASK, timer) |
+ XAXIDMA_IRQ_DELAY_MASK;
+ }
+
+ return cr;
+}
+
+/**
+ * axienet_coalesce_params() - Extract coalesce parameters from the CR
+ * @lp: Device private data
+ * @cr: The control register to parse
+ * @count: Number of packets before an interrupt
+ * @usec: Idle time (in usec) before an interrupt
+ */
+static void axienet_coalesce_params(struct axienet_local *lp, u32 cr,
+ u32 *count, u32 *usec)
+{
+ u64 clk_rate = axienet_dma_rate(lp);
+ u64 timer = FIELD_GET(XAXIDMA_DELAY_MASK, cr);
+
+ *count = FIELD_GET(XAXIDMA_COALESCE_MASK, cr);
+ *usec = DIV64_U64_ROUND_CLOSEST(timer * XAXIDMA_DELAY_SCALE, clk_rate);
}
/**
@@ -248,30 +287,12 @@ static u32 axienet_usec_to_timer(struct axienet_local *lp, u32 coalesce_usec)
*/
static void axienet_dma_start(struct axienet_local *lp)
{
+ spin_lock_irq(&lp->rx_cr_lock);
+
/* Start updating the Rx channel control register */
- lp->rx_dma_cr = (lp->coalesce_count_rx << XAXIDMA_COALESCE_SHIFT) |
- XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_ERROR_MASK;
- /* Only set interrupt delay timer if not generating an interrupt on
- * the first RX packet. Otherwise leave at 0 to disable delay interrupt.
- */
- if (lp->coalesce_count_rx > 1)
- lp->rx_dma_cr |= (axienet_usec_to_timer(lp, lp->coalesce_usec_rx)
- << XAXIDMA_DELAY_SHIFT) |
- XAXIDMA_IRQ_DELAY_MASK;
+ lp->rx_dma_cr &= ~XAXIDMA_CR_RUNSTOP_MASK;
axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr);
- /* Start updating the Tx channel control register */
- lp->tx_dma_cr = (lp->coalesce_count_tx << XAXIDMA_COALESCE_SHIFT) |
- XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_ERROR_MASK;
- /* Only set interrupt delay timer if not generating an interrupt on
- * the first TX packet. Otherwise leave at 0 to disable delay interrupt.
- */
- if (lp->coalesce_count_tx > 1)
- lp->tx_dma_cr |= (axienet_usec_to_timer(lp, lp->coalesce_usec_tx)
- << XAXIDMA_DELAY_SHIFT) |
- XAXIDMA_IRQ_DELAY_MASK;
- axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp->tx_dma_cr);
-
/* Populate the tail pointer and bring the Rx Axi DMA engine out of
* halted state. This will make the Rx side ready for reception.
*/
@@ -280,6 +301,14 @@ static void axienet_dma_start(struct axienet_local *lp)
axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr);
axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p +
(sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1)));
+ lp->rx_dma_started = true;
+
+ spin_unlock_irq(&lp->rx_cr_lock);
+ spin_lock_irq(&lp->tx_cr_lock);
+
+ /* Start updating the Tx channel control register */
+ lp->tx_dma_cr &= ~XAXIDMA_CR_RUNSTOP_MASK;
+ axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp->tx_dma_cr);
/* Write to the RS (Run-stop) bit in the Tx channel control register.
* Tx channel is now ready to run. But only after we write to the
@@ -288,6 +317,9 @@ static void axienet_dma_start(struct axienet_local *lp)
axienet_dma_out_addr(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p);
lp->tx_dma_cr |= XAXIDMA_CR_RUNSTOP_MASK;
axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp->tx_dma_cr);
+ lp->tx_dma_started = true;
+
+ spin_unlock_irq(&lp->tx_cr_lock);
}
/**
@@ -623,14 +655,22 @@ static void axienet_dma_stop(struct axienet_local *lp)
int count;
u32 cr, sr;
- cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET);
- cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK);
+ spin_lock_irq(&lp->rx_cr_lock);
+
+ cr = lp->rx_dma_cr & ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK);
axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr);
+ lp->rx_dma_started = false;
+
+ spin_unlock_irq(&lp->rx_cr_lock);
synchronize_irq(lp->rx_irq);
- cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET);
- cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK);
+ spin_lock_irq(&lp->tx_cr_lock);
+
+ cr = lp->tx_dma_cr & ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK);
axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr);
+ lp->tx_dma_started = false;
+
+ spin_unlock_irq(&lp->tx_cr_lock);
synchronize_irq(lp->tx_irq);
/* Give DMAs a chance to halt gracefully */
@@ -962,6 +1002,7 @@ static int axienet_tx_poll(struct napi_struct *napi, int budget)
&size, budget);
if (packets) {
+ netdev_completed_queue(ndev, packets, size);
u64_stats_update_begin(&lp->tx_stat_sync);
u64_stats_add(&lp->tx_packets, packets);
u64_stats_add(&lp->tx_bytes, size);
@@ -979,7 +1020,9 @@ static int axienet_tx_poll(struct napi_struct *napi, int budget)
* cause an immediate interrupt if any TX packets are
* already pending.
*/
+ spin_lock_irq(&lp->tx_cr_lock);
axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp->tx_dma_cr);
+ spin_unlock_irq(&lp->tx_cr_lock);
}
return packets;
}
@@ -1083,6 +1126,7 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
if (++new_tail_ptr >= lp->tx_bd_num)
new_tail_ptr = 0;
WRITE_ONCE(lp->tx_bd_tail, new_tail_ptr);
+ netdev_sent_queue(ndev, skb->len);
/* Start the transfer */
axienet_dma_out_addr(lp, XAXIDMA_TX_TDESC_OFFSET, tail_p);
@@ -1241,11 +1285,25 @@ static int axienet_rx_poll(struct napi_struct *napi, int budget)
axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, tail_p);
if (packets < budget && napi_complete_done(napi, packets)) {
+ if (READ_ONCE(lp->rx_dim_enabled)) {
+ struct dim_sample sample = {
+ .time = ktime_get(),
+ /* Safe because we are the only writer */
+ .pkt_ctr = u64_stats_read(&lp->rx_packets),
+ .byte_ctr = u64_stats_read(&lp->rx_bytes),
+ .event_ctr = READ_ONCE(lp->rx_irqs),
+ };
+
+ net_dim(&lp->rx_dim, &sample);
+ }
+
/* Re-enable RX completion interrupts. This should
* cause an immediate interrupt if any RX packets are
* already pending.
*/
+ spin_lock_irq(&lp->rx_cr_lock);
axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr);
+ spin_unlock_irq(&lp->rx_cr_lock);
}
return packets;
}
@@ -1283,11 +1341,14 @@ static irqreturn_t axienet_tx_irq(int irq, void *_ndev)
/* Disable further TX completion interrupts and schedule
* NAPI to handle the completions.
*/
- u32 cr = lp->tx_dma_cr;
-
- cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK);
if (napi_schedule_prep(&lp->napi_tx)) {
+ u32 cr;
+
+ spin_lock(&lp->tx_cr_lock);
+ cr = lp->tx_dma_cr;
+ cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK);
axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr);
+ spin_unlock(&lp->tx_cr_lock);
__napi_schedule(&lp->napi_tx);
}
}
@@ -1328,11 +1389,16 @@ static irqreturn_t axienet_rx_irq(int irq, void *_ndev)
/* Disable further RX completion interrupts and schedule
* NAPI receive.
*/
- u32 cr = lp->rx_dma_cr;
-
- cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK);
+ WRITE_ONCE(lp->rx_irqs, READ_ONCE(lp->rx_irqs) + 1);
if (napi_schedule_prep(&lp->napi_rx)) {
+ u32 cr;
+
+ spin_lock(&lp->rx_cr_lock);
+ cr = lp->rx_dma_cr;
+ cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK);
axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr);
+ spin_unlock(&lp->rx_cr_lock);
+
__napi_schedule(&lp->napi_rx);
}
}
@@ -1625,6 +1691,7 @@ err_free_eth_irq:
if (lp->eth_irq > 0)
free_irq(lp->eth_irq, ndev);
err_phy:
+ cancel_work_sync(&lp->rx_dim.work);
cancel_delayed_work_sync(&lp->stats_work);
phylink_stop(lp->phylink);
phylink_disconnect_phy(lp->phylink);
@@ -1654,6 +1721,7 @@ static int axienet_stop(struct net_device *ndev)
napi_disable(&lp->napi_rx);
}
+ cancel_work_sync(&lp->rx_dim.work);
cancel_delayed_work_sync(&lp->stats_work);
phylink_stop(lp->phylink);
@@ -1685,6 +1753,7 @@ static int axienet_stop(struct net_device *ndev)
dma_release_channel(lp->tx_chan);
}
+ netdev_reset_queue(ndev);
axienet_iow(lp, XAE_IE_OFFSET, 0);
if (lp->eth_irq > 0)
@@ -1999,6 +2068,87 @@ axienet_ethtools_set_pauseparam(struct net_device *ndev,
}
/**
+ * axienet_update_coalesce_rx() - Set RX CR
+ * @lp: Device private data
+ * @cr: Value to write to the RX CR
+ * @mask: Bits to set from @cr
+ */
+static void axienet_update_coalesce_rx(struct axienet_local *lp, u32 cr,
+ u32 mask)
+{
+ spin_lock_irq(&lp->rx_cr_lock);
+ lp->rx_dma_cr &= ~mask;
+ lp->rx_dma_cr |= cr;
+ /* If DMA isn't started, then the settings will be applied the next
+ * time dma_start() is called.
+ */
+ if (lp->rx_dma_started) {
+ u32 reg = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET);
+
+ /* Don't enable IRQs if they are disabled by NAPI */
+ if (reg & XAXIDMA_IRQ_ALL_MASK)
+ cr = lp->rx_dma_cr;
+ else
+ cr = lp->rx_dma_cr & ~XAXIDMA_IRQ_ALL_MASK;
+ axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr);
+ }
+ spin_unlock_irq(&lp->rx_cr_lock);
+}
+
+/**
+ * axienet_dim_coalesce_count_rx() - RX coalesce count for DIM
+ * @lp: Device private data
+ */
+static u32 axienet_dim_coalesce_count_rx(struct axienet_local *lp)
+{
+ return min(1 << (lp->rx_dim.profile_ix << 1), 255);
+}
+
+/**
+ * axienet_rx_dim_work() - Adjust RX DIM settings
+ * @work: The work struct
+ */
+static void axienet_rx_dim_work(struct work_struct *work)
+{
+ struct axienet_local *lp =
+ container_of(work, struct axienet_local, rx_dim.work);
+ u32 cr = axienet_calc_cr(lp, axienet_dim_coalesce_count_rx(lp), 0);
+ u32 mask = XAXIDMA_COALESCE_MASK | XAXIDMA_IRQ_IOC_MASK |
+ XAXIDMA_IRQ_ERROR_MASK;
+
+ axienet_update_coalesce_rx(lp, cr, mask);
+ lp->rx_dim.state = DIM_START_MEASURE;
+}
+
+/**
+ * axienet_update_coalesce_tx() - Set TX CR
+ * @lp: Device private data
+ * @cr: Value to write to the TX CR
+ * @mask: Bits to set from @cr
+ */
+static void axienet_update_coalesce_tx(struct axienet_local *lp, u32 cr,
+ u32 mask)
+{
+ spin_lock_irq(&lp->tx_cr_lock);
+ lp->tx_dma_cr &= ~mask;
+ lp->tx_dma_cr |= cr;
+ /* If DMA isn't started, then the settings will be applied the next
+ * time dma_start() is called.
+ */
+ if (lp->tx_dma_started) {
+ u32 reg = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET);
+
+ /* Don't enable IRQs if they are disabled by NAPI */
+ if (reg & XAXIDMA_IRQ_ALL_MASK)
+ cr = lp->tx_dma_cr;
+ else
+ cr = lp->tx_dma_cr & ~XAXIDMA_IRQ_ALL_MASK;
+ axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr);
+ }
+ spin_unlock_irq(&lp->tx_cr_lock);
+}
+
+/**
* axienet_ethtools_get_coalesce - Get DMA interrupt coalescing count.
* @ndev: Pointer to net_device structure
* @ecoalesce: Pointer to ethtool_coalesce structure
@@ -2018,11 +2168,23 @@ axienet_ethtools_get_coalesce(struct net_device *ndev,
struct netlink_ext_ack *extack)
{
struct axienet_local *lp = netdev_priv(ndev);
-
- ecoalesce->rx_max_coalesced_frames = lp->coalesce_count_rx;
- ecoalesce->rx_coalesce_usecs = lp->coalesce_usec_rx;
- ecoalesce->tx_max_coalesced_frames = lp->coalesce_count_tx;
- ecoalesce->tx_coalesce_usecs = lp->coalesce_usec_tx;
+ u32 cr;
+
+ ecoalesce->use_adaptive_rx_coalesce = lp->rx_dim_enabled;
+
+ spin_lock_irq(&lp->rx_cr_lock);
+ cr = lp->rx_dma_cr;
+ spin_unlock_irq(&lp->rx_cr_lock);
+ axienet_coalesce_params(lp, cr,
+ &ecoalesce->rx_max_coalesced_frames,
+ &ecoalesce->rx_coalesce_usecs);
+
+ spin_lock_irq(&lp->tx_cr_lock);
+ cr = lp->tx_dma_cr;
+ spin_unlock_irq(&lp->tx_cr_lock);
+ axienet_coalesce_params(lp, cr,
+ &ecoalesce->tx_max_coalesced_frames,
+ &ecoalesce->tx_coalesce_usecs);
return 0;
}
@@ -2046,12 +2208,9 @@ axienet_ethtools_set_coalesce(struct net_device *ndev,
struct netlink_ext_ack *extack)
{
struct axienet_local *lp = netdev_priv(ndev);
-
- if (netif_running(ndev)) {
- NL_SET_ERR_MSG(extack,
- "Please stop netif before applying configuration");
- return -EBUSY;
- }
+ bool new_dim = ecoalesce->use_adaptive_rx_coalesce;
+ bool old_dim = lp->rx_dim_enabled;
+ u32 cr, mask = ~XAXIDMA_CR_RUNSTOP_MASK;
if (ecoalesce->rx_max_coalesced_frames > 255 ||
ecoalesce->tx_max_coalesced_frames > 255) {
@@ -2065,7 +2224,7 @@ axienet_ethtools_set_coalesce(struct net_device *ndev,
return -EINVAL;
}
- if ((ecoalesce->rx_max_coalesced_frames > 1 &&
+ if (((ecoalesce->rx_max_coalesced_frames > 1 || new_dim) &&
!ecoalesce->rx_coalesce_usecs) ||
(ecoalesce->tx_max_coalesced_frames > 1 &&
!ecoalesce->tx_coalesce_usecs)) {
@@ -2074,11 +2233,31 @@ axienet_ethtools_set_coalesce(struct net_device *ndev,
return -EINVAL;
}
- lp->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames;
- lp->coalesce_usec_rx = ecoalesce->rx_coalesce_usecs;
- lp->coalesce_count_tx = ecoalesce->tx_max_coalesced_frames;
- lp->coalesce_usec_tx = ecoalesce->tx_coalesce_usecs;
+ if (new_dim && !old_dim) {
+ cr = axienet_calc_cr(lp, axienet_dim_coalesce_count_rx(lp),
+ ecoalesce->rx_coalesce_usecs);
+ } else if (!new_dim) {
+ if (old_dim) {
+ WRITE_ONCE(lp->rx_dim_enabled, false);
+ napi_synchronize(&lp->napi_rx);
+ flush_work(&lp->rx_dim.work);
+ }
+
+ cr = axienet_calc_cr(lp, ecoalesce->rx_max_coalesced_frames,
+ ecoalesce->rx_coalesce_usecs);
+ } else {
+ /* Dummy value for count just to calculate timer */
+ cr = axienet_calc_cr(lp, 2, ecoalesce->rx_coalesce_usecs);
+ mask = XAXIDMA_DELAY_MASK | XAXIDMA_IRQ_DELAY_MASK;
+ }
+
+ axienet_update_coalesce_rx(lp, cr, mask);
+ if (new_dim && !old_dim)
+ WRITE_ONCE(lp->rx_dim_enabled, true);
+ cr = axienet_calc_cr(lp, ecoalesce->tx_max_coalesced_frames,
+ ecoalesce->tx_coalesce_usecs);
+ axienet_update_coalesce_tx(lp, cr, ~XAXIDMA_CR_RUNSTOP_MASK);
return 0;
}
@@ -2316,7 +2495,8 @@ axienet_ethtool_get_rmon_stats(struct net_device *dev,
static const struct ethtool_ops axienet_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
- ETHTOOL_COALESCE_USECS,
+ ETHTOOL_COALESCE_USECS |
+ ETHTOOL_COALESCE_USE_ADAPTIVE_RX,
.get_drvinfo = axienet_ethtools_get_drvinfo,
.get_regs_len = axienet_ethtools_get_regs_len,
.get_regs = axienet_ethtools_get_regs,
@@ -2499,6 +2679,7 @@ static void axienet_dma_err_handler(struct work_struct *work)
~(XAE_OPTION_TXEN | XAE_OPTION_RXEN));
axienet_dma_stop(lp);
+ netdev_reset_queue(ndev);
for (i = 0; i < lp->tx_bd_num; i++) {
cur_p = &lp->tx_bd_v[i];
@@ -2858,10 +3039,15 @@ static int axienet_probe(struct platform_device *pdev)
axienet_set_mac_address(ndev, NULL);
}
- lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD;
- lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD;
- lp->coalesce_usec_rx = XAXIDMA_DFT_RX_USEC;
- lp->coalesce_usec_tx = XAXIDMA_DFT_TX_USEC;
+ spin_lock_init(&lp->rx_cr_lock);
+ spin_lock_init(&lp->tx_cr_lock);
+ INIT_WORK(&lp->rx_dim.work, axienet_rx_dim_work);
+ lp->rx_dim_enabled = true;
+ lp->rx_dim.profile_ix = 1;
+ lp->rx_dma_cr = axienet_calc_cr(lp, axienet_dim_coalesce_count_rx(lp),
+ XAXIDMA_DFT_RX_USEC);
+ lp->tx_dma_cr = axienet_calc_cr(lp, XAXIDMA_DFT_TX_THRESHOLD,
+ XAXIDMA_DFT_TX_USEC);
ret = axienet_mdio_setup(lp);
if (ret)
@@ -2891,7 +3077,6 @@ static int axienet_probe(struct platform_device *pdev)
}
of_node_put(np);
lp->pcs.ops = &axienet_pcs_ops;
- lp->pcs.neg_mode = true;
lp->pcs.poll = true;
}
diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c
index 00ebc25d0b22..f03797103c6a 100644
--- a/drivers/net/hamradio/baycom_par.c
+++ b/drivers/net/hamradio/baycom_par.c
@@ -427,7 +427,7 @@ static int baycom_ioctl(struct net_device *dev, void __user *data,
break;
case HDLCDRVCTL_GETMODE:
- strcpy(hi->data.modename, bc->options ? "par96" : "picpar");
+ strscpy(hi->data.modename, bc->options ? "par96" : "picpar");
if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
return -EFAULT;
return 0;
@@ -439,7 +439,7 @@ static int baycom_ioctl(struct net_device *dev, void __user *data,
return baycom_setmode(bc, hi->data.modename);
case HDLCDRVCTL_MODELIST:
- strcpy(hi->data.modename, "par96,picpar");
+ strscpy(hi->data.modename, "par96,picpar");
if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
return -EFAULT;
return 0;
diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c
index 799f8ece7824..ee5bd3c12040 100644
--- a/drivers/net/hamradio/baycom_ser_fdx.c
+++ b/drivers/net/hamradio/baycom_ser_fdx.c
@@ -531,7 +531,7 @@ static int baycom_ioctl(struct net_device *dev, void __user *data,
return baycom_setmode(bc, hi->data.modename);
case HDLCDRVCTL_MODELIST:
- strcpy(hi->data.modename, "ser12,ser3,ser24");
+ strscpy(hi->data.modename, "ser12,ser3,ser24");
if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
return -EFAULT;
return 0;
diff --git a/drivers/net/hamradio/baycom_ser_hdx.c b/drivers/net/hamradio/baycom_ser_hdx.c
index 5d1ab4840753..05bdad214799 100644
--- a/drivers/net/hamradio/baycom_ser_hdx.c
+++ b/drivers/net/hamradio/baycom_ser_hdx.c
@@ -570,7 +570,7 @@ static int baycom_ioctl(struct net_device *dev, void __user *data,
break;
case HDLCDRVCTL_GETMODE:
- strcpy(hi->data.modename, "ser12");
+ strscpy(hi->data.modename, "ser12");
if (bc->opt_dcd <= 0)
strcat(hi->data.modename, (!bc->opt_dcd) ? "*" : (bc->opt_dcd == -2) ? "@" : "+");
if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
@@ -584,7 +584,7 @@ static int baycom_ioctl(struct net_device *dev, void __user *data,
return baycom_setmode(bc, hi->data.modename);
case HDLCDRVCTL_MODELIST:
- strcpy(hi->data.modename, "ser12");
+ strscpy(hi->data.modename, "ser12");
if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
return -EFAULT;
return 0;
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 234db693cefa..70f7cb383228 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -1166,6 +1166,8 @@ struct netvsc_device {
u32 max_chn;
u32 num_chn;
+ u32 netvsc_gso_max_size;
+
atomic_t open_chn;
struct work_struct subchan_work;
wait_queue_head_t subchan_open;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index d6c4abfc3a28..9c6501bf27bd 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2461,6 +2461,21 @@ static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event)
} else {
netdev_info(ndev, "Data path switched %s VF: %s\n",
vf_is_up ? "to" : "from", vf_netdev->name);
+
+ /* In Azure, when accelerated networking in enabled, other NICs
+ * like MANA, MLX, are configured as a bonded nic with
+ * Netvsc(failover) NIC. For bonded NICs, the min of the max
+ * pkt aggregate size of the members is propagated in the stack.
+ * In order to allow these NICs (MANA/MLX) to use up to
+ * GSO_MAX_SIZE gso packet size, we need to allow Netvsc NIC to
+ * also support this in the guest.
+ * This value is only increased for netvsc NIC when datapath is
+ * switched over to the VF
+ */
+ if (vf_is_up)
+ netif_set_tso_max_size(ndev, vf_netdev->tso_max_size);
+ else
+ netif_set_tso_max_size(ndev, netvsc_dev->netvsc_gso_max_size);
}
return NOTIFY_OK;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index c0ceeef4fcd8..82747dfacd70 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1356,9 +1356,10 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
struct net_device_context *net_device_ctx = netdev_priv(net);
struct ndis_offload hwcaps;
struct ndis_offload_params offloads;
- unsigned int gso_max_size = GSO_LEGACY_MAX_SIZE;
int ret;
+ nvdev->netvsc_gso_max_size = GSO_LEGACY_MAX_SIZE;
+
/* Find HW offload capabilities */
ret = rndis_query_hwcaps(rndis_device, nvdev, &hwcaps);
if (ret != 0)
@@ -1390,8 +1391,8 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED;
net->hw_features |= NETIF_F_TSO;
- if (hwcaps.lsov2.ip4_maxsz < gso_max_size)
- gso_max_size = hwcaps.lsov2.ip4_maxsz;
+ if (hwcaps.lsov2.ip4_maxsz < nvdev->netvsc_gso_max_size)
+ nvdev->netvsc_gso_max_size = hwcaps.lsov2.ip4_maxsz;
}
if (hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) {
@@ -1411,8 +1412,8 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
offloads.lso_v2_ipv6 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED;
net->hw_features |= NETIF_F_TSO6;
- if (hwcaps.lsov2.ip6_maxsz < gso_max_size)
- gso_max_size = hwcaps.lsov2.ip6_maxsz;
+ if (hwcaps.lsov2.ip6_maxsz < nvdev->netvsc_gso_max_size)
+ nvdev->netvsc_gso_max_size = hwcaps.lsov2.ip6_maxsz;
}
if (hwcaps.csum.ip6_txcsum & NDIS_TXCSUM_CAP_UDP6) {
@@ -1438,7 +1439,7 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
*/
net->features &= ~NETVSC_SUPPORTED_HW_FEATURES | net->hw_features;
- netif_set_tso_max_size(net, gso_max_size);
+ netif_set_tso_max_size(net, nvdev->netvsc_gso_max_size);
ret = rndis_filter_set_offload_params(net, nvdev, &offloads);
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 86ab4a42769a..f77eddf22185 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -45,12 +45,12 @@ MODULE_DESCRIPTION("Console driver for network interfaces");
MODULE_LICENSE("GPL");
#define MAX_PARAM_LENGTH 256
-#define MAX_USERDATA_ENTRY_LENGTH 256
-#define MAX_USERDATA_VALUE_LENGTH 200
+#define MAX_EXTRADATA_ENTRY_LEN 256
+#define MAX_EXTRADATA_VALUE_LEN 200
/* The number 3 comes from userdata entry format characters (' ', '=', '\n') */
-#define MAX_USERDATA_NAME_LENGTH (MAX_USERDATA_ENTRY_LENGTH - \
- MAX_USERDATA_VALUE_LENGTH - 3)
-#define MAX_USERDATA_ITEMS 16
+#define MAX_EXTRADATA_NAME_LEN (MAX_EXTRADATA_ENTRY_LEN - \
+ MAX_EXTRADATA_VALUE_LEN - 3)
+#define MAX_EXTRADATA_ITEMS 16
#define MAX_PRINT_CHUNK 1000
static char config[MAX_PARAM_LENGTH];
@@ -97,13 +97,23 @@ struct netconsole_target_stats {
struct u64_stats_sync syncp;
};
+/* Features enabled in sysdata. Contrary to userdata, this data is populated by
+ * the kernel. The fields are designed as bitwise flags, allowing multiple
+ * features to be set in sysdata_fields.
+ */
+enum sysdata_feature {
+ /* Populate the CPU that sends the message */
+ CPU_NR = BIT(0),
+};
+
/**
* struct netconsole_target - Represents a configured netconsole target.
* @list: Links this target into the target_list.
* @group: Links us into the configfs subsystem hierarchy.
* @userdata_group: Links to the userdata configfs hierarchy
- * @userdata_complete: Cached, formatted string of append
- * @userdata_length: String length of userdata_complete
+ * @extradata_complete: Cached, formatted string of append
+ * @userdata_length: String length of usedata in extradata_complete.
+ * @sysdata_fields: Sysdata features enabled.
* @stats: Packet send stats for the target. Used for debugging.
* @enabled: On / off knob to enable / disable target.
* Visible from userspace (read-write).
@@ -123,20 +133,25 @@ struct netconsole_target_stats {
* remote_ip (read-write)
* local_mac (read-only)
* remote_mac (read-write)
+ * @buf: The buffer used to send the full msg to the network stack
*/
struct netconsole_target {
struct list_head list;
#ifdef CONFIG_NETCONSOLE_DYNAMIC
struct config_group group;
struct config_group userdata_group;
- char userdata_complete[MAX_USERDATA_ENTRY_LENGTH * MAX_USERDATA_ITEMS];
+ char extradata_complete[MAX_EXTRADATA_ENTRY_LEN * MAX_EXTRADATA_ITEMS];
size_t userdata_length;
+ /* bit-wise with sysdata_feature bits */
+ u32 sysdata_fields;
#endif
struct netconsole_target_stats stats;
bool enabled;
bool extended;
bool release;
struct netpoll np;
+ /* protected by target_list_lock */
+ char buf[MAX_PRINT_CHUNK];
};
#ifdef CONFIG_NETCONSOLE_DYNAMIC
@@ -396,6 +411,19 @@ static ssize_t transmit_errors_show(struct config_item *item, char *buf)
return sysfs_emit(buf, "%llu\n", xmit_drop_count + enomem_count);
}
+/* configfs helper to display if cpu_nr sysdata feature is enabled */
+static ssize_t sysdata_cpu_nr_enabled_show(struct config_item *item, char *buf)
+{
+ struct netconsole_target *nt = to_target(item->ci_parent);
+ bool cpu_nr_enabled;
+
+ mutex_lock(&dynamic_netconsole_mutex);
+ cpu_nr_enabled = !!(nt->sysdata_fields & CPU_NR);
+ mutex_unlock(&dynamic_netconsole_mutex);
+
+ return sysfs_emit(buf, "%d\n", cpu_nr_enabled);
+}
+
/*
* This one is special -- targets created through the configfs interface
* are not enabled (and the corresponding netpoll activated) by default.
@@ -659,6 +687,24 @@ out_unlock:
return ret;
}
+/* Count number of entries we have in extradata.
+ * This is important because the extradata_complete only supports
+ * MAX_EXTRADATA_ITEMS entries. Before enabling any new {user,sys}data
+ * feature, number of entries needs to checked for available space.
+ */
+static size_t count_extradata_entries(struct netconsole_target *nt)
+{
+ size_t entries;
+
+ /* Userdata entries */
+ entries = list_count_nodes(&nt->userdata_group.cg_children);
+ /* Plus sysdata entries */
+ if (nt->sysdata_fields & CPU_NR)
+ entries += 1;
+
+ return entries;
+}
+
static ssize_t remote_mac_store(struct config_item *item, const char *buf,
size_t count)
{
@@ -687,7 +733,7 @@ out_unlock:
struct userdatum {
struct config_item item;
- char value[MAX_USERDATA_VALUE_LENGTH];
+ char value[MAX_EXTRADATA_VALUE_LEN];
};
static struct userdatum *to_userdatum(struct config_item *item)
@@ -724,13 +770,13 @@ static void update_userdata(struct netconsole_target *nt)
/* Clear the current string in case the last userdatum was deleted */
nt->userdata_length = 0;
- nt->userdata_complete[0] = 0;
+ nt->extradata_complete[0] = 0;
list_for_each(entry, &nt->userdata_group.cg_children) {
struct userdatum *udm_item;
struct config_item *item;
- if (WARN_ON_ONCE(child_count >= MAX_USERDATA_ITEMS))
+ if (WARN_ON_ONCE(child_count >= MAX_EXTRADATA_ITEMS))
break;
child_count++;
@@ -738,19 +784,19 @@ static void update_userdata(struct netconsole_target *nt)
udm_item = to_userdatum(item);
/* Skip userdata with no value set */
- if (strnlen(udm_item->value, MAX_USERDATA_VALUE_LENGTH) == 0)
+ if (strnlen(udm_item->value, MAX_EXTRADATA_VALUE_LEN) == 0)
continue;
- /* This doesn't overflow userdata_complete since it will write
- * one entry length (1/MAX_USERDATA_ITEMS long), entry count is
+ /* This doesn't overflow extradata_complete since it will write
+ * one entry length (1/MAX_EXTRADATA_ITEMS long), entry count is
* checked to not exceed MAX items with child_count above
*/
- complete_idx += scnprintf(&nt->userdata_complete[complete_idx],
- MAX_USERDATA_ENTRY_LENGTH, " %s=%s\n",
+ complete_idx += scnprintf(&nt->extradata_complete[complete_idx],
+ MAX_EXTRADATA_ENTRY_LEN, " %s=%s\n",
item->ci_name, udm_item->value);
}
- nt->userdata_length = strnlen(nt->userdata_complete,
- sizeof(nt->userdata_complete));
+ nt->userdata_length = strnlen(nt->extradata_complete,
+ sizeof(nt->extradata_complete));
}
static ssize_t userdatum_value_store(struct config_item *item, const char *buf,
@@ -761,7 +807,7 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf,
struct userdata *ud;
ssize_t ret;
- if (count > MAX_USERDATA_VALUE_LENGTH)
+ if (count > MAX_EXTRADATA_VALUE_LEN)
return -EMSGSIZE;
mutex_lock(&dynamic_netconsole_mutex);
@@ -780,7 +826,62 @@ out_unlock:
return ret;
}
+/* disable_sysdata_feature - Disable sysdata feature and clean sysdata
+ * @nt: target that is disabling the feature
+ * @feature: feature being disabled
+ */
+static void disable_sysdata_feature(struct netconsole_target *nt,
+ enum sysdata_feature feature)
+{
+ nt->sysdata_fields &= ~feature;
+ nt->extradata_complete[nt->userdata_length] = 0;
+}
+
+/* configfs helper to sysdata cpu_nr feature */
+static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct netconsole_target *nt = to_target(item->ci_parent);
+ bool cpu_nr_enabled, curr;
+ ssize_t ret;
+
+ ret = kstrtobool(buf, &cpu_nr_enabled);
+ if (ret)
+ return ret;
+
+ mutex_lock(&dynamic_netconsole_mutex);
+ curr = nt->sysdata_fields & CPU_NR;
+ if (cpu_nr_enabled == curr)
+ /* no change requested */
+ goto unlock_ok;
+
+ if (cpu_nr_enabled &&
+ count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) {
+ /* user wants the new feature, but there is no space in the
+ * buffer.
+ */
+ ret = -ENOSPC;
+ goto unlock;
+ }
+
+ if (cpu_nr_enabled)
+ nt->sysdata_fields |= CPU_NR;
+ else
+ /* This is special because extradata_complete might have
+ * remaining data from previous sysdata, and it needs to be
+ * cleaned.
+ */
+ disable_sysdata_feature(nt, CPU_NR);
+
+unlock_ok:
+ ret = strnlen(buf, count);
+unlock:
+ mutex_unlock(&dynamic_netconsole_mutex);
+ return ret;
+}
+
CONFIGFS_ATTR(userdatum_, value);
+CONFIGFS_ATTR(sysdata_, cpu_nr_enabled);
static struct configfs_attribute *userdatum_attrs[] = {
&userdatum_attr_value,
@@ -808,15 +909,13 @@ static struct config_item *userdatum_make_item(struct config_group *group,
struct netconsole_target *nt;
struct userdatum *udm;
struct userdata *ud;
- size_t child_count;
- if (strlen(name) > MAX_USERDATA_NAME_LENGTH)
+ if (strlen(name) > MAX_EXTRADATA_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
ud = to_userdata(&group->cg_item);
nt = userdata_to_target(ud);
- child_count = list_count_nodes(&nt->userdata_group.cg_children);
- if (child_count >= MAX_USERDATA_ITEMS)
+ if (count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS)
return ERR_PTR(-ENOSPC);
udm = kzalloc(sizeof(*udm), GFP_KERNEL);
@@ -842,6 +941,7 @@ static void userdatum_drop(struct config_group *group, struct config_item *item)
}
static struct configfs_attribute *userdata_attrs[] = {
+ &sysdata_attr_cpu_nr_enabled,
NULL,
};
@@ -1017,6 +1117,40 @@ static void populate_configfs_item(struct netconsole_target *nt,
init_target_config_group(nt, target_name);
}
+/*
+ * prepare_extradata - append sysdata at extradata_complete in runtime
+ * @nt: target to send message to
+ */
+static int prepare_extradata(struct netconsole_target *nt)
+{
+ int sysdata_len, extradata_len;
+
+ /* userdata was appended when configfs write helper was called
+ * by update_userdata().
+ */
+ extradata_len = nt->userdata_length;
+
+ if (!(nt->sysdata_fields & CPU_NR))
+ goto out;
+
+ /* Append cpu=%d at extradata_complete after userdata str */
+ sysdata_len = scnprintf(&nt->extradata_complete[nt->userdata_length],
+ MAX_EXTRADATA_ENTRY_LEN, " cpu=%u\n",
+ raw_smp_processor_id());
+
+ extradata_len += sysdata_len;
+
+ WARN_ON_ONCE(extradata_len >
+ MAX_EXTRADATA_ENTRY_LEN * MAX_EXTRADATA_ITEMS);
+
+out:
+ return extradata_len;
+}
+#else /* CONFIG_NETCONSOLE_DYNAMIC not set */
+static int prepare_extradata(struct netconsole_target *nt)
+{
+ return 0;
+}
#endif /* CONFIG_NETCONSOLE_DYNAMIC */
/* Handle network interface device notifications */
@@ -1117,29 +1251,28 @@ static void send_msg_no_fragmentation(struct netconsole_target *nt,
int msg_len,
int release_len)
{
- static char buf[MAX_PRINT_CHUNK]; /* protected by target_list_lock */
- const char *userdata = NULL;
+ const char *extradata = NULL;
const char *release;
#ifdef CONFIG_NETCONSOLE_DYNAMIC
- userdata = nt->userdata_complete;
+ extradata = nt->extradata_complete;
#endif
if (release_len) {
release = init_utsname()->release;
- scnprintf(buf, MAX_PRINT_CHUNK, "%s,%s", release, msg);
+ scnprintf(nt->buf, MAX_PRINT_CHUNK, "%s,%s", release, msg);
msg_len += release_len;
} else {
- memcpy(buf, msg, msg_len);
+ memcpy(nt->buf, msg, msg_len);
}
- if (userdata)
- msg_len += scnprintf(&buf[msg_len],
+ if (extradata)
+ msg_len += scnprintf(&nt->buf[msg_len],
MAX_PRINT_CHUNK - msg_len,
- "%s", userdata);
+ "%s", extradata);
- send_udp(nt, buf, msg_len);
+ send_udp(nt, nt->buf, msg_len);
}
static void append_release(char *buf)
@@ -1150,28 +1283,27 @@ static void append_release(char *buf)
scnprintf(buf, MAX_PRINT_CHUNK, "%s,", release);
}
-static void send_fragmented_body(struct netconsole_target *nt, char *buf,
+static void send_fragmented_body(struct netconsole_target *nt,
const char *msgbody, int header_len,
- int msgbody_len)
+ int msgbody_len, int extradata_len)
{
- const char *userdata = NULL;
+ int sent_extradata, preceding_bytes;
+ const char *extradata = NULL;
int body_len, offset = 0;
- int userdata_len = 0;
#ifdef CONFIG_NETCONSOLE_DYNAMIC
- userdata = nt->userdata_complete;
- userdata_len = nt->userdata_length;
+ extradata = nt->extradata_complete;
#endif
/* body_len represents the number of bytes that will be sent. This is
* bigger than MAX_PRINT_CHUNK, thus, it will be split in multiple
* packets
*/
- body_len = msgbody_len + userdata_len;
+ body_len = msgbody_len + extradata_len;
/* In each iteration of the while loop below, we send a packet
* containing the header and a portion of the body. The body is
- * composed of two parts: msgbody and userdata. We keep track of how
+ * composed of two parts: msgbody and extradata. We keep track of how
* many bytes have been sent so far using the offset variable, which
* ranges from 0 to the total length of the body.
*/
@@ -1181,7 +1313,7 @@ static void send_fragmented_body(struct netconsole_target *nt, char *buf,
int this_offset = 0;
int this_chunk = 0;
- this_header += scnprintf(buf + this_header,
+ this_header += scnprintf(nt->buf + this_header,
MAX_PRINT_CHUNK - this_header,
",ncfrag=%d/%d;", offset,
body_len);
@@ -1192,47 +1324,48 @@ static void send_fragmented_body(struct netconsole_target *nt, char *buf,
MAX_PRINT_CHUNK - this_header);
if (WARN_ON_ONCE(this_chunk <= 0))
return;
- memcpy(buf + this_header, msgbody + offset, this_chunk);
+ memcpy(nt->buf + this_header, msgbody + offset,
+ this_chunk);
this_offset += this_chunk;
}
/* msgbody was finally written, either in the previous
* messages and/or in the current buf. Time to write
- * the userdata.
+ * the extradata.
*/
msgbody_written |= offset + this_offset >= msgbody_len;
- /* Msg body is fully written and there is pending userdata to
- * write, append userdata in this chunk
+ /* Msg body is fully written and there is pending extradata to
+ * write, append extradata in this chunk
*/
if (msgbody_written && offset + this_offset < body_len) {
/* Track how much user data was already sent. First
* time here, sent_userdata is zero
*/
- int sent_userdata = (offset + this_offset) - msgbody_len;
+ sent_extradata = (offset + this_offset) - msgbody_len;
/* offset of bytes used in current buf */
- int preceding_bytes = this_chunk + this_header;
+ preceding_bytes = this_chunk + this_header;
- if (WARN_ON_ONCE(sent_userdata < 0))
+ if (WARN_ON_ONCE(sent_extradata < 0))
return;
- this_chunk = min(userdata_len - sent_userdata,
+ this_chunk = min(extradata_len - sent_extradata,
MAX_PRINT_CHUNK - preceding_bytes);
if (WARN_ON_ONCE(this_chunk < 0))
/* this_chunk could be zero if all the previous
* message used all the buffer. This is not a
- * problem, userdata will be sent in the next
+ * problem, extradata will be sent in the next
* iteration
*/
return;
- memcpy(buf + this_header + this_offset,
- userdata + sent_userdata,
+ memcpy(nt->buf + this_header + this_offset,
+ extradata + sent_extradata,
this_chunk);
this_offset += this_chunk;
}
- send_udp(nt, buf, this_header + this_offset);
+ send_udp(nt, nt->buf, this_header + this_offset);
offset += this_offset;
}
}
@@ -1240,9 +1373,9 @@ static void send_fragmented_body(struct netconsole_target *nt, char *buf,
static void send_msg_fragmented(struct netconsole_target *nt,
const char *msg,
int msg_len,
- int release_len)
+ int release_len,
+ int extradata_len)
{
- static char buf[MAX_PRINT_CHUNK]; /* protected by target_list_lock */
int header_len, msgbody_len;
const char *msgbody;
@@ -1260,16 +1393,17 @@ static void send_msg_fragmented(struct netconsole_target *nt,
* "ncfrag=<byte-offset>/<total-bytes>"
*/
if (release_len)
- append_release(buf);
+ append_release(nt->buf);
/* Copy the header into the buffer */
- memcpy(buf + release_len, msg, header_len);
+ memcpy(nt->buf + release_len, msg, header_len);
header_len += release_len;
/* for now on, the header will be persisted, and the msgbody
* will be replaced
*/
- send_fragmented_body(nt, buf, msgbody, header_len, msgbody_len);
+ send_fragmented_body(nt, msgbody, header_len, msgbody_len,
+ extradata_len);
}
/**
@@ -1285,20 +1419,19 @@ static void send_msg_fragmented(struct netconsole_target *nt,
static void send_ext_msg_udp(struct netconsole_target *nt, const char *msg,
int msg_len)
{
- int userdata_len = 0;
int release_len = 0;
+ int extradata_len;
-#ifdef CONFIG_NETCONSOLE_DYNAMIC
- userdata_len = nt->userdata_length;
-#endif
+ extradata_len = prepare_extradata(nt);
if (nt->release)
release_len = strlen(init_utsname()->release) + 1;
- if (msg_len + release_len + userdata_len <= MAX_PRINT_CHUNK)
+ if (msg_len + release_len + extradata_len <= MAX_PRINT_CHUNK)
return send_msg_no_fragmentation(nt, msg, msg_len, release_len);
- return send_msg_fragmented(nt, msg, msg_len, release_len);
+ return send_msg_fragmented(nt, msg, msg_len, release_len,
+ extradata_len);
}
static void write_ext_msg(struct console *con, const char *msg,
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 42f247cbdcee..9b394ddc5206 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -645,8 +645,11 @@ nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx)
if (ns->rq_reset_mode > 3)
return -EINVAL;
- if (ns->rq_reset_mode == 1)
+ if (ns->rq_reset_mode == 1) {
+ if (!netif_running(ns->netdev))
+ return -ENETDOWN;
return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi);
+ }
qmem->rq = nsim_queue_alloc();
if (!qmem->rq)
@@ -754,11 +757,6 @@ nsim_qreset_write(struct file *file, const char __user *data,
return -EINVAL;
rtnl_lock();
- if (!netif_running(ns->netdev)) {
- ret = -ENETDOWN;
- goto exit_unlock;
- }
-
if (queue >= ns->netdev->real_num_rx_queues) {
ret = -EINVAL;
goto exit_unlock;
diff --git a/drivers/net/pcs/pcs-lynx.c b/drivers/net/pcs/pcs-lynx.c
index e46f588cae7d..23b40e9eacbb 100644
--- a/drivers/net/pcs/pcs-lynx.c
+++ b/drivers/net/pcs/pcs-lynx.c
@@ -355,7 +355,6 @@ static struct phylink_pcs *lynx_pcs_create(struct mdio_device *mdio)
mdio_device_get(mdio);
lynx->mdio = mdio;
lynx->pcs.ops = &lynx_pcs_phylink_ops;
- lynx->pcs.neg_mode = true;
lynx->pcs.poll = true;
for (i = 0; i < ARRAY_SIZE(lynx_interfaces); i++)
diff --git a/drivers/net/pcs/pcs-mtk-lynxi.c b/drivers/net/pcs/pcs-mtk-lynxi.c
index 7d6261dee534..149ddf51d785 100644
--- a/drivers/net/pcs/pcs-mtk-lynxi.c
+++ b/drivers/net/pcs/pcs-mtk-lynxi.c
@@ -305,7 +305,6 @@ struct phylink_pcs *mtk_pcs_lynxi_create(struct device *dev,
mpcs->regmap = regmap;
mpcs->flags = flags;
mpcs->pcs.ops = &mtk_pcs_lynxi_ops;
- mpcs->pcs.neg_mode = true;
mpcs->pcs.poll = true;
mpcs->interface = PHY_INTERFACE_MODE_NA;
diff --git a/drivers/net/pcs/pcs-rzn1-miic.c b/drivers/net/pcs/pcs-rzn1-miic.c
index 61944574d087..d79bb9b06cd2 100644
--- a/drivers/net/pcs/pcs-rzn1-miic.c
+++ b/drivers/net/pcs/pcs-rzn1-miic.c
@@ -268,17 +268,6 @@ static void miic_link_up(struct phylink_pcs *pcs, unsigned int neg_mode,
(MIIC_CONVCTRL_CONV_SPEED | MIIC_CONVCTRL_FULLD), val);
}
-static int miic_validate(struct phylink_pcs *pcs, unsigned long *supported,
- const struct phylink_link_state *state)
-{
- if (phy_interface_mode_is_rgmii(state->interface) ||
- state->interface == PHY_INTERFACE_MODE_RMII ||
- state->interface == PHY_INTERFACE_MODE_MII)
- return 1;
-
- return -EINVAL;
-}
-
static int miic_pre_init(struct phylink_pcs *pcs)
{
struct miic_port *miic_port = phylink_pcs_to_miic_port(pcs);
@@ -307,7 +296,6 @@ static int miic_pre_init(struct phylink_pcs *pcs)
}
static const struct phylink_pcs_ops miic_phylink_ops = {
- .pcs_validate = miic_validate,
.pcs_config = miic_config,
.pcs_link_up = miic_link_up,
.pcs_pre_init = miic_pre_init,
@@ -361,7 +349,10 @@ struct phylink_pcs *miic_create(struct device *dev, struct device_node *np)
miic_port->miic = miic;
miic_port->port = port - 1;
miic_port->pcs.ops = &miic_phylink_ops;
- miic_port->pcs.neg_mode = true;
+
+ phy_interface_set_rgmii(miic_port->pcs.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_RMII, miic_port->pcs.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_MII, miic_port->pcs.supported_interfaces);
return &miic_port->pcs;
}
@@ -472,13 +463,10 @@ static int miic_parse_dt(struct device *dev, u32 *mode_cfg)
if (of_property_read_u32(np, "renesas,miic-switch-portin", &conf) == 0)
dt_val[0] = conf;
- for_each_child_of_node(np, conv) {
+ for_each_available_child_of_node(np, conv) {
if (of_property_read_u32(conv, "reg", &port))
continue;
- if (!of_device_is_available(conv))
- continue;
-
if (of_property_read_u32(conv, "renesas,miic-input", &conf) == 0)
dt_val[port] = conf;
}
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 1faa37f0e7b9..e32dec4b812e 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -602,36 +602,6 @@ static void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces)
__set_bit(compat->interface, interfaces);
}
-int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable)
-{
- u16 mask, val;
- int ret;
-
- mask = DW_VR_MII_EEE_LTX_EN | DW_VR_MII_EEE_LRX_EN |
- DW_VR_MII_EEE_TX_QUIET_EN | DW_VR_MII_EEE_RX_QUIET_EN |
- DW_VR_MII_EEE_TX_EN_CTRL | DW_VR_MII_EEE_RX_EN_CTRL |
- DW_VR_MII_EEE_MULT_FACT_100NS;
-
- if (enable)
- val = DW_VR_MII_EEE_LTX_EN | DW_VR_MII_EEE_LRX_EN |
- DW_VR_MII_EEE_TX_QUIET_EN | DW_VR_MII_EEE_RX_QUIET_EN |
- DW_VR_MII_EEE_TX_EN_CTRL | DW_VR_MII_EEE_RX_EN_CTRL |
- FIELD_PREP(DW_VR_MII_EEE_MULT_FACT_100NS,
- mult_fact_100ns);
- else
- val = 0;
-
- ret = xpcs_modify(xpcs, MDIO_MMD_VEND2, DW_VR_MII_EEE_MCTRL0, mask,
- val);
- if (ret < 0)
- return ret;
-
- return xpcs_modify(xpcs, MDIO_MMD_VEND2, DW_VR_MII_EEE_MCTRL1,
- DW_VR_MII_EEE_TRN_LPI,
- enable ? DW_VR_MII_EEE_TRN_LPI : 0);
-}
-EXPORT_SYMBOL_GPL(xpcs_config_eee);
-
static void xpcs_pre_config(struct phylink_pcs *pcs, phy_interface_t interface)
{
struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs);
@@ -1193,6 +1163,63 @@ static void xpcs_an_restart(struct phylink_pcs *pcs)
BMCR_ANRESTART);
}
+static int xpcs_config_eee(struct dw_xpcs *xpcs, bool enable)
+{
+ u16 mask, val;
+ int ret;
+
+ mask = DW_VR_MII_EEE_LTX_EN | DW_VR_MII_EEE_LRX_EN |
+ DW_VR_MII_EEE_TX_QUIET_EN | DW_VR_MII_EEE_RX_QUIET_EN |
+ DW_VR_MII_EEE_TX_EN_CTRL | DW_VR_MII_EEE_RX_EN_CTRL |
+ DW_VR_MII_EEE_MULT_FACT_100NS;
+
+ if (enable)
+ val = DW_VR_MII_EEE_LTX_EN | DW_VR_MII_EEE_LRX_EN |
+ DW_VR_MII_EEE_TX_QUIET_EN | DW_VR_MII_EEE_RX_QUIET_EN |
+ DW_VR_MII_EEE_TX_EN_CTRL | DW_VR_MII_EEE_RX_EN_CTRL |
+ FIELD_PREP(DW_VR_MII_EEE_MULT_FACT_100NS,
+ xpcs->eee_mult_fact);
+ else
+ val = 0;
+
+ ret = xpcs_modify(xpcs, MDIO_MMD_VEND2, DW_VR_MII_EEE_MCTRL0, mask,
+ val);
+ if (ret < 0)
+ return ret;
+
+ return xpcs_modify(xpcs, MDIO_MMD_VEND2, DW_VR_MII_EEE_MCTRL1,
+ DW_VR_MII_EEE_TRN_LPI,
+ enable ? DW_VR_MII_EEE_TRN_LPI : 0);
+}
+
+static void xpcs_disable_eee(struct phylink_pcs *pcs)
+{
+ struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs);
+
+ xpcs_config_eee(xpcs, false);
+}
+
+static void xpcs_enable_eee(struct phylink_pcs *pcs)
+{
+ struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs);
+
+ xpcs_config_eee(xpcs, true);
+}
+
+/**
+ * xpcs_config_eee_mult_fact() - set the EEE clock multiplying factor
+ * @xpcs: pointer to a &struct dw_xpcs instance
+ * @mult_fact: the multiplying factor
+ *
+ * Configure the EEE clock multiplying factor. This value should be such that
+ * clk_eee_time_period * (mult_fact + 1) is within the range 80 to 120ns.
+ */
+void xpcs_config_eee_mult_fact(struct dw_xpcs *xpcs, u8 mult_fact)
+{
+ xpcs->eee_mult_fact = mult_fact;
+}
+EXPORT_SYMBOL_GPL(xpcs_config_eee_mult_fact);
+
static int xpcs_read_ids(struct dw_xpcs *xpcs)
{
int ret;
@@ -1341,6 +1368,8 @@ static const struct phylink_pcs_ops xpcs_phylink_ops = {
.pcs_get_state = xpcs_get_state,
.pcs_an_restart = xpcs_an_restart,
.pcs_link_up = xpcs_link_up,
+ .pcs_disable_eee = xpcs_disable_eee,
+ .pcs_enable_eee = xpcs_enable_eee,
};
static int xpcs_identify(struct dw_xpcs *xpcs)
@@ -1374,7 +1403,6 @@ static struct dw_xpcs *xpcs_create_data(struct mdio_device *mdiodev)
mdio_device_get(mdiodev);
xpcs->mdiodev = mdiodev;
xpcs->pcs.ops = &xpcs_phylink_ops;
- xpcs->pcs.neg_mode = true;
xpcs->pcs.poll = true;
return xpcs;
diff --git a/drivers/net/pcs/pcs-xpcs.h b/drivers/net/pcs/pcs-xpcs.h
index adc5a0b3c883..929fa238445e 100644
--- a/drivers/net/pcs/pcs-xpcs.h
+++ b/drivers/net/pcs/pcs-xpcs.h
@@ -55,23 +55,11 @@
/* Clause 37 Defines */
/* VR MII MMD registers offsets */
#define DW_VR_MII_DIG_CTRL1 0x8000
-#define DW_VR_MII_AN_CTRL 0x8001
-#define DW_VR_MII_AN_INTR_STS 0x8002
-/* EEE Mode Control Register */
-#define DW_VR_MII_EEE_MCTRL0 0x8006
-#define DW_VR_MII_EEE_MCTRL1 0x800b
-#define DW_VR_MII_DIG_CTRL2 0x80e1
-
-/* VR_MII_DIG_CTRL1 */
#define DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW BIT(9)
#define DW_VR_MII_DIG_CTRL1_2G5_EN BIT(2)
#define DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL BIT(0)
-/* VR_MII_DIG_CTRL2 */
-#define DW_VR_MII_DIG_CTRL2_TX_POL_INV BIT(4)
-#define DW_VR_MII_DIG_CTRL2_RX_POL_INV BIT(0)
-
-/* VR_MII_AN_CTRL */
+#define DW_VR_MII_AN_CTRL 0x8001
#define DW_VR_MII_AN_CTRL_8BIT BIT(8)
#define DW_VR_MII_TX_CONFIG_MASK BIT(3)
#define DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII 0x1
@@ -81,7 +69,7 @@
#define DW_VR_MII_PCS_MODE_C37_SGMII 0x2
#define DW_VR_MII_AN_INTR_EN BIT(0)
-/* VR_MII_AN_INTR_STS */
+#define DW_VR_MII_AN_INTR_STS 0x8002
#define DW_VR_MII_AN_STS_C37_ANCMPLT_INTR BIT(0)
#define DW_VR_MII_AN_STS_C37_ANSGM_FD BIT(1)
#define DW_VR_MII_AN_STS_C37_ANSGM_SP GENMASK(3, 2)
@@ -90,19 +78,22 @@
#define DW_VR_MII_C37_ANSGM_SP_1000 0x2
#define DW_VR_MII_C37_ANSGM_SP_LNKSTS BIT(4)
-/* VR MII EEE Control 0 defines */
+#define DW_VR_MII_EEE_MCTRL0 0x8006
#define DW_VR_MII_EEE_LTX_EN BIT(0) /* LPI Tx Enable */
#define DW_VR_MII_EEE_LRX_EN BIT(1) /* LPI Rx Enable */
#define DW_VR_MII_EEE_TX_QUIET_EN BIT(2) /* Tx Quiet Enable */
#define DW_VR_MII_EEE_RX_QUIET_EN BIT(3) /* Rx Quiet Enable */
#define DW_VR_MII_EEE_TX_EN_CTRL BIT(4) /* Tx Control Enable */
#define DW_VR_MII_EEE_RX_EN_CTRL BIT(7) /* Rx Control Enable */
-
#define DW_VR_MII_EEE_MULT_FACT_100NS GENMASK(11, 8)
-/* VR MII EEE Control 1 defines */
+#define DW_VR_MII_EEE_MCTRL1 0x800b
#define DW_VR_MII_EEE_TRN_LPI BIT(0) /* Transparent Mode Enable */
+#define DW_VR_MII_DIG_CTRL2 0x80e1
+#define DW_VR_MII_DIG_CTRL2_TX_POL_INV BIT(4)
+#define DW_VR_MII_DIG_CTRL2_RX_POL_INV BIT(0)
+
#define DW_XPCS_INFO_DECLARE(_name, _pcs, _pma) \
static const struct dw_xpcs_info _name = { .pcs = _pcs, .pma = _pma }
@@ -122,6 +113,7 @@ struct dw_xpcs {
struct phylink_pcs pcs;
phy_interface_t interface;
bool need_reset;
+ u8 eee_mult_fact;
};
int xpcs_read(struct dw_xpcs *xpcs, int dev, u32 reg);
diff --git a/drivers/net/phy/aquantia/aquantia_hwmon.c b/drivers/net/phy/aquantia/aquantia_hwmon.c
index 7b3c49c3bf49..1a714b56b765 100644
--- a/drivers/net/phy/aquantia/aquantia_hwmon.c
+++ b/drivers/net/phy/aquantia/aquantia_hwmon.c
@@ -172,33 +172,13 @@ static const struct hwmon_ops aqr_hwmon_ops = {
.write = aqr_hwmon_write,
};
-static u32 aqr_hwmon_chip_config[] = {
- HWMON_C_REGISTER_TZ,
- 0,
-};
-
-static const struct hwmon_channel_info aqr_hwmon_chip = {
- .type = hwmon_chip,
- .config = aqr_hwmon_chip_config,
-};
-
-static u32 aqr_hwmon_temp_config[] = {
- HWMON_T_INPUT |
- HWMON_T_MAX | HWMON_T_MIN |
- HWMON_T_MAX_ALARM | HWMON_T_MIN_ALARM |
- HWMON_T_CRIT | HWMON_T_LCRIT |
- HWMON_T_CRIT_ALARM | HWMON_T_LCRIT_ALARM,
- 0,
-};
-
-static const struct hwmon_channel_info aqr_hwmon_temp = {
- .type = hwmon_temp,
- .config = aqr_hwmon_temp_config,
-};
-
static const struct hwmon_channel_info * const aqr_hwmon_info[] = {
- &aqr_hwmon_chip,
- &aqr_hwmon_temp,
+ HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ),
+ HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT |
+ HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_MAX_ALARM | HWMON_T_MIN_ALARM |
+ HWMON_T_CRIT | HWMON_T_LCRIT |
+ HWMON_T_CRIT_ALARM | HWMON_T_LCRIT_ALARM),
NULL,
};
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 22edb7e4c1a1..13e43fee1906 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -16,7 +16,7 @@
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/phy.h>
-#include <linux/pm_wakeup.h>
+#include <linux/device.h>
#include <linux/brcmphy.h>
#include <linux/of.h>
#include <linux/interrupt.h>
diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
index 6599feca1967..3662f3905d5a 100644
--- a/drivers/net/phy/dp83822.c
+++ b/drivers/net/phy/dp83822.c
@@ -31,6 +31,7 @@
#define MII_DP83822_RCSR 0x17
#define MII_DP83822_RESET_CTRL 0x1f
#define MII_DP83822_MLEDCR 0x25
+#define MII_DP83822_LDCTRL 0x403
#define MII_DP83822_LEDCFG1 0x460
#define MII_DP83822_IOCTRL1 0x462
#define MII_DP83822_IOCTRL2 0x463
@@ -123,6 +124,9 @@
#define DP83822_IOCTRL1_GPIO1_CTRL GENMASK(2, 0)
#define DP83822_IOCTRL1_GPIO1_CTRL_LED_1 BIT(0)
+/* LDCTRL bits */
+#define DP83822_100BASE_TX_LINE_DRIVER_SWING GENMASK(7, 4)
+
/* IOCTRL2 bits */
#define DP83822_IOCTRL2_GPIO2_CLK_SRC GENMASK(6, 4)
#define DP83822_IOCTRL2_GPIO2_CTRL GENMASK(2, 0)
@@ -197,6 +201,7 @@ struct dp83822_private {
bool set_gpio2_clk_out;
u32 gpio2_clk_out;
bool led_pin_enable[DP83822_MAX_LED_PINS];
+ int tx_amplitude_100base_tx_index;
};
static int dp83822_config_wol(struct phy_device *phydev,
@@ -522,6 +527,12 @@ static int dp83822_config_init(struct phy_device *phydev)
FIELD_PREP(DP83822_IOCTRL2_GPIO2_CLK_SRC,
dp83822->gpio2_clk_out));
+ if (dp83822->tx_amplitude_100base_tx_index >= 0)
+ phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_LDCTRL,
+ DP83822_100BASE_TX_LINE_DRIVER_SWING,
+ FIELD_PREP(DP83822_100BASE_TX_LINE_DRIVER_SWING,
+ dp83822->tx_amplitude_100base_tx_index));
+
err = dp83822_config_init_leds(phydev);
if (err)
return err;
@@ -720,6 +731,11 @@ static int dp83822_phy_reset(struct phy_device *phydev)
}
#ifdef CONFIG_OF_MDIO
+static const u32 tx_amplitude_100base_tx_gain[] = {
+ 80, 82, 83, 85, 87, 88, 90, 92,
+ 93, 95, 97, 98, 100, 102, 103, 105,
+};
+
static int dp83822_of_init_leds(struct phy_device *phydev)
{
struct device_node *node = phydev->mdio.dev.of_node;
@@ -780,6 +796,8 @@ static int dp83822_of_init(struct phy_device *phydev)
struct dp83822_private *dp83822 = phydev->priv;
struct device *dev = &phydev->mdio.dev;
const char *of_val;
+ int i, ret;
+ u32 val;
/* Signal detection for the PHY is only enabled if the FX_EN and the
* SD_EN pins are strapped. Signal detection can only enabled if FX_EN
@@ -815,6 +833,26 @@ static int dp83822_of_init(struct phy_device *phydev)
dp83822->set_gpio2_clk_out = true;
}
+ dp83822->tx_amplitude_100base_tx_index = -1;
+ ret = phy_get_tx_amplitude_gain(phydev, dev,
+ ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+ &val);
+ if (!ret) {
+ for (i = 0; i < ARRAY_SIZE(tx_amplitude_100base_tx_gain); i++) {
+ if (tx_amplitude_100base_tx_gain[i] == val) {
+ dp83822->tx_amplitude_100base_tx_index = i;
+ break;
+ }
+ }
+
+ if (dp83822->tx_amplitude_100base_tx_index < 0) {
+ phydev_err(phydev,
+ "Invalid value for tx-amplitude-100base-tx-percent property (%u)\n",
+ val);
+ return -EINVAL;
+ }
+ }
+
return dp83822_of_init_leds(phydev);
}
diff --git a/drivers/net/phy/dp83td510.c b/drivers/net/phy/dp83td510.c
index a42af9c168ec..23af1ac194fa 100644
--- a/drivers/net/phy/dp83td510.c
+++ b/drivers/net/phy/dp83td510.c
@@ -204,10 +204,191 @@ struct dp83td510_priv {
#define DP83TD510E_UNKN_030E 0x30e
#define DP83TD510E_030E_VAL 0x2520
+#define DP83TD510E_LEDS_CFG_1 0x460
+#define DP83TD510E_LED_FN(idx, val) (((val) & 0xf) << ((idx) * 4))
+#define DP83TD510E_LED_FN_MASK(idx) (0xf << ((idx) * 4))
+/* link OK */
+#define DP83TD510E_LED_MODE_LINK_OK 0x0
+/* TX/RX activity */
+#define DP83TD510E_LED_MODE_TX_RX_ACTIVITY 0x1
+/* TX activity */
+#define DP83TD510E_LED_MODE_TX_ACTIVITY 0x2
+/* RX activity */
+#define DP83TD510E_LED_MODE_RX_ACTIVITY 0x3
+/* LR */
+#define DP83TD510E_LED_MODE_LR 0x4
+/* SR */
+#define DP83TD510E_LED_MODE_SR 0x5
+/* LED SPEED: High for 10Base-T */
+#define DP83TD510E_LED_MODE_LED_SPEED 0x6
+/* Duplex mode */
+#define DP83TD510E_LED_MODE_DUPLEX 0x7
+/* link + blink on activity with stretch option */
+#define DP83TD510E_LED_MODE_LINK_BLINK 0x8
+/* blink on activity with stretch option */
+#define DP83TD510E_LED_MODE_BLINK_ACTIVITY 0x9
+/* blink on tx activity with stretch option */
+#define DP83TD510E_LED_MODE_BLINK_TX 0xa
+/* blink on rx activity with stretch option */
+#define DP83TD510E_LED_MODE_BLINK_RX 0xb
+/* link_lost */
+#define DP83TD510E_LED_MODE_LINK_LOST 0xc
+/* PRBS error: toggles on error */
+#define DP83TD510E_LED_MODE_PRBS_ERROR 0xd
+/* XMII TX/RX Error with stretch option */
+#define DP83TD510E_LED_MODE_XMII_ERR 0xe
+
+#define DP83TD510E_LED_COUNT 4
+
+#define DP83TD510E_LEDS_CFG_2 0x469
+#define DP83TD510E_LED_POLARITY(idx) BIT((idx) * 4 + 2)
+#define DP83TD510E_LED_DRV_VAL(idx) BIT((idx) * 4 + 1)
+#define DP83TD510E_LED_DRV_EN(idx) BIT((idx) * 4)
+
#define DP83TD510E_ALCD_STAT 0xa9f
#define DP83TD510E_ALCD_COMPLETE BIT(15)
#define DP83TD510E_ALCD_CABLE_LENGTH GENMASK(10, 0)
+static int dp83td510_led_brightness_set(struct phy_device *phydev, u8 index,
+ enum led_brightness brightness)
+{
+ u32 val;
+
+ if (index >= DP83TD510E_LED_COUNT)
+ return -EINVAL;
+
+ val = DP83TD510E_LED_DRV_EN(index);
+
+ if (brightness)
+ val |= DP83TD510E_LED_DRV_VAL(index);
+
+ return phy_modify_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_LEDS_CFG_2,
+ DP83TD510E_LED_DRV_VAL(index) |
+ DP83TD510E_LED_DRV_EN(index), val);
+}
+
+static int dp83td510_led_mode(u8 index, unsigned long rules)
+{
+ if (index >= DP83TD510E_LED_COUNT)
+ return -EINVAL;
+
+ switch (rules) {
+ case BIT(TRIGGER_NETDEV_LINK):
+ return DP83TD510E_LED_MODE_LINK_OK;
+ case BIT(TRIGGER_NETDEV_LINK_10):
+ return DP83TD510E_LED_MODE_LED_SPEED;
+ case BIT(TRIGGER_NETDEV_FULL_DUPLEX):
+ return DP83TD510E_LED_MODE_DUPLEX;
+ case BIT(TRIGGER_NETDEV_TX):
+ return DP83TD510E_LED_MODE_TX_ACTIVITY;
+ case BIT(TRIGGER_NETDEV_RX):
+ return DP83TD510E_LED_MODE_RX_ACTIVITY;
+ case BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX):
+ return DP83TD510E_LED_MODE_TX_RX_ACTIVITY;
+ case BIT(TRIGGER_NETDEV_LINK) | BIT(TRIGGER_NETDEV_TX) |
+ BIT(TRIGGER_NETDEV_RX):
+ return DP83TD510E_LED_MODE_LINK_BLINK;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int dp83td510_led_hw_is_supported(struct phy_device *phydev, u8 index,
+ unsigned long rules)
+{
+ int ret;
+
+ ret = dp83td510_led_mode(index, rules);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int dp83td510_led_hw_control_set(struct phy_device *phydev, u8 index,
+ unsigned long rules)
+{
+ int mode, ret;
+
+ mode = dp83td510_led_mode(index, rules);
+ if (mode < 0)
+ return mode;
+
+ ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_LEDS_CFG_1,
+ DP83TD510E_LED_FN_MASK(index),
+ DP83TD510E_LED_FN(index, mode));
+ if (ret)
+ return ret;
+
+ return phy_modify_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_LEDS_CFG_2,
+ DP83TD510E_LED_DRV_EN(index), 0);
+}
+
+static int dp83td510_led_hw_control_get(struct phy_device *phydev,
+ u8 index, unsigned long *rules)
+{
+ int val;
+
+ val = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_LEDS_CFG_1);
+ if (val < 0)
+ return val;
+
+ val &= DP83TD510E_LED_FN_MASK(index);
+ val >>= index * 4;
+
+ switch (val) {
+ case DP83TD510E_LED_MODE_LINK_OK:
+ *rules = BIT(TRIGGER_NETDEV_LINK);
+ break;
+ /* LED mode: LED SPEED (10BaseT1L indicator) */
+ case DP83TD510E_LED_MODE_LED_SPEED:
+ *rules = BIT(TRIGGER_NETDEV_LINK_10);
+ break;
+ case DP83TD510E_LED_MODE_DUPLEX:
+ *rules = BIT(TRIGGER_NETDEV_FULL_DUPLEX);
+ break;
+ case DP83TD510E_LED_MODE_TX_ACTIVITY:
+ *rules = BIT(TRIGGER_NETDEV_TX);
+ break;
+ case DP83TD510E_LED_MODE_RX_ACTIVITY:
+ *rules = BIT(TRIGGER_NETDEV_RX);
+ break;
+ case DP83TD510E_LED_MODE_TX_RX_ACTIVITY:
+ *rules = BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX);
+ break;
+ case DP83TD510E_LED_MODE_LINK_BLINK:
+ *rules = BIT(TRIGGER_NETDEV_LINK) |
+ BIT(TRIGGER_NETDEV_TX) |
+ BIT(TRIGGER_NETDEV_RX);
+ break;
+ default:
+ *rules = 0;
+ break;
+ }
+
+ return 0;
+}
+
+static int dp83td510_led_polarity_set(struct phy_device *phydev, int index,
+ unsigned long modes)
+{
+ u16 polarity = DP83TD510E_LED_POLARITY(index);
+ u32 mode;
+
+ for_each_set_bit(mode, &modes, __PHY_LED_MODES_NUM) {
+ switch (mode) {
+ case PHY_LED_ACTIVE_LOW:
+ polarity = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return phy_modify_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_LEDS_CFG_2,
+ DP83TD510E_LED_POLARITY(index), polarity);
+}
+
/**
* dp83td510_update_stats - Update the PHY statistics for the DP83TD510 PHY.
* @phydev: Pointer to the phy_device structure.
@@ -712,6 +893,12 @@ static struct phy_driver dp83td510_driver[] = {
.get_phy_stats = dp83td510_get_phy_stats,
.update_stats = dp83td510_update_stats,
+ .led_brightness_set = dp83td510_led_brightness_set,
+ .led_hw_is_supported = dp83td510_led_hw_is_supported,
+ .led_hw_control_set = dp83td510_led_hw_control_set,
+ .led_hw_control_get = dp83td510_led_hw_control_get,
+ .led_polarity_set = dp83td510_led_polarity_set,
+
.suspend = genphy_suspend,
.resume = genphy_resume,
} };
diff --git a/drivers/net/phy/dp83tg720.c b/drivers/net/phy/dp83tg720.c
index 050f4537d140..7e76323409c4 100644
--- a/drivers/net/phy/dp83tg720.c
+++ b/drivers/net/phy/dp83tg720.c
@@ -4,12 +4,31 @@
*/
#include <linux/bitfield.h>
#include <linux/ethtool_netlink.h>
+#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/phy.h>
+#include <linux/random.h>
#include "open_alliance_helpers.h"
+/*
+ * DP83TG720S_POLL_ACTIVE_LINK - Polling interval in milliseconds when the link
+ * is active.
+ * DP83TG720S_POLL_NO_LINK_MIN - Minimum polling interval in milliseconds when
+ * the link is down.
+ * DP83TG720S_POLL_NO_LINK_MAX - Maximum polling interval in milliseconds when
+ * the link is down.
+ *
+ * These values are not documented or officially recommended by the vendor but
+ * were determined through empirical testing. They achieve a good balance in
+ * minimizing the number of reset retries while ensuring reliable link recovery
+ * within a reasonable timeframe.
+ */
+#define DP83TG720S_POLL_ACTIVE_LINK 1000
+#define DP83TG720S_POLL_NO_LINK_MIN 100
+#define DP83TG720S_POLL_NO_LINK_MAX 1000
+
#define DP83TG720S_PHY_ID 0x2000a284
/* MDIO_MMD_VEND2 registers */
@@ -371,6 +390,13 @@ static int dp83tg720_read_status(struct phy_device *phydev)
if (ret)
return ret;
+ /* Sleep 600ms for PHY stabilization post-reset.
+ * Empirically chosen value (not documented).
+ * Helps reduce reset bounces with link partners having similar
+ * issues.
+ */
+ msleep(600);
+
/* After HW reset we need to restore master/slave configuration.
* genphy_c45_pma_baset1_read_master_slave() call will be done
* by the dp83tg720_config_aneg() function.
@@ -498,6 +524,57 @@ static int dp83tg720_probe(struct phy_device *phydev)
return 0;
}
+/**
+ * dp83tg720_get_next_update_time - Determine the next update time for PHY
+ * state
+ * @phydev: Pointer to the phy_device structure
+ *
+ * This function addresses a limitation of the DP83TG720 PHY, which cannot
+ * reliably detect or report a stable link state. To recover from such
+ * scenarios, the PHY must be periodically reset when the link is down. However,
+ * if the link partner also runs Linux with the same driver, synchronized reset
+ * intervals can lead to a deadlock where the link never establishes due to
+ * simultaneous resets on both sides.
+ *
+ * To avoid this, the function implements randomized polling intervals when the
+ * link is down. It ensures that reset intervals are desynchronized by
+ * introducing a random delay between a configured minimum and maximum range.
+ * When the link is up, a fixed polling interval is used to minimize overhead.
+ *
+ * This mechanism guarantees that the link will reestablish within 10 seconds
+ * in the worst-case scenario.
+ *
+ * Return: Time (in jiffies) until the next update event for the PHY state
+ * machine.
+ */
+static unsigned int dp83tg720_get_next_update_time(struct phy_device *phydev)
+{
+ unsigned int next_time_jiffies;
+
+ if (phydev->link) {
+ /* When the link is up, use a fixed 1000ms interval
+ * (in jiffies)
+ */
+ next_time_jiffies =
+ msecs_to_jiffies(DP83TG720S_POLL_ACTIVE_LINK);
+ } else {
+ unsigned int min_jiffies, max_jiffies, rand_jiffies;
+
+ /* When the link is down, randomize interval between min/max
+ * (in jiffies)
+ */
+ min_jiffies = msecs_to_jiffies(DP83TG720S_POLL_NO_LINK_MIN);
+ max_jiffies = msecs_to_jiffies(DP83TG720S_POLL_NO_LINK_MAX);
+
+ rand_jiffies = min_jiffies +
+ get_random_u32_below(max_jiffies - min_jiffies + 1);
+ next_time_jiffies = rand_jiffies;
+ }
+
+ /* Ensure the polling time is at least one jiffy */
+ return max(next_time_jiffies, 1U);
+}
+
static struct phy_driver dp83tg720_driver[] = {
{
PHY_ID_MATCH_MODEL(DP83TG720S_PHY_ID),
@@ -516,6 +593,7 @@ static struct phy_driver dp83tg720_driver[] = {
.get_link_stats = dp83tg720_get_link_stats,
.get_phy_stats = dp83tg720_get_phy_stats,
.update_stats = dp83tg720_update_stats,
+ .get_next_update_time = dp83tg720_get_next_update_time,
.suspend = genphy_suspend,
.resume = genphy_resume,
diff --git a/drivers/net/phy/marvell-88q2xxx.c b/drivers/net/phy/marvell-88q2xxx.c
index a3996471a1c9..15c0f8adc2f5 100644
--- a/drivers/net/phy/marvell-88q2xxx.c
+++ b/drivers/net/phy/marvell-88q2xxx.c
@@ -7,30 +7,34 @@
* Copyright (C) 2024 Liebherr-Electronics and Drives GmbH
*/
#include <linux/ethtool_netlink.h>
+#include <linux/hwmon.h>
#include <linux/marvell_phy.h>
+#include <linux/of.h>
#include <linux/phy.h>
-#include <linux/hwmon.h>
-#define PHY_ID_88Q2220_REVB0 (MARVELL_PHY_ID_88Q2220 | 0x1)
-#define PHY_ID_88Q2220_REVB1 (MARVELL_PHY_ID_88Q2220 | 0x2)
-#define PHY_ID_88Q2220_REVB2 (MARVELL_PHY_ID_88Q2220 | 0x3)
+#define PHY_ID_88Q2220_REVB0 (MARVELL_PHY_ID_88Q2220 | 0x1)
+#define PHY_ID_88Q2220_REVB1 (MARVELL_PHY_ID_88Q2220 | 0x2)
+#define PHY_ID_88Q2220_REVB2 (MARVELL_PHY_ID_88Q2220 | 0x3)
-#define MDIO_MMD_AN_MV_STAT 32769
-#define MDIO_MMD_AN_MV_STAT_ANEG 0x0100
-#define MDIO_MMD_AN_MV_STAT_LOCAL_RX 0x1000
-#define MDIO_MMD_AN_MV_STAT_REMOTE_RX 0x2000
-#define MDIO_MMD_AN_MV_STAT_LOCAL_MASTER 0x4000
-#define MDIO_MMD_AN_MV_STAT_MS_CONF_FAULT 0x8000
+#define MDIO_MMD_AN_MV_STAT 32769
+#define MDIO_MMD_AN_MV_STAT_ANEG 0x0100
+#define MDIO_MMD_AN_MV_STAT_LOCAL_RX 0x1000
+#define MDIO_MMD_AN_MV_STAT_REMOTE_RX 0x2000
+#define MDIO_MMD_AN_MV_STAT_LOCAL_MASTER 0x4000
+#define MDIO_MMD_AN_MV_STAT_MS_CONF_FAULT 0x8000
-#define MDIO_MMD_AN_MV_STAT2 32794
-#define MDIO_MMD_AN_MV_STAT2_AN_RESOLVED 0x0800
-#define MDIO_MMD_AN_MV_STAT2_100BT1 0x2000
-#define MDIO_MMD_AN_MV_STAT2_1000BT1 0x4000
+#define MDIO_MMD_AN_MV_STAT2 32794
+#define MDIO_MMD_AN_MV_STAT2_AN_RESOLVED 0x0800
+#define MDIO_MMD_AN_MV_STAT2_100BT1 0x2000
+#define MDIO_MMD_AN_MV_STAT2_1000BT1 0x4000
-#define MDIO_MMD_PCS_MV_INT_EN 32784
-#define MDIO_MMD_PCS_MV_INT_EN_LINK_UP 0x0040
-#define MDIO_MMD_PCS_MV_INT_EN_LINK_DOWN 0x0080
-#define MDIO_MMD_PCS_MV_INT_EN_100BT1 0x1000
+#define MDIO_MMD_PCS_MV_RESET_CTRL 32768
+#define MDIO_MMD_PCS_MV_RESET_CTRL_TX_DISABLE 0x8
+
+#define MDIO_MMD_PCS_MV_INT_EN 32784
+#define MDIO_MMD_PCS_MV_INT_EN_LINK_UP 0x0040
+#define MDIO_MMD_PCS_MV_INT_EN_LINK_DOWN 0x0080
+#define MDIO_MMD_PCS_MV_INT_EN_100BT1 0x1000
#define MDIO_MMD_PCS_MV_GPIO_INT_STAT 32785
#define MDIO_MMD_PCS_MV_GPIO_INT_STAT_LINK_UP 0x0040
@@ -40,6 +44,22 @@
#define MDIO_MMD_PCS_MV_GPIO_INT_CTRL 32787
#define MDIO_MMD_PCS_MV_GPIO_INT_CTRL_TRI_DIS 0x0800
+#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL 32790
+#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_1_MASK GENMASK(7, 4)
+#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_0_MASK GENMASK(3, 0)
+#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK 0x0 /* Link established */
+#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_RX_TX 0x1 /* Link established, blink for rx or tx activity */
+#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_1000BT1 0x2 /* Blink 3x for 1000BT1 link established */
+#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_RX_TX_ON 0x3 /* Receive or transmit activity */
+#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_RX_TX 0x4 /* Blink on receive or transmit activity */
+#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_TX 0x5 /* Transmit activity */
+#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_COPPER 0x6 /* Copper Link established */
+#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_1000BT1_ON 0x7 /* 1000BT1 link established */
+#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_FORCE_OFF 0x8 /* Force off */
+#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_FORCE_ON 0x9 /* Force on */
+#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_FORCE_HIGHZ 0xa /* Force Hi-Z */
+#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_FORCE_BLINK 0xb /* Force blink */
+
#define MDIO_MMD_PCS_MV_TEMP_SENSOR1 32833
#define MDIO_MMD_PCS_MV_TEMP_SENSOR1_RAW_INT 0x0001
#define MDIO_MMD_PCS_MV_TEMP_SENSOR1_INT 0x0040
@@ -60,11 +80,11 @@
#define MDIO_MMD_PCS_MV_100BT1_STAT1_REMOTE_RX 0x2000
#define MDIO_MMD_PCS_MV_100BT1_STAT1_LOCAL_MASTER 0x4000
-#define MDIO_MMD_PCS_MV_100BT1_STAT2 33033
-#define MDIO_MMD_PCS_MV_100BT1_STAT2_JABBER 0x0001
-#define MDIO_MMD_PCS_MV_100BT1_STAT2_POL 0x0002
-#define MDIO_MMD_PCS_MV_100BT1_STAT2_LINK 0x0004
-#define MDIO_MMD_PCS_MV_100BT1_STAT2_ANGE 0x0008
+#define MDIO_MMD_PCS_MV_100BT1_STAT2 33033
+#define MDIO_MMD_PCS_MV_100BT1_STAT2_JABBER 0x0001
+#define MDIO_MMD_PCS_MV_100BT1_STAT2_POL 0x0002
+#define MDIO_MMD_PCS_MV_100BT1_STAT2_LINK 0x0004
+#define MDIO_MMD_PCS_MV_100BT1_STAT2_ANGE 0x0008
#define MDIO_MMD_PCS_MV_100BT1_INT_EN 33042
#define MDIO_MMD_PCS_MV_100BT1_INT_EN_LINKEVENT 0x0400
@@ -72,7 +92,7 @@
#define MDIO_MMD_PCS_MV_COPPER_INT_STAT 33043
#define MDIO_MMD_PCS_MV_COPPER_INT_STAT_LINKEVENT 0x0400
-#define MDIO_MMD_PCS_MV_RX_STAT 33328
+#define MDIO_MMD_PCS_MV_RX_STAT 33328
#define MDIO_MMD_PCS_MV_TDR_RESET 65226
#define MDIO_MMD_PCS_MV_TDR_RESET_TDR_RST 0x1000
@@ -95,8 +115,12 @@
#define MDIO_MMD_PCS_MV_TDR_OFF_CUTOFF 65246
+#define MV88Q2XXX_LED_INDEX_TX_ENABLE 0
+#define MV88Q2XXX_LED_INDEX_GPIO 1
+
struct mv88q2xxx_priv {
bool enable_temp;
+ bool enable_led0;
};
struct mmd_val {
@@ -460,6 +484,9 @@ static int mv88q2xxx_config_aneg(struct phy_device *phydev)
static int mv88q2xxx_config_init(struct phy_device *phydev)
{
+ struct mv88q2xxx_priv *priv = phydev->priv;
+ int ret;
+
/* The 88Q2XXX PHYs do have the extended ability register available, but
* register MDIO_PMA_EXTABLE where they should signalize it does not
* work according to specification. Therefore, we force it here.
@@ -469,10 +496,31 @@ static int mv88q2xxx_config_init(struct phy_device *phydev)
/* Configure interrupt with default settings, output is driven low for
* active interrupt and high for inactive.
*/
- if (phy_interrupt_is_valid(phydev))
- return phy_set_bits_mmd(phydev, MDIO_MMD_PCS,
- MDIO_MMD_PCS_MV_GPIO_INT_CTRL,
- MDIO_MMD_PCS_MV_GPIO_INT_CTRL_TRI_DIS);
+ if (phy_interrupt_is_valid(phydev)) {
+ ret = phy_set_bits_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_GPIO_INT_CTRL,
+ MDIO_MMD_PCS_MV_GPIO_INT_CTRL_TRI_DIS);
+ if (ret < 0)
+ return ret;
+ }
+
+ /* Enable LED function and disable TX disable feature on LED/TX_ENABLE */
+ if (priv->enable_led0) {
+ ret = phy_clear_bits_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_RESET_CTRL,
+ MDIO_MMD_PCS_MV_RESET_CTRL_TX_DISABLE);
+ if (ret < 0)
+ return ret;
+ }
+
+ /* Enable temperature sense */
+ if (priv->enable_temp) {
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_TEMP_SENSOR2,
+ MDIO_MMD_PCS_MV_TEMP_SENSOR2_DIS_MASK, 0);
+ if (ret < 0)
+ return ret;
+ }
return 0;
}
@@ -740,6 +788,49 @@ static int mv88q2xxx_hwmon_probe(struct phy_device *phydev)
}
#endif
+#if IS_ENABLED(CONFIG_OF_MDIO)
+static int mv88q2xxx_leds_probe(struct phy_device *phydev)
+{
+ struct device_node *node = phydev->mdio.dev.of_node;
+ struct mv88q2xxx_priv *priv = phydev->priv;
+ struct device_node *leds;
+ int ret = 0;
+ u32 index;
+
+ if (!node)
+ return 0;
+
+ leds = of_get_child_by_name(node, "leds");
+ if (!leds)
+ return 0;
+
+ for_each_available_child_of_node_scoped(leds, led) {
+ ret = of_property_read_u32(led, "reg", &index);
+ if (ret)
+ goto exit;
+
+ if (index > MV88Q2XXX_LED_INDEX_GPIO) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (index == MV88Q2XXX_LED_INDEX_TX_ENABLE)
+ priv->enable_led0 = true;
+ }
+
+exit:
+ of_node_put(leds);
+
+ return ret;
+}
+
+#else
+static int mv88q2xxx_leds_probe(struct phy_device *phydev)
+{
+ return 0;
+}
+#endif
+
static int mv88q2xxx_probe(struct phy_device *phydev)
{
struct mv88q2xxx_priv *priv;
@@ -750,6 +841,21 @@ static int mv88q2xxx_probe(struct phy_device *phydev)
phydev->priv = priv;
+ return 0;
+}
+
+static int mv88q222x_probe(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = mv88q2xxx_probe(phydev);
+ if (ret)
+ return ret;
+
+ ret = mv88q2xxx_leds_probe(phydev);
+ if (ret)
+ return ret;
+
return mv88q2xxx_hwmon_probe(phydev);
}
@@ -817,18 +923,6 @@ static int mv88q222x_revb1_revb2_config_init(struct phy_device *phydev)
static int mv88q222x_config_init(struct phy_device *phydev)
{
- struct mv88q2xxx_priv *priv = phydev->priv;
- int ret;
-
- /* Enable temperature sense */
- if (priv->enable_temp) {
- ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
- MDIO_MMD_PCS_MV_TEMP_SENSOR2,
- MDIO_MMD_PCS_MV_TEMP_SENSOR2_DIS_MASK, 0);
- if (ret < 0)
- return ret;
- }
-
if (phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] == PHY_ID_88Q2220_REVB0)
return mv88q222x_revb0_config_init(phydev);
else
@@ -918,11 +1012,104 @@ static int mv88q222x_cable_test_get_status(struct phy_device *phydev,
return 0;
}
+static int mv88q2xxx_led_mode(u8 index, unsigned long rules)
+{
+ switch (rules) {
+ case BIT(TRIGGER_NETDEV_LINK):
+ return MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK;
+ case BIT(TRIGGER_NETDEV_LINK_1000):
+ return MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_1000BT1_ON;
+ case BIT(TRIGGER_NETDEV_TX):
+ return MDIO_MMD_PCS_MV_LED_FUNC_CTRL_TX;
+ case BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX):
+ return MDIO_MMD_PCS_MV_LED_FUNC_CTRL_RX_TX;
+ case BIT(TRIGGER_NETDEV_LINK) | BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX):
+ return MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_RX_TX;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mv88q2xxx_led_hw_is_supported(struct phy_device *phydev, u8 index,
+ unsigned long rules)
+{
+ int mode;
+
+ mode = mv88q2xxx_led_mode(index, rules);
+ if (mode < 0)
+ return mode;
+
+ return 0;
+}
+
+static int mv88q2xxx_led_hw_control_set(struct phy_device *phydev, u8 index,
+ unsigned long rules)
+{
+ int mode;
+
+ mode = mv88q2xxx_led_mode(index, rules);
+ if (mode < 0)
+ return mode;
+
+ if (index == MV88Q2XXX_LED_INDEX_TX_ENABLE)
+ return phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_LED_FUNC_CTRL,
+ MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_0_MASK,
+ FIELD_PREP(MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_0_MASK,
+ mode));
+ else
+ return phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_LED_FUNC_CTRL,
+ MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_1_MASK,
+ FIELD_PREP(MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_1_MASK,
+ mode));
+}
+
+static int mv88q2xxx_led_hw_control_get(struct phy_device *phydev, u8 index,
+ unsigned long *rules)
+{
+ int val;
+
+ val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_LED_FUNC_CTRL);
+ if (val < 0)
+ return val;
+
+ if (index == MV88Q2XXX_LED_INDEX_TX_ENABLE)
+ val = FIELD_GET(MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_0_MASK, val);
+ else
+ val = FIELD_GET(MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_1_MASK, val);
+
+ switch (val) {
+ case MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK:
+ *rules = BIT(TRIGGER_NETDEV_LINK);
+ break;
+ case MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_1000BT1_ON:
+ *rules = BIT(TRIGGER_NETDEV_LINK_1000);
+ break;
+ case MDIO_MMD_PCS_MV_LED_FUNC_CTRL_TX:
+ *rules = BIT(TRIGGER_NETDEV_TX);
+ break;
+ case MDIO_MMD_PCS_MV_LED_FUNC_CTRL_RX_TX:
+ *rules = BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX);
+ break;
+ case MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_RX_TX:
+ *rules = BIT(TRIGGER_NETDEV_LINK) | BIT(TRIGGER_NETDEV_TX) |
+ BIT(TRIGGER_NETDEV_RX);
+ break;
+ default:
+ *rules = 0;
+ break;
+ }
+
+ return 0;
+}
+
static struct phy_driver mv88q2xxx_driver[] = {
{
.phy_id = MARVELL_PHY_ID_88Q2110,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "mv88q2110",
+ .probe = mv88q2xxx_probe,
.get_features = mv88q2xxx_get_features,
.config_aneg = mv88q2xxx_config_aneg,
.config_init = mv88q2110_config_init,
@@ -937,7 +1124,7 @@ static struct phy_driver mv88q2xxx_driver[] = {
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "mv88q2220",
.flags = PHY_POLL_CABLE_TEST,
- .probe = mv88q2xxx_probe,
+ .probe = mv88q222x_probe,
.get_features = mv88q2xxx_get_features,
.config_aneg = mv88q2xxx_config_aneg,
.aneg_done = genphy_c45_aneg_done,
@@ -953,6 +1140,9 @@ static struct phy_driver mv88q2xxx_driver[] = {
.get_sqi_max = mv88q2xxx_get_sqi_max,
.suspend = mv88q2xxx_suspend,
.resume = mv88q2xxx_resume,
+ .led_hw_is_supported = mv88q2xxx_led_hw_is_supported,
+ .led_hw_control_set = mv88q2xxx_led_hw_control_set,
+ .led_hw_control_get = mv88q2xxx_led_hw_control_get,
},
};
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 44e1927de499..dd254e36ca8a 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -3124,33 +3124,13 @@ static umode_t marvell_hwmon_is_visible(const void *data,
}
}
-static u32 marvell_hwmon_chip_config[] = {
- HWMON_C_REGISTER_TZ,
- 0
-};
-
-static const struct hwmon_channel_info marvell_hwmon_chip = {
- .type = hwmon_chip,
- .config = marvell_hwmon_chip_config,
-};
-
/* we can define HWMON_T_CRIT and HWMON_T_MAX_ALARM even though these are not
* defined for all PHYs, because the hwmon code checks whether the attributes
* exists via the .is_visible method
*/
-static u32 marvell_hwmon_temp_config[] = {
- HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_MAX_ALARM,
- 0
-};
-
-static const struct hwmon_channel_info marvell_hwmon_temp = {
- .type = hwmon_temp,
- .config = marvell_hwmon_temp_config,
-};
-
static const struct hwmon_channel_info * const marvell_hwmon_info[] = {
- &marvell_hwmon_chip,
- &marvell_hwmon_temp,
+ HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ),
+ HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_MAX_ALARM),
NULL
};
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 623bdb8466b8..5354c8895163 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -230,29 +230,9 @@ static const struct hwmon_ops mv3310_hwmon_ops = {
.read = mv3310_hwmon_read,
};
-static u32 mv3310_hwmon_chip_config[] = {
- HWMON_C_REGISTER_TZ | HWMON_C_UPDATE_INTERVAL,
- 0,
-};
-
-static const struct hwmon_channel_info mv3310_hwmon_chip = {
- .type = hwmon_chip,
- .config = mv3310_hwmon_chip_config,
-};
-
-static u32 mv3310_hwmon_temp_config[] = {
- HWMON_T_INPUT,
- 0,
-};
-
-static const struct hwmon_channel_info mv3310_hwmon_temp = {
- .type = hwmon_temp,
- .config = mv3310_hwmon_temp_config,
-};
-
static const struct hwmon_channel_info * const mv3310_hwmon_info[] = {
- &mv3310_hwmon_chip,
- &mv3310_hwmon_temp,
+ HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ | HWMON_C_UPDATE_INTERVAL),
+ HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT),
NULL,
};
diff --git a/drivers/net/phy/mediatek/mtk-ge-soc.c b/drivers/net/phy/mediatek/mtk-ge-soc.c
index bdf99b327029..9de6fbb45564 100644
--- a/drivers/net/phy/mediatek/mtk-ge-soc.c
+++ b/drivers/net/phy/mediatek/mtk-ge-soc.c
@@ -24,7 +24,107 @@
#define MTK_PHY_SMI_DET_ON_THRESH_MASK GENMASK(13, 8)
#define MTK_PHY_PAGE_EXTENDED_2A30 0x2a30
-#define MTK_PHY_PAGE_EXTENDED_52B5 0x52b5
+
+/* Registers on Token Ring debug nodes */
+/* ch_addr = 0x0, node_addr = 0x7, data_addr = 0x15 */
+/* NormMseLoThresh */
+#define NORMAL_MSE_LO_THRESH_MASK GENMASK(15, 8)
+
+/* ch_addr = 0x0, node_addr = 0xf, data_addr = 0x3c */
+/* RemAckCntLimitCtrl */
+#define REMOTE_ACK_COUNT_LIMIT_CTRL_MASK GENMASK(2, 1)
+
+/* ch_addr = 0x1, node_addr = 0xd, data_addr = 0x20 */
+/* VcoSlicerThreshBitsHigh */
+#define VCO_SLICER_THRESH_HIGH_MASK GENMASK(23, 0)
+
+/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x0 */
+/* DfeTailEnableVgaThresh1000 */
+#define DFE_TAIL_EANBLE_VGA_TRHESH_1000 GENMASK(5, 1)
+
+/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x1 */
+/* MrvlTrFix100Kp */
+#define MRVL_TR_FIX_100KP_MASK GENMASK(22, 20)
+/* MrvlTrFix100Kf */
+#define MRVL_TR_FIX_100KF_MASK GENMASK(19, 17)
+/* MrvlTrFix1000Kp */
+#define MRVL_TR_FIX_1000KP_MASK GENMASK(16, 14)
+/* MrvlTrFix1000Kf */
+#define MRVL_TR_FIX_1000KF_MASK GENMASK(13, 11)
+
+/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x12 */
+/* VgaDecRate */
+#define VGA_DECIMATION_RATE_MASK GENMASK(8, 5)
+
+/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x17 */
+/* SlvDSPreadyTime */
+#define SLAVE_DSP_READY_TIME_MASK GENMASK(22, 15)
+/* MasDSPreadyTime */
+#define MASTER_DSP_READY_TIME_MASK GENMASK(14, 7)
+
+/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x18 */
+/* EnabRandUpdTrig */
+#define ENABLE_RANDOM_UPDOWN_COUNTER_TRIGGER BIT(8)
+
+/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x20 */
+/* ResetSyncOffset */
+#define RESET_SYNC_OFFSET_MASK GENMASK(11, 8)
+
+/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x0 */
+/* FfeUpdGainForceVal */
+#define FFE_UPDATE_GAIN_FORCE_VAL_MASK GENMASK(9, 7)
+/* FfeUpdGainForce */
+#define FFE_UPDATE_GAIN_FORCE BIT(6)
+
+/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x3 */
+/* TrFreeze */
+#define TR_FREEZE_MASK GENMASK(11, 0)
+
+/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x6 */
+/* SS: Steady-state, KP: Proportional Gain */
+/* SSTrKp100 */
+#define SS_TR_KP100_MASK GENMASK(21, 19)
+/* SSTrKf100 */
+#define SS_TR_KF100_MASK GENMASK(18, 16)
+/* SSTrKp1000Mas */
+#define SS_TR_KP1000_MASTER_MASK GENMASK(15, 13)
+/* SSTrKf1000Mas */
+#define SS_TR_KF1000_MASTER_MASK GENMASK(12, 10)
+/* SSTrKp1000Slv */
+#define SS_TR_KP1000_SLAVE_MASK GENMASK(9, 7)
+/* SSTrKf1000Slv */
+#define SS_TR_KF1000_SLAVE_MASK GENMASK(6, 4)
+
+/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x8 */
+/* clear this bit if wanna select from AFE */
+/* Regsigdet_sel_1000 */
+#define EEE1000_SELECT_SIGNAL_DETECTION_FROM_DFE BIT(4)
+
+/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0xd */
+/* RegEEE_st2TrKf1000 */
+#define EEE1000_STAGE2_TR_KF_MASK GENMASK(13, 11)
+
+/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0xf */
+/* RegEEE_slv_waketr_timer_tar */
+#define SLAVE_WAKETR_TIMER_MASK GENMASK(20, 11)
+/* RegEEE_slv_remtx_timer_tar */
+#define SLAVE_REMTX_TIMER_MASK GENMASK(10, 1)
+
+/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x10 */
+/* RegEEE_slv_wake_int_timer_tar */
+#define SLAVE_WAKEINT_TIMER_MASK GENMASK(10, 1)
+
+/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x14 */
+/* RegEEE_trfreeze_timer2 */
+#define TR_FREEZE_TIMER2_MASK GENMASK(9, 0)
+
+/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x1c */
+/* RegEEE100Stg1_tar */
+#define EEE100_LPSYNC_STAGE1_UPDATE_TIMER_MASK GENMASK(8, 0)
+
+/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x25 */
+/* REGEEE_wake_slv_tr_wait_dfesigdet_en */
+#define WAKE_SLAVE_TR_WAIT_DFE_DETECTION_EN BIT(11)
#define ANALOG_INTERNAL_OPERATION_MAX_US 20
#define TXRESERVE_MIN 0
@@ -701,40 +801,36 @@ restore:
static void mt798x_phy_common_finetune(struct phy_device *phydev)
{
phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5);
- /* SlvDSPreadyTime = 24, MasDSPreadyTime = 24 */
- __phy_write(phydev, 0x11, 0xc71);
- __phy_write(phydev, 0x12, 0xc);
- __phy_write(phydev, 0x10, 0x8fae);
-
- /* EnabRandUpdTrig = 1 */
- __phy_write(phydev, 0x11, 0x2f00);
- __phy_write(phydev, 0x12, 0xe);
- __phy_write(phydev, 0x10, 0x8fb0);
-
- /* NormMseLoThresh = 85 */
- __phy_write(phydev, 0x11, 0x55a0);
- __phy_write(phydev, 0x12, 0x0);
- __phy_write(phydev, 0x10, 0x83aa);
-
- /* FfeUpdGainForce = 1(Enable), FfeUpdGainForceVal = 4 */
- __phy_write(phydev, 0x11, 0x240);
- __phy_write(phydev, 0x12, 0x0);
- __phy_write(phydev, 0x10, 0x9680);
-
- /* TrFreeze = 0 (mt7988 default) */
- __phy_write(phydev, 0x11, 0x0);
- __phy_write(phydev, 0x12, 0x0);
- __phy_write(phydev, 0x10, 0x9686);
-
- /* SSTrKp100 = 5 */
- /* SSTrKf100 = 6 */
- /* SSTrKp1000Mas = 5 */
- /* SSTrKf1000Mas = 6 */
- /* SSTrKp1000Slv = 5 */
- /* SSTrKf1000Slv = 6 */
- __phy_write(phydev, 0x11, 0xbaef);
- __phy_write(phydev, 0x12, 0x2e);
- __phy_write(phydev, 0x10, 0x968c);
+ __mtk_tr_modify(phydev, 0x1, 0xf, 0x17,
+ SLAVE_DSP_READY_TIME_MASK | MASTER_DSP_READY_TIME_MASK,
+ FIELD_PREP(SLAVE_DSP_READY_TIME_MASK, 0x18) |
+ FIELD_PREP(MASTER_DSP_READY_TIME_MASK, 0x18));
+
+ __mtk_tr_set_bits(phydev, 0x1, 0xf, 0x18,
+ ENABLE_RANDOM_UPDOWN_COUNTER_TRIGGER);
+
+ __mtk_tr_modify(phydev, 0x0, 0x7, 0x15,
+ NORMAL_MSE_LO_THRESH_MASK,
+ FIELD_PREP(NORMAL_MSE_LO_THRESH_MASK, 0x55));
+
+ __mtk_tr_modify(phydev, 0x2, 0xd, 0x0,
+ FFE_UPDATE_GAIN_FORCE_VAL_MASK,
+ FIELD_PREP(FFE_UPDATE_GAIN_FORCE_VAL_MASK, 0x4) |
+ FFE_UPDATE_GAIN_FORCE);
+
+ __mtk_tr_clr_bits(phydev, 0x2, 0xd, 0x3, TR_FREEZE_MASK);
+
+ __mtk_tr_modify(phydev, 0x2, 0xd, 0x6,
+ SS_TR_KP100_MASK | SS_TR_KF100_MASK |
+ SS_TR_KP1000_MASTER_MASK | SS_TR_KF1000_MASTER_MASK |
+ SS_TR_KP1000_SLAVE_MASK | SS_TR_KF1000_SLAVE_MASK,
+ FIELD_PREP(SS_TR_KP100_MASK, 0x5) |
+ FIELD_PREP(SS_TR_KF100_MASK, 0x6) |
+ FIELD_PREP(SS_TR_KP1000_MASTER_MASK, 0x5) |
+ FIELD_PREP(SS_TR_KF1000_MASTER_MASK, 0x6) |
+ FIELD_PREP(SS_TR_KP1000_SLAVE_MASK, 0x5) |
+ FIELD_PREP(SS_TR_KF1000_SLAVE_MASK, 0x6));
+
phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
}
@@ -757,27 +853,29 @@ static void mt7981_phy_finetune(struct phy_device *phydev)
}
phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5);
- /* ResetSyncOffset = 6 */
- __phy_write(phydev, 0x11, 0x600);
- __phy_write(phydev, 0x12, 0x0);
- __phy_write(phydev, 0x10, 0x8fc0);
+ __mtk_tr_modify(phydev, 0x1, 0xf, 0x20,
+ RESET_SYNC_OFFSET_MASK,
+ FIELD_PREP(RESET_SYNC_OFFSET_MASK, 0x6));
- /* VgaDecRate = 1 */
- __phy_write(phydev, 0x11, 0x4c2a);
- __phy_write(phydev, 0x12, 0x3e);
- __phy_write(phydev, 0x10, 0x8fa4);
+ __mtk_tr_modify(phydev, 0x1, 0xf, 0x12,
+ VGA_DECIMATION_RATE_MASK,
+ FIELD_PREP(VGA_DECIMATION_RATE_MASK, 0x1));
/* MrvlTrFix100Kp = 3, MrvlTrFix100Kf = 2,
* MrvlTrFix1000Kp = 3, MrvlTrFix1000Kf = 2
*/
- __phy_write(phydev, 0x11, 0xd10a);
- __phy_write(phydev, 0x12, 0x34);
- __phy_write(phydev, 0x10, 0x8f82);
+ __mtk_tr_modify(phydev, 0x1, 0xf, 0x1,
+ MRVL_TR_FIX_100KP_MASK | MRVL_TR_FIX_100KF_MASK |
+ MRVL_TR_FIX_1000KP_MASK | MRVL_TR_FIX_1000KF_MASK,
+ FIELD_PREP(MRVL_TR_FIX_100KP_MASK, 0x3) |
+ FIELD_PREP(MRVL_TR_FIX_100KF_MASK, 0x2) |
+ FIELD_PREP(MRVL_TR_FIX_1000KP_MASK, 0x3) |
+ FIELD_PREP(MRVL_TR_FIX_1000KF_MASK, 0x2));
/* VcoSlicerThreshBitsHigh */
- __phy_write(phydev, 0x11, 0x5555);
- __phy_write(phydev, 0x12, 0x55);
- __phy_write(phydev, 0x10, 0x8ec0);
+ __mtk_tr_modify(phydev, 0x1, 0xd, 0x20,
+ VCO_SLICER_THRESH_HIGH_MASK,
+ FIELD_PREP(VCO_SLICER_THRESH_HIGH_MASK, 0x555555));
phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
/* TR_OPEN_LOOP_EN = 1, lpf_x_average = 9 */
@@ -829,25 +927,23 @@ static void mt7988_phy_finetune(struct phy_device *phydev)
phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_TX_FILTER, 0x5);
phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5);
- /* ResetSyncOffset = 5 */
- __phy_write(phydev, 0x11, 0x500);
- __phy_write(phydev, 0x12, 0x0);
- __phy_write(phydev, 0x10, 0x8fc0);
+ __mtk_tr_modify(phydev, 0x1, 0xf, 0x20,
+ RESET_SYNC_OFFSET_MASK,
+ FIELD_PREP(RESET_SYNC_OFFSET_MASK, 0x5));
/* VgaDecRate is 1 at default on mt7988 */
- /* MrvlTrFix100Kp = 6, MrvlTrFix100Kf = 7,
- * MrvlTrFix1000Kp = 6, MrvlTrFix1000Kf = 7
- */
- __phy_write(phydev, 0x11, 0xb90a);
- __phy_write(phydev, 0x12, 0x6f);
- __phy_write(phydev, 0x10, 0x8f82);
-
- /* RemAckCntLimitCtrl = 1 */
- __phy_write(phydev, 0x11, 0xfbba);
- __phy_write(phydev, 0x12, 0xc3);
- __phy_write(phydev, 0x10, 0x87f8);
-
+ __mtk_tr_modify(phydev, 0x1, 0xf, 0x1,
+ MRVL_TR_FIX_100KP_MASK | MRVL_TR_FIX_100KF_MASK |
+ MRVL_TR_FIX_1000KP_MASK | MRVL_TR_FIX_1000KF_MASK,
+ FIELD_PREP(MRVL_TR_FIX_100KP_MASK, 0x6) |
+ FIELD_PREP(MRVL_TR_FIX_100KF_MASK, 0x7) |
+ FIELD_PREP(MRVL_TR_FIX_1000KP_MASK, 0x6) |
+ FIELD_PREP(MRVL_TR_FIX_1000KF_MASK, 0x7));
+
+ __mtk_tr_modify(phydev, 0x0, 0xf, 0x3c,
+ REMOTE_ACK_COUNT_LIMIT_CTRL_MASK,
+ FIELD_PREP(REMOTE_ACK_COUNT_LIMIT_CTRL_MASK, 0x1));
phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
/* TR_OPEN_LOOP_EN = 1, lpf_x_average = 10 */
@@ -923,45 +1019,37 @@ static void mt798x_phy_eee(struct phy_device *phydev)
MTK_PHY_TR_READY_SKIP_AFE_WAKEUP);
phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5);
- /* Regsigdet_sel_1000 = 0 */
- __phy_write(phydev, 0x11, 0xb);
- __phy_write(phydev, 0x12, 0x0);
- __phy_write(phydev, 0x10, 0x9690);
-
- /* REG_EEE_st2TrKf1000 = 2 */
- __phy_write(phydev, 0x11, 0x114f);
- __phy_write(phydev, 0x12, 0x2);
- __phy_write(phydev, 0x10, 0x969a);
-
- /* RegEEE_slv_wake_tr_timer_tar = 6, RegEEE_slv_remtx_timer_tar = 20 */
- __phy_write(phydev, 0x11, 0x3028);
- __phy_write(phydev, 0x12, 0x0);
- __phy_write(phydev, 0x10, 0x969e);
-
- /* RegEEE_slv_wake_int_timer_tar = 8 */
- __phy_write(phydev, 0x11, 0x5010);
- __phy_write(phydev, 0x12, 0x0);
- __phy_write(phydev, 0x10, 0x96a0);
-
- /* RegEEE_trfreeze_timer2 = 586 */
- __phy_write(phydev, 0x11, 0x24a);
- __phy_write(phydev, 0x12, 0x0);
- __phy_write(phydev, 0x10, 0x96a8);
-
- /* RegEEE100Stg1_tar = 16 */
- __phy_write(phydev, 0x11, 0x3210);
- __phy_write(phydev, 0x12, 0x0);
- __phy_write(phydev, 0x10, 0x96b8);
-
- /* REGEEE_wake_slv_tr_wait_dfesigdet_en = 0 */
- __phy_write(phydev, 0x11, 0x1463);
- __phy_write(phydev, 0x12, 0x0);
- __phy_write(phydev, 0x10, 0x96ca);
-
- /* DfeTailEnableVgaThresh1000 = 27 */
- __phy_write(phydev, 0x11, 0x36);
- __phy_write(phydev, 0x12, 0x0);
- __phy_write(phydev, 0x10, 0x8f80);
+ __mtk_tr_clr_bits(phydev, 0x2, 0xd, 0x8,
+ EEE1000_SELECT_SIGNAL_DETECTION_FROM_DFE);
+
+ __mtk_tr_modify(phydev, 0x2, 0xd, 0xd,
+ EEE1000_STAGE2_TR_KF_MASK,
+ FIELD_PREP(EEE1000_STAGE2_TR_KF_MASK, 0x2));
+
+ __mtk_tr_modify(phydev, 0x2, 0xd, 0xf,
+ SLAVE_WAKETR_TIMER_MASK | SLAVE_REMTX_TIMER_MASK,
+ FIELD_PREP(SLAVE_WAKETR_TIMER_MASK, 0x6) |
+ FIELD_PREP(SLAVE_REMTX_TIMER_MASK, 0x14));
+
+ __mtk_tr_modify(phydev, 0x2, 0xd, 0x10,
+ SLAVE_WAKEINT_TIMER_MASK,
+ FIELD_PREP(SLAVE_WAKEINT_TIMER_MASK, 0x8));
+
+ __mtk_tr_modify(phydev, 0x2, 0xd, 0x14,
+ TR_FREEZE_TIMER2_MASK,
+ FIELD_PREP(TR_FREEZE_TIMER2_MASK, 0x24a));
+
+ __mtk_tr_modify(phydev, 0x2, 0xd, 0x1c,
+ EEE100_LPSYNC_STAGE1_UPDATE_TIMER_MASK,
+ FIELD_PREP(EEE100_LPSYNC_STAGE1_UPDATE_TIMER_MASK,
+ 0x10));
+
+ __mtk_tr_clr_bits(phydev, 0x2, 0xd, 0x25,
+ WAKE_SLAVE_TR_WAIT_DFE_DETECTION_EN);
+
+ __mtk_tr_modify(phydev, 0x1, 0xf, 0x0,
+ DFE_TAIL_EANBLE_VGA_TRHESH_1000,
+ FIELD_PREP(DFE_TAIL_EANBLE_VGA_TRHESH_1000, 0x1b));
phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_3);
diff --git a/drivers/net/phy/mediatek/mtk-ge.c b/drivers/net/phy/mediatek/mtk-ge.c
index b517ca8573e7..73d9b72f9d9e 100644
--- a/drivers/net/phy/mediatek/mtk-ge.c
+++ b/drivers/net/phy/mediatek/mtk-ge.c
@@ -8,31 +8,58 @@
#define MTK_GPHY_ID_MT7530 0x03a29412
#define MTK_GPHY_ID_MT7531 0x03a29441
-#define MTK_EXT_PAGE_ACCESS 0x1f
-#define MTK_PHY_PAGE_STANDARD 0x0000
-#define MTK_PHY_PAGE_EXTENDED 0x0001
-#define MTK_PHY_PAGE_EXTENDED_2 0x0002
-#define MTK_PHY_PAGE_EXTENDED_3 0x0003
-#define MTK_PHY_PAGE_EXTENDED_2A30 0x2a30
-#define MTK_PHY_PAGE_EXTENDED_52B5 0x52b5
+#define MTK_PHY_PAGE_EXTENDED_2 0x0002
+#define MTK_PHY_PAGE_EXTENDED_3 0x0003
+#define MTK_PHY_RG_LPI_PCS_DSP_CTRL_REG11 0x11
+
+#define MTK_PHY_PAGE_EXTENDED_2A30 0x2a30
+
+/* Registers on Token Ring debug nodes */
+/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x17 */
+#define SLAVE_DSP_READY_TIME_MASK GENMASK(22, 15)
+
+/* Registers on MDIO_MMD_VEND1 */
+#define MTK_PHY_GBE_MODE_TX_DELAY_SEL 0x13
+#define MTK_PHY_TEST_MODE_TX_DELAY_SEL 0x14
+#define MTK_TX_DELAY_PAIR_B_MASK GENMASK(10, 8)
+#define MTK_TX_DELAY_PAIR_D_MASK GENMASK(2, 0)
+
+#define MTK_PHY_MCC_CTRL_AND_TX_POWER_CTRL 0xa6
+#define MTK_MCC_NEARECHO_OFFSET_MASK GENMASK(15, 8)
+
+#define MTK_PHY_RXADC_CTRL_RG7 0xc6
+#define MTK_PHY_DA_AD_BUF_BIAS_LP_MASK GENMASK(9, 8)
+
+#define MTK_PHY_RG_LPI_PCS_DSP_CTRL_REG123 0x123
+#define MTK_PHY_LPI_NORM_MSE_LO_THRESH100_MASK GENMASK(15, 8)
+#define MTK_PHY_LPI_NORM_MSE_HI_THRESH100_MASK GENMASK(7, 0)
static void mtk_gephy_config_init(struct phy_device *phydev)
{
/* Enable HW auto downshift */
- phy_modify_paged(phydev, MTK_PHY_PAGE_EXTENDED, 0x14, 0, BIT(4));
+ phy_modify_paged(phydev, MTK_PHY_PAGE_EXTENDED_1,
+ MTK_PHY_AUX_CTRL_AND_STATUS,
+ 0, MTK_PHY_ENABLE_DOWNSHIFT);
/* Increase SlvDPSready time */
- phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5);
- __phy_write(phydev, 0x10, 0xafae);
- __phy_write(phydev, 0x12, 0x2f);
- __phy_write(phydev, 0x10, 0x8fae);
- phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
+ mtk_tr_modify(phydev, 0x1, 0xf, 0x17, SLAVE_DSP_READY_TIME_MASK,
+ FIELD_PREP(SLAVE_DSP_READY_TIME_MASK, 0x5e));
/* Adjust 100_mse_threshold */
- phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x123, 0xffff);
-
- /* Disable mcc */
- phy_write_mmd(phydev, MDIO_MMD_VEND1, 0xa6, 0x300);
+ phy_modify_mmd(phydev, MDIO_MMD_VEND1,
+ MTK_PHY_RG_LPI_PCS_DSP_CTRL_REG123,
+ MTK_PHY_LPI_NORM_MSE_LO_THRESH100_MASK |
+ MTK_PHY_LPI_NORM_MSE_HI_THRESH100_MASK,
+ FIELD_PREP(MTK_PHY_LPI_NORM_MSE_LO_THRESH100_MASK,
+ 0xff) |
+ FIELD_PREP(MTK_PHY_LPI_NORM_MSE_HI_THRESH100_MASK,
+ 0xff));
+
+ /* If echo time is narrower than 0x3, it will be regarded as noise */
+ phy_modify_mmd(phydev, MDIO_MMD_VEND1,
+ MTK_PHY_MCC_CTRL_AND_TX_POWER_CTRL,
+ MTK_MCC_NEARECHO_OFFSET_MASK,
+ FIELD_PREP(MTK_MCC_NEARECHO_OFFSET_MASK, 0x3));
}
static int mt7530_phy_config_init(struct phy_device *phydev)
@@ -40,7 +67,8 @@ static int mt7530_phy_config_init(struct phy_device *phydev)
mtk_gephy_config_init(phydev);
/* Increase post_update_timer */
- phy_write_paged(phydev, MTK_PHY_PAGE_EXTENDED_3, 0x11, 0x4b);
+ phy_write_paged(phydev, MTK_PHY_PAGE_EXTENDED_3,
+ MTK_PHY_RG_LPI_PCS_DSP_CTRL_REG11, 0x4b);
return 0;
}
@@ -51,11 +79,19 @@ static int mt7531_phy_config_init(struct phy_device *phydev)
/* PHY link down power saving enable */
phy_set_bits(phydev, 0x17, BIT(4));
- phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, 0xc6, 0x300);
+ phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RXADC_CTRL_RG7,
+ MTK_PHY_DA_AD_BUF_BIAS_LP_MASK,
+ FIELD_PREP(MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, 0x3));
/* Set TX Pair delay selection */
- phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x13, 0x404);
- phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x14, 0x404);
+ phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_GBE_MODE_TX_DELAY_SEL,
+ MTK_TX_DELAY_PAIR_B_MASK | MTK_TX_DELAY_PAIR_D_MASK,
+ FIELD_PREP(MTK_TX_DELAY_PAIR_B_MASK, 0x4) |
+ FIELD_PREP(MTK_TX_DELAY_PAIR_D_MASK, 0x4));
+ phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_TEST_MODE_TX_DELAY_SEL,
+ MTK_TX_DELAY_PAIR_B_MASK | MTK_TX_DELAY_PAIR_D_MASK,
+ FIELD_PREP(MTK_TX_DELAY_PAIR_B_MASK, 0x4) |
+ FIELD_PREP(MTK_TX_DELAY_PAIR_D_MASK, 0x4));
return 0;
}
diff --git a/drivers/net/phy/mediatek/mtk-phy-lib.c b/drivers/net/phy/mediatek/mtk-phy-lib.c
index 98a09d670e9c..dfd0f4e439a2 100644
--- a/drivers/net/phy/mediatek/mtk-phy-lib.c
+++ b/drivers/net/phy/mediatek/mtk-phy-lib.c
@@ -6,6 +6,83 @@
#include "mtk.h"
+/* Difference between functions with mtk_tr* and __mtk_tr* prefixes is
+ * mtk_tr* functions: wrapped by page switching operations
+ * __mtk_tr* functions: no page switching operations
+ */
+
+static void __mtk_tr_access(struct phy_device *phydev, bool read, u8 ch_addr,
+ u8 node_addr, u8 data_addr)
+{
+ u16 tr_cmd = BIT(15); /* bit 14 & 0 are reserved */
+
+ if (read)
+ tr_cmd |= BIT(13);
+
+ tr_cmd |= (((ch_addr & 0x3) << 11) |
+ ((node_addr & 0xf) << 7) |
+ ((data_addr & 0x3f) << 1));
+ dev_dbg(&phydev->mdio.dev, "tr_cmd: 0x%x\n", tr_cmd);
+ __phy_write(phydev, 0x10, tr_cmd);
+}
+
+static void __mtk_tr_read(struct phy_device *phydev, u8 ch_addr, u8 node_addr,
+ u8 data_addr, u16 *tr_high, u16 *tr_low)
+{
+ __mtk_tr_access(phydev, true, ch_addr, node_addr, data_addr);
+ *tr_low = __phy_read(phydev, 0x11);
+ *tr_high = __phy_read(phydev, 0x12);
+ dev_dbg(&phydev->mdio.dev, "tr_high read: 0x%x, tr_low read: 0x%x\n",
+ *tr_high, *tr_low);
+}
+
+static void __mtk_tr_write(struct phy_device *phydev, u8 ch_addr, u8 node_addr,
+ u8 data_addr, u32 tr_data)
+{
+ __phy_write(phydev, 0x11, tr_data & 0xffff);
+ __phy_write(phydev, 0x12, tr_data >> 16);
+ dev_dbg(&phydev->mdio.dev, "tr_high write: 0x%x, tr_low write: 0x%x\n",
+ tr_data >> 16, tr_data & 0xffff);
+ __mtk_tr_access(phydev, false, ch_addr, node_addr, data_addr);
+}
+
+void __mtk_tr_modify(struct phy_device *phydev, u8 ch_addr, u8 node_addr,
+ u8 data_addr, u32 mask, u32 set)
+{
+ u32 tr_data;
+ u16 tr_high;
+ u16 tr_low;
+
+ __mtk_tr_read(phydev, ch_addr, node_addr, data_addr, &tr_high, &tr_low);
+ tr_data = (tr_high << 16) | tr_low;
+ tr_data = (tr_data & ~mask) | set;
+ __mtk_tr_write(phydev, ch_addr, node_addr, data_addr, tr_data);
+}
+EXPORT_SYMBOL_GPL(__mtk_tr_modify);
+
+void mtk_tr_modify(struct phy_device *phydev, u8 ch_addr, u8 node_addr,
+ u8 data_addr, u32 mask, u32 set)
+{
+ phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5);
+ __mtk_tr_modify(phydev, ch_addr, node_addr, data_addr, mask, set);
+ phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
+}
+EXPORT_SYMBOL_GPL(mtk_tr_modify);
+
+void __mtk_tr_set_bits(struct phy_device *phydev, u8 ch_addr, u8 node_addr,
+ u8 data_addr, u32 set)
+{
+ __mtk_tr_modify(phydev, ch_addr, node_addr, data_addr, 0, set);
+}
+EXPORT_SYMBOL_GPL(__mtk_tr_set_bits);
+
+void __mtk_tr_clr_bits(struct phy_device *phydev, u8 ch_addr, u8 node_addr,
+ u8 data_addr, u32 clr)
+{
+ __mtk_tr_modify(phydev, ch_addr, node_addr, data_addr, clr, 0);
+}
+EXPORT_SYMBOL_GPL(__mtk_tr_clr_bits);
+
int mtk_phy_read_page(struct phy_device *phydev)
{
return __phy_read(phydev, MTK_EXT_PAGE_ACCESS);
diff --git a/drivers/net/phy/mediatek/mtk.h b/drivers/net/phy/mediatek/mtk.h
index 63d9fe179b8f..320f76ffa81f 100644
--- a/drivers/net/phy/mediatek/mtk.h
+++ b/drivers/net/phy/mediatek/mtk.h
@@ -8,7 +8,13 @@
#ifndef _MTK_EPHY_H_
#define _MTK_EPHY_H_
+#define MTK_PHY_AUX_CTRL_AND_STATUS 0x14
+#define MTK_PHY_ENABLE_DOWNSHIFT BIT(4)
+
#define MTK_EXT_PAGE_ACCESS 0x1f
+#define MTK_PHY_PAGE_EXTENDED_1 0x0001
+#define MTK_PHY_PAGE_STANDARD 0x0000
+#define MTK_PHY_PAGE_EXTENDED_52B5 0x52b5
/* Registers on MDIO_MMD_VEND2 */
#define MTK_PHY_LED0_ON_CTRL 0x24
@@ -66,6 +72,15 @@ struct mtk_socphy_priv {
unsigned long led_state;
};
+void __mtk_tr_modify(struct phy_device *phydev, u8 ch_addr, u8 node_addr,
+ u8 data_addr, u32 mask, u32 set);
+void mtk_tr_modify(struct phy_device *phydev, u8 ch_addr, u8 node_addr,
+ u8 data_addr, u32 mask, u32 set);
+void __mtk_tr_set_bits(struct phy_device *phydev, u8 ch_addr, u8 node_addr,
+ u8 data_addr, u32 set);
+void __mtk_tr_clr_bits(struct phy_device *phydev, u8 ch_addr, u8 node_addr,
+ u8 data_addr, u32 clr);
+
int mtk_phy_read_page(struct phy_device *phydev);
int mtk_phy_write_page(struct phy_device *phydev, int page);
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 0dac08e85304..37c9a344bf4a 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -683,13 +683,10 @@ EXPORT_SYMBOL_GPL(genphy_c45_read_mdix);
static int genphy_c45_write_eee_adv(struct phy_device *phydev,
unsigned long *adv)
{
- __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp);
int val, changed = 0;
- linkmode_andnot(tmp, adv, phydev->eee_broken_modes);
-
if (linkmode_intersects(phydev->supported_eee, PHY_EEE_CAP1_FEATURES)) {
- val = linkmode_to_mii_eee_cap1_t(tmp);
+ val = linkmode_to_mii_eee_cap1_t(adv);
/* IEEE 802.3-2018 45.2.7.13 EEE advertisement 1
* (Register 7.60)
@@ -707,7 +704,7 @@ static int genphy_c45_write_eee_adv(struct phy_device *phydev,
}
if (linkmode_intersects(phydev->supported_eee, PHY_EEE_CAP2_FEATURES)) {
- val = linkmode_to_mii_eee_cap2_t(tmp);
+ val = linkmode_to_mii_eee_cap2_t(adv);
/* IEEE 802.3-2022 45.2.7.16 EEE advertisement 2
* (Register 7.62)
@@ -1467,42 +1464,29 @@ EXPORT_SYMBOL_GPL(genphy_c45_plca_get_status);
/**
* genphy_c45_eee_is_active - get EEE status
* @phydev: target phy_device struct
- * @adv: variable to store advertised linkmodes
* @lp: variable to store LP advertised linkmodes
*
- * Description: this function will read local and link partner PHY
- * advertisements. Compare them return current EEE state.
+ * Description: this function will read link partner PHY advertisement
+ * and compare it to local advertisement to return current EEE state.
*/
-int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv,
- unsigned long *lp)
+int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *lp)
{
- __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp_adv) = {};
__ETHTOOL_DECLARE_LINK_MODE_MASK(tmp_lp) = {};
__ETHTOOL_DECLARE_LINK_MODE_MASK(common);
- bool eee_active;
int ret;
- ret = genphy_c45_read_eee_adv(phydev, tmp_adv);
- if (ret)
- return ret;
-
ret = genphy_c45_read_eee_lpa(phydev, tmp_lp);
if (ret)
return ret;
- linkmode_and(common, tmp_adv, tmp_lp);
- if (!linkmode_empty(tmp_adv) && !linkmode_empty(common))
- eee_active = phy_check_valid(phydev->speed, phydev->duplex,
- common);
- else
- eee_active = false;
-
- if (adv)
- linkmode_copy(adv, tmp_adv);
if (lp)
linkmode_copy(lp, tmp_lp);
- return eee_active;
+ linkmode_and(common, phydev->advertising_eee, tmp_lp);
+ if (linkmode_empty(common))
+ return 0;
+
+ return phy_check_valid(phydev->speed, phydev->duplex, common);
}
EXPORT_SYMBOL(genphy_c45_eee_is_active);
@@ -1519,14 +1503,14 @@ int genphy_c45_ethtool_get_eee(struct phy_device *phydev,
{
int ret;
- ret = genphy_c45_eee_is_active(phydev, data->advertised,
- data->lp_advertised);
+ ret = genphy_c45_eee_is_active(phydev, data->lp_advertised);
if (ret < 0)
return ret;
data->eee_active = phydev->eee_active;
- linkmode_copy(data->supported, phydev->supported_eee);
-
+ linkmode_andnot(data->supported, phydev->supported_eee,
+ phydev->eee_disabled_modes);
+ linkmode_copy(data->advertised, phydev->advertising_eee);
return 0;
}
EXPORT_SYMBOL(genphy_c45_ethtool_get_eee);
@@ -1559,7 +1543,9 @@ int genphy_c45_ethtool_set_eee(struct phy_device *phydev,
phydev_warn(phydev, "At least some EEE link modes are not supported.\n");
return -EINVAL;
}
- linkmode_copy(phydev->advertising_eee, adv);
+
+ linkmode_andnot(phydev->advertising_eee, adv,
+ phydev->eee_disabled_modes);
} else if (linkmode_empty(phydev->advertising_eee)) {
phy_advertise_eee_all(phydev);
}
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 6bf3ec985f3d..2fd1d153abc9 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -13,7 +13,7 @@
*/
const char *phy_speed_to_str(int speed)
{
- BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 103,
+ BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 121,
"Enum ethtool_link_mode_bit_indices and phylib are out of sync. "
"If a speed or mode has been added please update phy_speed_to_str "
"and the PHY settings array.\n");
@@ -169,6 +169,12 @@ static const struct phy_setting settings[] = {
PHY_SETTING( 800000, FULL, 800000baseDR8_2_Full ),
PHY_SETTING( 800000, FULL, 800000baseSR8_Full ),
PHY_SETTING( 800000, FULL, 800000baseVR8_Full ),
+ PHY_SETTING( 800000, FULL, 800000baseCR4_Full ),
+ PHY_SETTING( 800000, FULL, 800000baseKR4_Full ),
+ PHY_SETTING( 800000, FULL, 800000baseDR4_Full ),
+ PHY_SETTING( 800000, FULL, 800000baseDR4_2_Full ),
+ PHY_SETTING( 800000, FULL, 800000baseSR4_Full ),
+ PHY_SETTING( 800000, FULL, 800000baseVR4_Full ),
/* 400G */
PHY_SETTING( 400000, FULL, 400000baseCR8_Full ),
PHY_SETTING( 400000, FULL, 400000baseKR8_Full ),
@@ -180,6 +186,12 @@ static const struct phy_setting settings[] = {
PHY_SETTING( 400000, FULL, 400000baseLR4_ER4_FR4_Full ),
PHY_SETTING( 400000, FULL, 400000baseDR4_Full ),
PHY_SETTING( 400000, FULL, 400000baseSR4_Full ),
+ PHY_SETTING( 400000, FULL, 400000baseCR2_Full ),
+ PHY_SETTING( 400000, FULL, 400000baseKR2_Full ),
+ PHY_SETTING( 400000, FULL, 400000baseDR2_Full ),
+ PHY_SETTING( 400000, FULL, 400000baseDR2_2_Full ),
+ PHY_SETTING( 400000, FULL, 400000baseSR2_Full ),
+ PHY_SETTING( 400000, FULL, 400000baseVR2_Full ),
/* 200G */
PHY_SETTING( 200000, FULL, 200000baseCR4_Full ),
PHY_SETTING( 200000, FULL, 200000baseKR4_Full ),
@@ -191,6 +203,12 @@ static const struct phy_setting settings[] = {
PHY_SETTING( 200000, FULL, 200000baseLR2_ER2_FR2_Full ),
PHY_SETTING( 200000, FULL, 200000baseDR2_Full ),
PHY_SETTING( 200000, FULL, 200000baseSR2_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseCR_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseKR_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseDR_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseDR_2_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseSR_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseVR_Full ),
/* 100G */
PHY_SETTING( 100000, FULL, 100000baseCR4_Full ),
PHY_SETTING( 100000, FULL, 100000baseKR4_Full ),
@@ -388,7 +406,7 @@ void of_set_phy_supported(struct phy_device *phydev)
void of_set_phy_eee_broken(struct phy_device *phydev)
{
struct device_node *node = phydev->mdio.dev.of_node;
- unsigned long *modes = phydev->eee_broken_modes;
+ unsigned long *modes = phydev->eee_disabled_modes;
if (!IS_ENABLED(CONFIG_OF_MDIO) || !node)
return;
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index d0c1718e2b16..831b36839627 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -302,7 +302,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev,
cmd->base.port = PORT_BNC;
else
cmd->base.port = phydev->port;
- cmd->base.transceiver = phy_is_internal(phydev) ?
+ cmd->base.transceiver = phydev->is_internal ?
XCVR_INTERNAL : XCVR_EXTERNAL;
cmd->base.phy_address = phydev->mdio.addr;
cmd->base.autoneg = phydev->autoneg;
@@ -520,12 +520,12 @@ int __phy_hwtstamp_set(struct phy_device *phydev,
* @phydev: the phy_device struct
* @jiffies: Run the state machine after these jiffies
*/
-void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies)
+static void phy_queue_state_machine(struct phy_device *phydev,
+ unsigned long jiffies)
{
mod_delayed_work(system_power_efficient_wq, &phydev->state_queue,
jiffies);
}
-EXPORT_SYMBOL(phy_queue_state_machine);
/**
* phy_trigger_machine - Trigger the state machine to run now
@@ -1031,7 +1031,7 @@ static int phy_check_link_status(struct phy_device *phydev)
if (phydev->link && phydev->state != PHY_RUNNING) {
phy_check_downshift(phydev);
phydev->state = PHY_RUNNING;
- err = genphy_c45_eee_is_active(phydev, NULL, NULL);
+ err = genphy_c45_eee_is_active(phydev, NULL);
phydev->eee_active = err > 0;
phydev->enable_tx_lpi = phydev->eee_cfg.tx_lpi_enabled &&
phydev->eee_active;
@@ -1501,6 +1501,24 @@ void phy_free_interrupt(struct phy_device *phydev)
}
EXPORT_SYMBOL(phy_free_interrupt);
+/**
+ * phy_get_next_update_time - Determine the next PHY update time
+ * @phydev: Pointer to the phy_device structure
+ *
+ * This function queries the PHY driver to get the time for the next polling
+ * event. If the driver does not implement the callback, a default value is
+ * used.
+ *
+ * Return: The time for the next polling event in jiffies
+ */
+static unsigned int phy_get_next_update_time(struct phy_device *phydev)
+{
+ if (phydev->drv && phydev->drv->get_next_update_time)
+ return phydev->drv->get_next_update_time(phydev);
+
+ return PHY_STATE_TIME;
+}
+
enum phy_state_work {
PHY_STATE_WORK_NONE,
PHY_STATE_WORK_ANEG,
@@ -1580,7 +1598,8 @@ static enum phy_state_work _phy_state_machine(struct phy_device *phydev)
* called from phy_disconnect() synchronously.
*/
if (phy_polling_mode(phydev) && phy_is_started(phydev))
- phy_queue_state_machine(phydev, PHY_STATE_TIME);
+ phy_queue_state_machine(phydev,
+ phy_get_next_update_time(phydev));
return state_work;
}
@@ -1761,7 +1780,7 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable)
if (!phydev->drv)
return -EIO;
- ret = genphy_c45_eee_is_active(phydev, NULL, NULL);
+ ret = genphy_c45_eee_is_active(phydev, NULL);
if (ret < 0)
return ret;
if (!ret)
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 46713d27412b..7c4e1ad1864c 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -45,6 +45,17 @@ MODULE_DESCRIPTION("PHY library");
MODULE_AUTHOR("Andy Fleming");
MODULE_LICENSE("GPL");
+#define PHY_ANY_ID "MATCH ANY PHY"
+#define PHY_ANY_UID 0xffffffff
+
+struct phy_fixup {
+ struct list_head list;
+ char bus_id[MII_BUS_ID_SIZE + 3];
+ u32 phy_uid;
+ u32 phy_uid_mask;
+ int (*run)(struct phy_device *phydev);
+};
+
__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_features) __ro_after_init;
EXPORT_SYMBOL_GPL(phy_basic_features);
@@ -80,37 +91,28 @@ static const int phy_all_ports_features_array[7] = {
ETHTOOL_LINK_MODE_Backplane_BIT,
};
-const int phy_10_100_features_array[4] = {
+static const int phy_10_100_features_array[4] = {
ETHTOOL_LINK_MODE_10baseT_Half_BIT,
ETHTOOL_LINK_MODE_10baseT_Full_BIT,
ETHTOOL_LINK_MODE_100baseT_Half_BIT,
ETHTOOL_LINK_MODE_100baseT_Full_BIT,
};
-EXPORT_SYMBOL_GPL(phy_10_100_features_array);
-const int phy_basic_t1_features_array[3] = {
+static const int phy_basic_t1_features_array[3] = {
ETHTOOL_LINK_MODE_TP_BIT,
ETHTOOL_LINK_MODE_10baseT1L_Full_BIT,
ETHTOOL_LINK_MODE_100baseT1_Full_BIT,
};
-EXPORT_SYMBOL_GPL(phy_basic_t1_features_array);
-const int phy_basic_t1s_p2mp_features_array[2] = {
+static const int phy_basic_t1s_p2mp_features_array[2] = {
ETHTOOL_LINK_MODE_TP_BIT,
ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT,
};
-EXPORT_SYMBOL_GPL(phy_basic_t1s_p2mp_features_array);
-const int phy_gbit_features_array[2] = {
+static const int phy_gbit_features_array[2] = {
ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
};
-EXPORT_SYMBOL_GPL(phy_gbit_features_array);
-
-const int phy_10gbit_features_array[1] = {
- ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
-};
-EXPORT_SYMBOL_GPL(phy_10gbit_features_array);
static const int phy_eee_cap1_features_array[] = {
ETHTOOL_LINK_MODE_100baseT_Full_BIT,
@@ -185,9 +187,8 @@ static void features_init(void)
linkmode_set_bit_array(phy_gbit_features_array,
ARRAY_SIZE(phy_gbit_features_array),
phy_10gbit_features);
- linkmode_set_bit_array(phy_10gbit_features_array,
- ARRAY_SIZE(phy_10gbit_features_array),
- phy_10gbit_features);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+ phy_10gbit_features);
linkmode_set_bit_array(phy_eee_cap1_features_array,
ARRAY_SIZE(phy_eee_cap1_features_array),
@@ -378,8 +379,8 @@ static SIMPLE_DEV_PM_OPS(mdio_bus_phy_pm_ops, mdio_bus_phy_suspend,
* comparison
* @run: The actual code to be run when a matching PHY is found
*/
-int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
- int (*run)(struct phy_device *))
+static int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
+ int (*run)(struct phy_device *))
{
struct phy_fixup *fixup = kzalloc(sizeof(*fixup), GFP_KERNEL);
@@ -397,7 +398,6 @@ int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
return 0;
}
-EXPORT_SYMBOL(phy_register_fixup);
/* Registers a fixup to be run on any PHY with the UID in phy_uid */
int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask,
@@ -544,7 +544,7 @@ phy_interface_show(struct device *dev, struct device_attribute *attr, char *buf)
struct phy_device *phydev = to_phy_device(dev);
const char *mode = NULL;
- if (phy_is_internal(phydev))
+ if (phydev->is_internal)
mode = "internal";
else
mode = phy_modes(phydev->interface);
@@ -2966,7 +2966,7 @@ void phy_disable_eee(struct phy_device *phydev)
phydev->eee_cfg.tx_lpi_enabled = false;
phydev->eee_cfg.eee_enabled = false;
/* don't let userspace re-enable EEE advertisement */
- linkmode_fill(phydev->eee_broken_modes);
+ linkmode_fill(phydev->eee_disabled_modes);
}
EXPORT_SYMBOL_GPL(phy_disable_eee);
@@ -3096,19 +3096,12 @@ void phy_get_pause(struct phy_device *phydev, bool *tx_pause, bool *rx_pause)
EXPORT_SYMBOL(phy_get_pause);
#if IS_ENABLED(CONFIG_OF_MDIO)
-static int phy_get_int_delay_property(struct device *dev, const char *name)
+static int phy_get_u32_property(struct device *dev, const char *name, u32 *val)
{
- s32 int_delay;
- int ret;
-
- ret = device_property_read_u32(dev, name, &int_delay);
- if (ret)
- return ret;
-
- return int_delay;
+ return device_property_read_u32(dev, name, val);
}
#else
-static int phy_get_int_delay_property(struct device *dev, const char *name)
+static int phy_get_u32_property(struct device *dev, const char *name, u32 *val)
{
return -EINVAL;
}
@@ -3133,12 +3126,12 @@ static int phy_get_int_delay_property(struct device *dev, const char *name)
s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev,
const int *delay_values, int size, bool is_rx)
{
- s32 delay;
- int i;
+ int i, ret;
+ u32 delay;
if (is_rx) {
- delay = phy_get_int_delay_property(dev, "rx-internal-delay-ps");
- if (delay < 0 && size == 0) {
+ ret = phy_get_u32_property(dev, "rx-internal-delay-ps", &delay);
+ if (ret < 0 && size == 0) {
if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)
return 1;
@@ -3147,8 +3140,8 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev,
}
} else {
- delay = phy_get_int_delay_property(dev, "tx-internal-delay-ps");
- if (delay < 0 && size == 0) {
+ ret = phy_get_u32_property(dev, "tx-internal-delay-ps", &delay);
+ if (ret < 0 && size == 0) {
if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)
return 1;
@@ -3157,8 +3150,8 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev,
}
}
- if (delay < 0)
- return delay;
+ if (ret < 0)
+ return ret;
if (size == 0)
return delay;
@@ -3193,6 +3186,30 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev,
}
EXPORT_SYMBOL(phy_get_internal_delay);
+/**
+ * phy_get_tx_amplitude_gain - stores tx amplitude gain in @val
+ * @phydev: phy_device struct
+ * @dev: pointer to the devices device struct
+ * @linkmode: linkmode for which the tx amplitude gain should be retrieved
+ * @val: tx amplitude gain
+ *
+ * Returns: 0 on success, < 0 on failure
+ */
+int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev,
+ enum ethtool_link_mode_bit_indices linkmode,
+ u32 *val)
+{
+ switch (linkmode) {
+ case ETHTOOL_LINK_MODE_100baseT_Full_BIT:
+ return phy_get_u32_property(dev,
+ "tx-amplitude-100base-tx-percent",
+ val);
+ default:
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL_GPL(phy_get_tx_amplitude_gain);
+
static int phy_led_set_brightness(struct led_classdev *led_cdev,
enum led_brightness value)
{
@@ -3563,22 +3580,21 @@ static int phy_probe(struct device *dev)
if (err)
goto out;
- /* There is no "enabled" flag. If PHY is advertising, assume it is
- * kind of enabled.
- */
- phydev->eee_cfg.eee_enabled = !linkmode_empty(phydev->advertising_eee);
+ /* Get the EEE modes we want to prohibit. */
+ of_set_phy_eee_broken(phydev);
/* Some PHYs may advertise, by default, not support EEE modes. So,
- * we need to clean them.
+ * we need to clean them. In addition remove all disabled EEE modes.
*/
- if (phydev->eee_cfg.eee_enabled)
- linkmode_and(phydev->advertising_eee, phydev->supported_eee,
- phydev->advertising_eee);
+ linkmode_and(phydev->advertising_eee, phydev->supported_eee,
+ phydev->advertising_eee);
+ linkmode_andnot(phydev->advertising_eee, phydev->advertising_eee,
+ phydev->eee_disabled_modes);
- /* Get the EEE modes we want to prohibit. We will ask
- * the PHY stop advertising these mode later on
+ /* There is no "enabled" flag. If PHY is advertising, assume it is
+ * kind of enabled.
*/
- of_set_phy_eee_broken(phydev);
+ phydev->eee_cfg.eee_enabled = !linkmode_empty(phydev->advertising_eee);
/* Get master/slave strap overrides */
of_set_phy_timing_role(phydev);
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index b00a315de060..a3b186ab3854 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -1073,6 +1073,18 @@ static void phylink_pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode,
pcs->ops->pcs_link_up(pcs, neg_mode, interface, speed, duplex);
}
+static void phylink_pcs_disable_eee(struct phylink_pcs *pcs)
+{
+ if (pcs && pcs->ops->pcs_disable_eee)
+ pcs->ops->pcs_disable_eee(pcs);
+}
+
+static void phylink_pcs_enable_eee(struct phylink_pcs *pcs)
+{
+ if (pcs && pcs->ops->pcs_enable_eee)
+ pcs->ops->pcs_enable_eee(pcs);
+}
+
/* Query inband for a specific interface mode, asking the MAC for the
* PCS which will be used to handle the interface mode.
*/
@@ -1353,7 +1365,6 @@ static void phylink_major_config(struct phylink *pl, bool restart,
struct phylink_pcs *pcs = NULL;
bool pcs_changed = false;
unsigned int rate_kbd;
- unsigned int neg_mode;
int err;
phylink_dbg(pl, "major config, requested %s/%s\n",
@@ -1416,11 +1427,7 @@ static void phylink_major_config(struct phylink *pl, bool restart,
if (pl->pcs_state == PCS_STATE_STARTING || pcs_changed)
phylink_pcs_enable(pl->pcs);
- neg_mode = pl->act_link_an_mode;
- if (pl->pcs && pl->pcs->neg_mode)
- neg_mode = pl->pcs_neg_mode;
-
- err = phylink_pcs_config(pl->pcs, neg_mode, state,
+ err = phylink_pcs_config(pl->pcs, pl->pcs_neg_mode, state,
!!(pl->link_config.pause & MLO_PAUSE_AN));
if (err < 0)
phylink_err(pl, "pcs_config failed: %pe\n",
@@ -1463,7 +1470,6 @@ static void phylink_major_config(struct phylink *pl, bool restart,
*/
static int phylink_change_inband_advert(struct phylink *pl)
{
- unsigned int neg_mode;
int ret;
if (test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state))
@@ -1479,15 +1485,11 @@ static int phylink_change_inband_advert(struct phylink *pl)
phylink_pcs_neg_mode(pl, pl->pcs, pl->link_config.interface,
pl->link_config.advertising);
- neg_mode = pl->act_link_an_mode;
- if (pl->pcs->neg_mode)
- neg_mode = pl->pcs_neg_mode;
-
/* Modern PCS-based method; update the advert at the PCS, and
* restart negotiation if the pcs_config() helper indicates that
* the programmed advertisement has changed.
*/
- ret = phylink_pcs_config(pl->pcs, neg_mode, &pl->link_config,
+ ret = phylink_pcs_config(pl->pcs, pl->pcs_neg_mode, &pl->link_config,
!!(pl->link_config.pause & MLO_PAUSE_AN));
if (ret < 0)
return ret;
@@ -1511,13 +1513,7 @@ static void phylink_mac_pcs_get_state(struct phylink *pl,
state->an_complete = 0;
state->link = 1;
- pcs = pl->pcs;
- if (!pcs || pcs->neg_mode)
- autoneg = pl->pcs_neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED;
- else
- autoneg = linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
- state->advertising);
-
+ autoneg = pl->pcs_neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED;
if (autoneg) {
state->speed = SPEED_UNKNOWN;
state->duplex = DUPLEX_UNKNOWN;
@@ -1528,6 +1524,7 @@ static void phylink_mac_pcs_get_state(struct phylink *pl,
state->pause = pl->link_config.pause;
}
+ pcs = pl->pcs;
if (pcs)
pcs->ops->pcs_get_state(pcs, pl->pcs_neg_mode, state);
else
@@ -1601,6 +1598,8 @@ static void phylink_deactivate_lpi(struct phylink *pl)
phylink_dbg(pl, "disabling LPI\n");
pl->mac_ops->mac_disable_tx_lpi(pl->config);
+
+ phylink_pcs_disable_eee(pl->pcs);
}
}
@@ -1617,20 +1616,24 @@ static void phylink_activate_lpi(struct phylink *pl)
phylink_dbg(pl, "LPI timer %uus, tx clock stop %u\n",
pl->mac_tx_lpi_timer, pl->mac_tx_clk_stop);
+ phylink_pcs_enable_eee(pl->pcs);
+
err = pl->mac_ops->mac_enable_tx_lpi(pl->config, pl->mac_tx_lpi_timer,
pl->mac_tx_clk_stop);
- if (!err)
- pl->mac_enable_tx_lpi = true;
- else
+ if (err) {
+ phylink_pcs_disable_eee(pl->pcs);
phylink_err(pl, "%ps() failed: %pe\n",
pl->mac_ops->mac_enable_tx_lpi, ERR_PTR(err));
+ return;
+ }
+
+ pl->mac_enable_tx_lpi = true;
}
static void phylink_link_up(struct phylink *pl,
struct phylink_link_state link_state)
{
struct net_device *ndev = pl->netdev;
- unsigned int neg_mode;
int speed, duplex;
bool rx_pause;
@@ -1661,11 +1664,7 @@ static void phylink_link_up(struct phylink *pl,
pl->cur_interface = link_state.interface;
- neg_mode = pl->act_link_an_mode;
- if (pl->pcs && pl->pcs->neg_mode)
- neg_mode = pl->pcs_neg_mode;
-
- phylink_pcs_link_up(pl->pcs, neg_mode, pl->cur_interface, speed,
+ phylink_pcs_link_up(pl->pcs, pl->pcs_neg_mode, pl->cur_interface, speed,
duplex);
pl->mac_ops->mac_link_up(pl->config, pl->phydev, pl->act_link_an_mode,
@@ -1957,8 +1956,7 @@ struct phylink *phylink_create(struct phylink_config *config,
return ERR_PTR(-EINVAL);
}
- pl->mac_supports_eee_ops = mac_ops->mac_disable_tx_lpi &&
- mac_ops->mac_enable_tx_lpi;
+ pl->mac_supports_eee_ops = phylink_mac_implements_lpi(mac_ops);
pl->mac_supports_eee = pl->mac_supports_eee_ops &&
pl->config->lpi_capabilities &&
!phy_interface_empty(pl->config->lpi_interfaces);
diff --git a/drivers/net/phy/realtek/Kconfig b/drivers/net/phy/realtek/Kconfig
index 31935f147d87..b05c2a1e9024 100644
--- a/drivers/net/phy/realtek/Kconfig
+++ b/drivers/net/phy/realtek/Kconfig
@@ -4,8 +4,12 @@ config REALTEK_PHY
help
Currently supports RTL821x/RTL822x and fast ethernet PHYs
+if REALTEK_PHY
+
config REALTEK_PHY_HWMON
- def_bool REALTEK_PHY && HWMON
- depends on !(REALTEK_PHY=y && HWMON=m)
+ bool "HWMON support for Realtek PHYs"
+ depends on HWMON && !(REALTEK_PHY=y && HWMON=m)
help
Optional hwmon support for the temperature sensor
+
+endif # REALTEK_PHY
diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c
index 572a933636b0..7a0b19d66aca 100644
--- a/drivers/net/phy/realtek/realtek_main.c
+++ b/drivers/net/phy/realtek/realtek_main.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/clk.h>
+#include <linux/string_choices.h>
#include "realtek.h"
@@ -78,9 +79,7 @@
/* RTL822X_VND2_XXXXX registers are only accessible when phydev->is_c45
* is set, they cannot be accessed by C45-over-C22.
*/
-#define RTL822X_VND2_GBCR 0xa412
-
-#define RTL822X_VND2_GANLPAR 0xa414
+#define RTL822X_VND2_C22_REG(reg) (0xa400 + 2 * (reg))
#define RTL8366RB_POWER_SAVE 0x15
#define RTL8366RB_POWER_SAVE_ON BIT(12)
@@ -95,6 +94,16 @@
#define RTL_VND2_PHYSR_MASTER BIT(11)
#define RTL_VND2_PHYSR_SPEED_MASK (RTL_VND2_PHYSR_SPEEDL | RTL_VND2_PHYSR_SPEEDH)
+#define RTL_MDIO_PCS_EEE_ABLE 0xa5c4
+#define RTL_MDIO_AN_EEE_ADV 0xa5d0
+#define RTL_MDIO_AN_EEE_LPABLE 0xa5d2
+#define RTL_MDIO_AN_10GBT_CTRL 0xa5d4
+#define RTL_MDIO_AN_10GBT_STAT 0xa5d6
+#define RTL_MDIO_PMA_SPEED 0xa616
+#define RTL_MDIO_AN_EEE_LPABLE2 0xa6d0
+#define RTL_MDIO_AN_EEE_ADV2 0xa6d4
+#define RTL_MDIO_PCS_EEE_ABLE2 0xa6ec
+
#define RTL_GENERIC_PHYID 0x001cc800
#define RTL_8211FVD_PHYID 0x001cc878
#define RTL_8221B 0x001cc840
@@ -422,11 +431,11 @@ static int rtl8211f_config_init(struct phy_device *phydev)
} else if (ret) {
dev_dbg(dev,
"%s 2ns TX delay (and changing the value from pin-strapping RXD1 or the bootloader)\n",
- val_txdly ? "Enabling" : "Disabling");
+ str_enable_disable(val_txdly));
} else {
dev_dbg(dev,
"2ns TX delay was already %s (by pin-strapping RXD1 or bootloader configuration)\n",
- val_txdly ? "enabled" : "disabled");
+ str_enabled_disabled(val_txdly));
}
ret = phy_modify_paged_changed(phydev, 0xd08, 0x15, RTL8211F_RX_DELAY,
@@ -437,11 +446,11 @@ static int rtl8211f_config_init(struct phy_device *phydev)
} else if (ret) {
dev_dbg(dev,
"%s 2ns RX delay (and changing the value from pin-strapping RXD0 or the bootloader)\n",
- val_rxdly ? "Enabling" : "Disabling");
+ str_enable_disable(val_rxdly));
} else {
dev_dbg(dev,
"2ns RX delay was already %s (by pin-strapping RXD0 or bootloader configuration)\n",
- val_rxdly ? "enabled" : "disabled");
+ str_enabled_disabled(val_rxdly));
}
if (priv->has_phycr2) {
@@ -734,29 +743,31 @@ static int rtlgen_read_status(struct phy_device *phydev)
return 0;
}
+static int rtlgen_read_vend2(struct phy_device *phydev, int regnum)
+{
+ return __mdiobus_c45_read(phydev->mdio.bus, 0, MDIO_MMD_VEND2, regnum);
+}
+
+static int rtlgen_write_vend2(struct phy_device *phydev, int regnum, u16 val)
+{
+ return __mdiobus_c45_write(phydev->mdio.bus, 0, MDIO_MMD_VEND2, regnum,
+ val);
+}
+
static int rtlgen_read_mmd(struct phy_device *phydev, int devnum, u16 regnum)
{
int ret;
- if (devnum == MDIO_MMD_VEND2) {
- rtl821x_write_page(phydev, regnum >> 4);
- ret = __phy_read(phydev, 0x10 + ((regnum & 0xf) >> 1));
- rtl821x_write_page(phydev, 0);
- } else if (devnum == MDIO_MMD_PCS && regnum == MDIO_PCS_EEE_ABLE) {
- rtl821x_write_page(phydev, 0xa5c);
- ret = __phy_read(phydev, 0x12);
- rtl821x_write_page(phydev, 0);
- } else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_ADV) {
- rtl821x_write_page(phydev, 0xa5d);
- ret = __phy_read(phydev, 0x10);
- rtl821x_write_page(phydev, 0);
- } else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_LPABLE) {
- rtl821x_write_page(phydev, 0xa5d);
- ret = __phy_read(phydev, 0x11);
- rtl821x_write_page(phydev, 0);
- } else {
+ if (devnum == MDIO_MMD_VEND2)
+ ret = rtlgen_read_vend2(phydev, regnum);
+ else if (devnum == MDIO_MMD_PCS && regnum == MDIO_PCS_EEE_ABLE)
+ ret = rtlgen_read_vend2(phydev, RTL_MDIO_PCS_EEE_ABLE);
+ else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_ADV)
+ ret = rtlgen_read_vend2(phydev, RTL_MDIO_AN_EEE_ADV);
+ else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_LPABLE)
+ ret = rtlgen_read_vend2(phydev, RTL_MDIO_AN_EEE_LPABLE);
+ else
ret = -EOPNOTSUPP;
- }
return ret;
}
@@ -766,17 +777,12 @@ static int rtlgen_write_mmd(struct phy_device *phydev, int devnum, u16 regnum,
{
int ret;
- if (devnum == MDIO_MMD_VEND2) {
- rtl821x_write_page(phydev, regnum >> 4);
- ret = __phy_write(phydev, 0x10 + ((regnum & 0xf) >> 1), val);
- rtl821x_write_page(phydev, 0);
- } else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_ADV) {
- rtl821x_write_page(phydev, 0xa5d);
- ret = __phy_write(phydev, 0x10, val);
- rtl821x_write_page(phydev, 0);
- } else {
+ if (devnum == MDIO_MMD_VEND2)
+ ret = rtlgen_write_vend2(phydev, regnum, val);
+ else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_ADV)
+ ret = rtlgen_write_vend2(phydev, regnum, RTL_MDIO_AN_EEE_ADV);
+ else
ret = -EOPNOTSUPP;
- }
return ret;
}
@@ -788,19 +794,12 @@ static int rtl822x_read_mmd(struct phy_device *phydev, int devnum, u16 regnum)
if (ret != -EOPNOTSUPP)
return ret;
- if (devnum == MDIO_MMD_PCS && regnum == MDIO_PCS_EEE_ABLE2) {
- rtl821x_write_page(phydev, 0xa6e);
- ret = __phy_read(phydev, 0x16);
- rtl821x_write_page(phydev, 0);
- } else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_ADV2) {
- rtl821x_write_page(phydev, 0xa6d);
- ret = __phy_read(phydev, 0x12);
- rtl821x_write_page(phydev, 0);
- } else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_LPABLE2) {
- rtl821x_write_page(phydev, 0xa6d);
- ret = __phy_read(phydev, 0x10);
- rtl821x_write_page(phydev, 0);
- }
+ if (devnum == MDIO_MMD_PCS && regnum == MDIO_PCS_EEE_ABLE2)
+ ret = rtlgen_read_vend2(phydev, RTL_MDIO_PCS_EEE_ABLE2);
+ else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_ADV2)
+ ret = rtlgen_read_vend2(phydev, RTL_MDIO_AN_EEE_ADV2);
+ else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_LPABLE2)
+ ret = rtlgen_read_vend2(phydev, RTL_MDIO_AN_EEE_LPABLE2);
return ret;
}
@@ -813,11 +812,8 @@ static int rtl822x_write_mmd(struct phy_device *phydev, int devnum, u16 regnum,
if (ret != -EOPNOTSUPP)
return ret;
- if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_ADV2) {
- rtl821x_write_page(phydev, 0xa6d);
- ret = __phy_write(phydev, 0x12, val);
- rtl821x_write_page(phydev, 0);
- }
+ if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_ADV2)
+ ret = rtlgen_write_vend2(phydev, RTL_MDIO_AN_EEE_ADV2, val);
return ret;
}
@@ -913,7 +909,7 @@ static int rtl822x_get_features(struct phy_device *phydev)
{
int val;
- val = phy_read_paged(phydev, 0xa61, 0x13);
+ val = phy_read_mmd(phydev, MDIO_MMD_VEND2, RTL_MDIO_PMA_SPEED);
if (val < 0)
return val;
@@ -934,10 +930,10 @@ static int rtl822x_config_aneg(struct phy_device *phydev)
if (phydev->autoneg == AUTONEG_ENABLE) {
u16 adv = linkmode_adv_to_mii_10gbt_adv_t(phydev->advertising);
- ret = phy_modify_paged_changed(phydev, 0xa5d, 0x12,
- MDIO_AN_10GBT_CTRL_ADV2_5G |
- MDIO_AN_10GBT_CTRL_ADV5G,
- adv);
+ ret = phy_modify_mmd_changed(phydev, MDIO_MMD_VEND2,
+ RTL_MDIO_AN_10GBT_CTRL,
+ MDIO_AN_10GBT_CTRL_ADV2_5G |
+ MDIO_AN_10GBT_CTRL_ADV5G, adv);
if (ret < 0)
return ret;
}
@@ -981,7 +977,7 @@ static int rtl822x_read_status(struct phy_device *phydev)
!phydev->autoneg_complete)
return 0;
- lpadv = phy_read_paged(phydev, 0xa5d, 0x13);
+ lpadv = phy_read_mmd(phydev, MDIO_MMD_VEND2, RTL_MDIO_AN_10GBT_STAT);
if (lpadv < 0)
return lpadv;
@@ -1028,7 +1024,8 @@ static int rtl822x_c45_config_aneg(struct phy_device *phydev)
val = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising);
/* Vendor register as C45 has no standardized support for 1000BaseT */
- ret = phy_modify_mmd_changed(phydev, MDIO_MMD_VEND2, RTL822X_VND2_GBCR,
+ ret = phy_modify_mmd_changed(phydev, MDIO_MMD_VEND2,
+ RTL822X_VND2_C22_REG(MII_CTRL1000),
ADVERTISE_1000FULL, val);
if (ret < 0)
return ret;
@@ -1045,7 +1042,7 @@ static int rtl822x_c45_read_status(struct phy_device *phydev)
/* Vendor register as C45 has no standardized support for 1000BaseT */
if (phydev->autoneg == AUTONEG_ENABLE && genphy_c45_aneg_done(phydev)) {
val = phy_read_mmd(phydev, MDIO_MMD_VEND2,
- RTL822X_VND2_GANLPAR);
+ RTL822X_VND2_C22_REG(MII_STAT1000));
if (val < 0)
return val;
} else {
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 5ca6ecf0ce5f..d4ece538f1b2 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -26,74 +26,9 @@
#include <linux/virtio_net.h>
#include <linux/skb_array.h>
-#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
-
-#define TAP_VNET_LE 0x80000000
-#define TAP_VNET_BE 0x40000000
-
-#ifdef CONFIG_TUN_VNET_CROSS_LE
-static inline bool tap_legacy_is_little_endian(struct tap_queue *q)
-{
- return q->flags & TAP_VNET_BE ? false :
- virtio_legacy_is_little_endian();
-}
-
-static long tap_get_vnet_be(struct tap_queue *q, int __user *sp)
-{
- int s = !!(q->flags & TAP_VNET_BE);
-
- if (put_user(s, sp))
- return -EFAULT;
-
- return 0;
-}
-
-static long tap_set_vnet_be(struct tap_queue *q, int __user *sp)
-{
- int s;
-
- if (get_user(s, sp))
- return -EFAULT;
-
- if (s)
- q->flags |= TAP_VNET_BE;
- else
- q->flags &= ~TAP_VNET_BE;
-
- return 0;
-}
-#else
-static inline bool tap_legacy_is_little_endian(struct tap_queue *q)
-{
- return virtio_legacy_is_little_endian();
-}
-
-static long tap_get_vnet_be(struct tap_queue *q, int __user *argp)
-{
- return -EINVAL;
-}
-
-static long tap_set_vnet_be(struct tap_queue *q, int __user *argp)
-{
- return -EINVAL;
-}
-#endif /* CONFIG_TUN_VNET_CROSS_LE */
-
-static inline bool tap_is_little_endian(struct tap_queue *q)
-{
- return q->flags & TAP_VNET_LE ||
- tap_legacy_is_little_endian(q);
-}
-
-static inline u16 tap16_to_cpu(struct tap_queue *q, __virtio16 val)
-{
- return __virtio16_to_cpu(tap_is_little_endian(q), val);
-}
+#include "tun_vnet.h"
-static inline __virtio16 cpu_to_tap16(struct tap_queue *q, u16 val)
-{
- return __cpu_to_virtio16(tap_is_little_endian(q), val);
-}
+#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
static struct proto tap_proto = {
.name = "tap",
@@ -645,6 +580,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
int err;
struct virtio_net_hdr vnet_hdr = { 0 };
int vnet_hdr_len = 0;
+ int hdr_len = 0;
int copylen = 0;
int depth;
bool zerocopy = false;
@@ -654,25 +590,13 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
if (q->flags & IFF_VNET_HDR) {
vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
- err = -EINVAL;
- if (len < vnet_hdr_len)
+ hdr_len = tun_vnet_hdr_get(vnet_hdr_len, q->flags, from, &vnet_hdr);
+ if (hdr_len < 0) {
+ err = hdr_len;
goto err;
- len -= vnet_hdr_len;
+ }
- err = -EFAULT;
- if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr), from))
- goto err;
- iov_iter_advance(from, vnet_hdr_len - sizeof(vnet_hdr));
- if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
- tap16_to_cpu(q, vnet_hdr.csum_start) +
- tap16_to_cpu(q, vnet_hdr.csum_offset) + 2 >
- tap16_to_cpu(q, vnet_hdr.hdr_len))
- vnet_hdr.hdr_len = cpu_to_tap16(q,
- tap16_to_cpu(q, vnet_hdr.csum_start) +
- tap16_to_cpu(q, vnet_hdr.csum_offset) + 2);
- err = -EINVAL;
- if (tap16_to_cpu(q, vnet_hdr.hdr_len) > len)
- goto err;
+ len -= vnet_hdr_len;
}
err = -EINVAL;
@@ -682,12 +606,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) {
struct iov_iter i;
- copylen = vnet_hdr.hdr_len ?
- tap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN;
- if (copylen > good_linear)
- copylen = good_linear;
- else if (copylen < ETH_HLEN)
- copylen = ETH_HLEN;
+ copylen = clamp(hdr_len ?: GOODCOPY_LEN, ETH_HLEN, good_linear);
linear = copylen;
i = *from;
iov_iter_advance(&i, copylen);
@@ -697,11 +616,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
if (!zerocopy) {
copylen = len;
- linear = tap16_to_cpu(q, vnet_hdr.hdr_len);
- if (linear > good_linear)
- linear = good_linear;
- else if (linear < ETH_HLEN)
- linear = ETH_HLEN;
+ linear = clamp(hdr_len, ETH_HLEN, good_linear);
}
skb = tap_alloc_skb(&q->sk, TAP_RESERVE, copylen,
@@ -733,8 +648,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
skb->dev = tap->dev;
if (vnet_hdr_len) {
- err = virtio_net_hdr_to_skb(skb, &vnet_hdr,
- tap_is_little_endian(q));
+ err = tun_vnet_hdr_to_skb(q->flags, skb, &vnet_hdr);
if (err) {
rcu_read_unlock();
drop_reason = SKB_DROP_REASON_DEV_HDR;
@@ -797,23 +711,17 @@ static ssize_t tap_put_user(struct tap_queue *q,
int total;
if (q->flags & IFF_VNET_HDR) {
- int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0;
struct virtio_net_hdr vnet_hdr;
vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
- if (iov_iter_count(iter) < vnet_hdr_len)
- return -EINVAL;
- if (virtio_net_hdr_from_skb(skb, &vnet_hdr,
- tap_is_little_endian(q), true,
- vlan_hlen))
- BUG();
-
- if (copy_to_iter(&vnet_hdr, sizeof(vnet_hdr), iter) !=
- sizeof(vnet_hdr))
- return -EFAULT;
+ ret = tun_vnet_hdr_from_skb(q->flags, NULL, skb, &vnet_hdr);
+ if (ret)
+ return ret;
- iov_iter_advance(iter, vnet_hdr_len - sizeof(vnet_hdr));
+ ret = tun_vnet_hdr_put(vnet_hdr_len, iter, &vnet_hdr);
+ if (ret)
+ return ret;
}
total = vnet_hdr_len;
total += skb->len;
@@ -1072,42 +980,6 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
q->sk.sk_sndbuf = s;
return 0;
- case TUNGETVNETHDRSZ:
- s = q->vnet_hdr_sz;
- if (put_user(s, sp))
- return -EFAULT;
- return 0;
-
- case TUNSETVNETHDRSZ:
- if (get_user(s, sp))
- return -EFAULT;
- if (s < (int)sizeof(struct virtio_net_hdr))
- return -EINVAL;
-
- q->vnet_hdr_sz = s;
- return 0;
-
- case TUNGETVNETLE:
- s = !!(q->flags & TAP_VNET_LE);
- if (put_user(s, sp))
- return -EFAULT;
- return 0;
-
- case TUNSETVNETLE:
- if (get_user(s, sp))
- return -EFAULT;
- if (s)
- q->flags |= TAP_VNET_LE;
- else
- q->flags &= ~TAP_VNET_LE;
- return 0;
-
- case TUNGETVNETBE:
- return tap_get_vnet_be(q, sp);
-
- case TUNSETVNETBE:
- return tap_set_vnet_be(q, sp);
-
case TUNSETOFFLOAD:
/* let the user check for future flags */
if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
@@ -1151,7 +1023,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
return ret;
default:
- return -EINVAL;
+ return tun_vnet_ioctl(&q->vnet_hdr_sz, &q->flags, cmd, sp);
}
}
@@ -1198,7 +1070,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp)
skb->protocol = eth_hdr(skb)->h_proto;
if (vnet_hdr_len) {
- err = virtio_net_hdr_to_skb(skb, gso, tap_is_little_endian(q));
+ err = tun_vnet_hdr_to_skb(q->flags, skb, gso);
if (err)
goto err_kfree;
}
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index acf96f262488..d8f4d3e996a7 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -83,6 +83,8 @@
#include <linux/uaccess.h>
#include <linux/proc_fs.h>
+#include "tun_vnet.h"
+
static void tun_default_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *cmd);
@@ -94,9 +96,6 @@ static void tun_default_link_ksettings(struct net_device *dev,
* overload it to mean fasync when stored there.
*/
#define TUN_FASYNC IFF_ATTACH_QUEUE
-/* High bits in flags field are unused. */
-#define TUN_VNET_LE 0x80000000
-#define TUN_VNET_BE 0x40000000
#define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS)
@@ -298,70 +297,6 @@ static bool tun_napi_frags_enabled(const struct tun_file *tfile)
return tfile->napi_frags_enabled;
}
-#ifdef CONFIG_TUN_VNET_CROSS_LE
-static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
-{
- return tun->flags & TUN_VNET_BE ? false :
- virtio_legacy_is_little_endian();
-}
-
-static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
-{
- int be = !!(tun->flags & TUN_VNET_BE);
-
- if (put_user(be, argp))
- return -EFAULT;
-
- return 0;
-}
-
-static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
-{
- int be;
-
- if (get_user(be, argp))
- return -EFAULT;
-
- if (be)
- tun->flags |= TUN_VNET_BE;
- else
- tun->flags &= ~TUN_VNET_BE;
-
- return 0;
-}
-#else
-static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
-{
- return virtio_legacy_is_little_endian();
-}
-
-static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
-{
- return -EINVAL;
-}
-
-static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
-{
- return -EINVAL;
-}
-#endif /* CONFIG_TUN_VNET_CROSS_LE */
-
-static inline bool tun_is_little_endian(struct tun_struct *tun)
-{
- return tun->flags & TUN_VNET_LE ||
- tun_legacy_is_little_endian(tun);
-}
-
-static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
-{
- return __virtio16_to_cpu(tun_is_little_endian(tun), val);
-}
-
-static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val)
-{
- return __cpu_to_virtio16(tun_is_little_endian(tun), val);
-}
-
static inline u32 tun_hashfn(u32 rxhash)
{
return rxhash & TUN_MASK_FLOW_ENTRIES;
@@ -1756,6 +1691,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
struct virtio_net_hdr gso = { 0 };
int good_linear;
int copylen;
+ int hdr_len = 0;
bool zerocopy = false;
int err;
u32 rxhash = 0;
@@ -1775,26 +1711,16 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if (tun->flags & IFF_VNET_HDR) {
int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
- if (len < vnet_hdr_sz)
- return -EINVAL;
- len -= vnet_hdr_sz;
+ hdr_len = tun_vnet_hdr_get(vnet_hdr_sz, tun->flags, from, &gso);
+ if (hdr_len < 0)
+ return hdr_len;
- if (!copy_from_iter_full(&gso, sizeof(gso), from))
- return -EFAULT;
-
- if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
- tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len))
- gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2);
-
- if (tun16_to_cpu(tun, gso.hdr_len) > len)
- return -EINVAL;
- iov_iter_advance(from, vnet_hdr_sz - sizeof(gso));
+ len -= vnet_hdr_sz;
}
if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) {
align += NET_IP_ALIGN;
- if (unlikely(len < ETH_HLEN ||
- (gso.hdr_len && tun16_to_cpu(tun, gso.hdr_len) < ETH_HLEN)))
+ if (unlikely(len < ETH_HLEN || (hdr_len && hdr_len < ETH_HLEN)))
return -EINVAL;
}
@@ -1807,9 +1733,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
* enough room for skb expand head in case it is used.
* The rest of the buffer is mapped from userspace.
*/
- copylen = gso.hdr_len ? tun16_to_cpu(tun, gso.hdr_len) : GOODCOPY_LEN;
- if (copylen > good_linear)
- copylen = good_linear;
+ copylen = min(hdr_len ? hdr_len : GOODCOPY_LEN, good_linear);
linear = copylen;
iov_iter_advance(&i, copylen);
if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS)
@@ -1830,10 +1754,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
} else {
if (!zerocopy) {
copylen = len;
- if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
- linear = good_linear;
- else
- linear = tun16_to_cpu(tun, gso.hdr_len);
+ linear = min(hdr_len, good_linear);
}
if (frags) {
@@ -1868,7 +1789,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
}
}
- if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
+ if (tun_vnet_hdr_to_skb(tun->flags, skb, &gso)) {
atomic_long_inc(&tun->rx_frame_errors);
err = -EINVAL;
goto free_skb;
@@ -2063,18 +1984,15 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun,
{
int vnet_hdr_sz = 0;
size_t size = xdp_frame->len;
- size_t ret;
+ ssize_t ret;
if (tun->flags & IFF_VNET_HDR) {
struct virtio_net_hdr gso = { 0 };
vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
- if (unlikely(iov_iter_count(iter) < vnet_hdr_sz))
- return -EINVAL;
- if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) !=
- sizeof(gso)))
- return -EFAULT;
- iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
+ ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso);
+ if (ret)
+ return ret;
}
ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz;
@@ -2097,6 +2015,7 @@ static ssize_t tun_put_user(struct tun_struct *tun,
int vlan_offset = 0;
int vlan_hlen = 0;
int vnet_hdr_sz = 0;
+ int ret;
if (skb_vlan_tag_present(skb))
vlan_hlen = VLAN_HLEN;
@@ -2123,31 +2042,13 @@ static ssize_t tun_put_user(struct tun_struct *tun,
if (vnet_hdr_sz) {
struct virtio_net_hdr gso;
- if (iov_iter_count(iter) < vnet_hdr_sz)
- return -EINVAL;
-
- if (virtio_net_hdr_from_skb(skb, &gso,
- tun_is_little_endian(tun), true,
- vlan_hlen)) {
- struct skb_shared_info *sinfo = skb_shinfo(skb);
-
- if (net_ratelimit()) {
- netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
- sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
- tun16_to_cpu(tun, gso.hdr_len));
- print_hex_dump(KERN_ERR, "tun: ",
- DUMP_PREFIX_NONE,
- 16, 1, skb->head,
- min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
- }
- WARN_ON_ONCE(1);
- return -EINVAL;
- }
-
- if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso))
- return -EFAULT;
+ ret = tun_vnet_hdr_from_skb(tun->flags, tun->dev, skb, &gso);
+ if (ret)
+ return ret;
- iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
+ ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso);
+ if (ret)
+ return ret;
}
if (vlan_hlen) {
@@ -2507,7 +2408,7 @@ build:
skb_reserve(skb, xdp->data - xdp->data_hard_start);
skb_put(skb, xdp->data_end - xdp->data);
- if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
+ if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) {
atomic_long_inc(&tun->rx_frame_errors);
kfree_skb(skb);
ret = -EINVAL;
@@ -3091,8 +2992,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
kgid_t group;
int ifindex;
int sndbuf;
- int vnet_hdr_sz;
- int le;
int ret;
bool do_notify = false;
@@ -3299,50 +3198,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
tun_set_sndbuf(tun);
break;
- case TUNGETVNETHDRSZ:
- vnet_hdr_sz = tun->vnet_hdr_sz;
- if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz)))
- ret = -EFAULT;
- break;
-
- case TUNSETVNETHDRSZ:
- if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) {
- ret = -EFAULT;
- break;
- }
- if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) {
- ret = -EINVAL;
- break;
- }
-
- tun->vnet_hdr_sz = vnet_hdr_sz;
- break;
-
- case TUNGETVNETLE:
- le = !!(tun->flags & TUN_VNET_LE);
- if (put_user(le, (int __user *)argp))
- ret = -EFAULT;
- break;
-
- case TUNSETVNETLE:
- if (get_user(le, (int __user *)argp)) {
- ret = -EFAULT;
- break;
- }
- if (le)
- tun->flags |= TUN_VNET_LE;
- else
- tun->flags &= ~TUN_VNET_LE;
- break;
-
- case TUNGETVNETBE:
- ret = tun_get_vnet_be(tun, argp);
- break;
-
- case TUNSETVNETBE:
- ret = tun_set_vnet_be(tun, argp);
- break;
-
case TUNATTACHFILTER:
/* Can be set only for TAPs */
ret = -EINVAL;
@@ -3398,7 +3253,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
break;
default:
- ret = -EINVAL;
+ ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp);
break;
}
diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h
new file mode 100644
index 000000000000..fd7411c4447f
--- /dev/null
+++ b/drivers/net/tun_vnet.h
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef TUN_VNET_H
+#define TUN_VNET_H
+
+/* High bits in flags field are unused. */
+#define TUN_VNET_LE 0x80000000
+#define TUN_VNET_BE 0x40000000
+
+static inline bool tun_vnet_legacy_is_little_endian(unsigned int flags)
+{
+ bool be = IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) &&
+ (flags & TUN_VNET_BE);
+
+ return !be && virtio_legacy_is_little_endian();
+}
+
+static inline long tun_get_vnet_be(unsigned int flags, int __user *argp)
+{
+ int be = !!(flags & TUN_VNET_BE);
+
+ if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE))
+ return -EINVAL;
+
+ if (put_user(be, argp))
+ return -EFAULT;
+
+ return 0;
+}
+
+static inline long tun_set_vnet_be(unsigned int *flags, int __user *argp)
+{
+ int be;
+
+ if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE))
+ return -EINVAL;
+
+ if (get_user(be, argp))
+ return -EFAULT;
+
+ if (be)
+ *flags |= TUN_VNET_BE;
+ else
+ *flags &= ~TUN_VNET_BE;
+
+ return 0;
+}
+
+static inline bool tun_vnet_is_little_endian(unsigned int flags)
+{
+ return flags & TUN_VNET_LE || tun_vnet_legacy_is_little_endian(flags);
+}
+
+static inline u16 tun_vnet16_to_cpu(unsigned int flags, __virtio16 val)
+{
+ return __virtio16_to_cpu(tun_vnet_is_little_endian(flags), val);
+}
+
+static inline __virtio16 cpu_to_tun_vnet16(unsigned int flags, u16 val)
+{
+ return __cpu_to_virtio16(tun_vnet_is_little_endian(flags), val);
+}
+
+static inline long tun_vnet_ioctl(int *vnet_hdr_sz, unsigned int *flags,
+ unsigned int cmd, int __user *sp)
+{
+ int s;
+
+ switch (cmd) {
+ case TUNGETVNETHDRSZ:
+ s = *vnet_hdr_sz;
+ if (put_user(s, sp))
+ return -EFAULT;
+ return 0;
+
+ case TUNSETVNETHDRSZ:
+ if (get_user(s, sp))
+ return -EFAULT;
+ if (s < (int)sizeof(struct virtio_net_hdr))
+ return -EINVAL;
+
+ *vnet_hdr_sz = s;
+ return 0;
+
+ case TUNGETVNETLE:
+ s = !!(*flags & TUN_VNET_LE);
+ if (put_user(s, sp))
+ return -EFAULT;
+ return 0;
+
+ case TUNSETVNETLE:
+ if (get_user(s, sp))
+ return -EFAULT;
+ if (s)
+ *flags |= TUN_VNET_LE;
+ else
+ *flags &= ~TUN_VNET_LE;
+ return 0;
+
+ case TUNGETVNETBE:
+ return tun_get_vnet_be(*flags, sp);
+
+ case TUNSETVNETBE:
+ return tun_set_vnet_be(flags, sp);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+static inline int tun_vnet_hdr_get(int sz, unsigned int flags,
+ struct iov_iter *from,
+ struct virtio_net_hdr *hdr)
+{
+ u16 hdr_len;
+
+ if (iov_iter_count(from) < sz)
+ return -EINVAL;
+
+ if (!copy_from_iter_full(hdr, sizeof(*hdr), from))
+ return -EFAULT;
+
+ hdr_len = tun_vnet16_to_cpu(flags, hdr->hdr_len);
+
+ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+ hdr_len = max(tun_vnet16_to_cpu(flags, hdr->csum_start) + tun_vnet16_to_cpu(flags, hdr->csum_offset) + 2, hdr_len);
+ hdr->hdr_len = cpu_to_tun_vnet16(flags, hdr_len);
+ }
+
+ if (hdr_len > iov_iter_count(from))
+ return -EINVAL;
+
+ iov_iter_advance(from, sz - sizeof(*hdr));
+
+ return hdr_len;
+}
+
+static inline int tun_vnet_hdr_put(int sz, struct iov_iter *iter,
+ const struct virtio_net_hdr *hdr)
+{
+ if (unlikely(iov_iter_count(iter) < sz))
+ return -EINVAL;
+
+ if (unlikely(copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr)))
+ return -EFAULT;
+
+ iov_iter_advance(iter, sz - sizeof(*hdr));
+
+ return 0;
+}
+
+static inline int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb,
+ const struct virtio_net_hdr *hdr)
+{
+ return virtio_net_hdr_to_skb(skb, hdr, tun_vnet_is_little_endian(flags));
+}
+
+static inline int tun_vnet_hdr_from_skb(unsigned int flags,
+ const struct net_device *dev,
+ const struct sk_buff *skb,
+ struct virtio_net_hdr *hdr)
+{
+ int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0;
+
+ if (virtio_net_hdr_from_skb(skb, hdr,
+ tun_vnet_is_little_endian(flags), true,
+ vlan_hlen)) {
+ struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+ if (net_ratelimit()) {
+ netdev_err(dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
+ sinfo->gso_type, tun_vnet16_to_cpu(flags, hdr->gso_size),
+ tun_vnet16_to_cpu(flags, hdr->hdr_len));
+ print_hex_dump(KERN_ERR, "tun: ",
+ DUMP_PREFIX_NONE,
+ 16, 1, skb->head,
+ min(tun_vnet16_to_cpu(flags, hdr->hdr_len), 64), true);
+ }
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+#endif /* TUN_VNET_H */
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 57d6e5abc30e..da24941a6e44 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -1421,6 +1421,19 @@ static const struct driver_info hg20f9_info = {
.data = FLAG_EEPROM_MAC,
};
+static const struct driver_info lyconsys_fibergecko100_info = {
+ .description = "LyconSys FiberGecko 100 USB 2.0 to SFP Adapter",
+ .bind = ax88178_bind,
+ .status = asix_status,
+ .link_reset = ax88178_link_reset,
+ .reset = ax88178_link_reset,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR |
+ FLAG_MULTI_PACKET,
+ .rx_fixup = asix_rx_fixup_common,
+ .tx_fixup = asix_tx_fixup,
+ .data = 0x20061201,
+};
+
static const struct usb_device_id products [] = {
{
// Linksys USB200M
@@ -1578,6 +1591,10 @@ static const struct usb_device_id products [] = {
// Linux Automation GmbH USB 10Base-T1L
USB_DEVICE(0x33f7, 0x0004),
.driver_info = (unsigned long) &lxausb_t1l_info,
+}, {
+ /* LyconSys FiberGecko 100 */
+ USB_DEVICE(0x1d2a, 0x0801),
+ .driver_info = (unsigned long) &lyconsys_fibergecko100_info,
},
{ }, // END
};
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index e13e4920ee9b..88921c13b629 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -660,7 +660,7 @@ static const struct usb_device_id mbim_devs[] = {
.driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle,
},
- /* Telit FN990 */
+ /* Telit FN990A */
{ USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x1071, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
.driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle,
},
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index e9208a8d2bfa..14d1c85c8000 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1360,7 +1360,7 @@ static const struct usb_device_id products[] = {
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1057, 2)}, /* Telit FN980 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */
- {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990A */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1080, 2)}, /* Telit FE990 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x10a0, 0)}, /* Telit FN920C04 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x10a4, 0)}, /* Telit FN920C04 */
@@ -1368,6 +1368,7 @@ static const struct usb_device_id products[] = {
{QMI_QUIRK_SET_DTR(0x1bc7, 0x10c0, 0)}, /* Telit FE910C04 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x10c4, 0)}, /* Telit FE910C04 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x10c8, 0)}, /* Telit FE910C04 */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10d0, 0)}, /* Telit FN990B */
{QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */
{QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */
{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 468c73974046..e1021148d3a6 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -10079,6 +10079,7 @@ static const struct usb_device_id rtl8152_table[] = {
{ USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff) },
{ USB_DEVICE(VENDOR_ID_TPLINK, 0x0601) },
{ USB_DEVICE(VENDOR_ID_DLINK, 0xb301) },
+ { USB_DEVICE(VENDOR_ID_DELL, 0xb097) },
{ USB_DEVICE(VENDOR_ID_ASUS, 0x1976) },
{}
};
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index ca81b212a246..5f21ce1013c4 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1537,14 +1537,12 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
nlmsg_end(skb, nlh);
- /* fib_nl_{new,del}rule handling looks for net from skb->sk */
- skb->sk = dev_net(dev)->rtnl;
if (add_it) {
- err = fib_nl_newrule(skb, nlh, NULL);
+ err = fib_newrule(dev_net(dev), skb, nlh, NULL, true);
if (err == -EEXIST)
err = 0;
} else {
- err = fib_nl_delrule(skb, nlh, NULL);
+ err = fib_delrule(dev_net(dev), skb, nlh, NULL, true);
if (err == -ENOENT)
err = 0;
}
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 92516189e792..e2354c02def0 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -227,9 +227,9 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
be32_to_cpu(fdb->vni)))
goto nla_put_failure;
- ci.ndm_used = jiffies_to_clock_t(now - fdb->used);
+ ci.ndm_used = jiffies_to_clock_t(now - READ_ONCE(fdb->used));
ci.ndm_confirmed = 0;
- ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated);
+ ci.ndm_updated = jiffies_to_clock_t(now - READ_ONCE(fdb->updated));
ci.ndm_refcnt = 0;
if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
@@ -434,8 +434,12 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
struct vxlan_fdb *f;
f = __vxlan_find_mac(vxlan, mac, vni);
- if (f && f->used != jiffies)
- f->used = jiffies;
+ if (f) {
+ unsigned long now = jiffies;
+
+ if (READ_ONCE(f->used) != now)
+ WRITE_ONCE(f->used, now);
+ }
return f;
}
@@ -1009,12 +1013,10 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
!(f->flags & NTF_VXLAN_ADDED_BY_USER)) {
if (f->state != state) {
f->state = state;
- f->updated = jiffies;
notify = 1;
}
if (f->flags != fdb_flags) {
f->flags = fdb_flags;
- f->updated = jiffies;
notify = 1;
}
}
@@ -1048,12 +1050,13 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
}
if (ndm_flags & NTF_USE)
- f->used = jiffies;
+ WRITE_ONCE(f->updated, jiffies);
if (notify) {
if (rd == NULL)
rd = first_remote_rtnl(f);
+ WRITE_ONCE(f->updated, jiffies);
err = vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH,
swdev_notify, extack);
if (err)
@@ -1292,7 +1295,7 @@ int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
struct vxlan_fdb *f;
int err = -ENOENT;
- f = vxlan_find_mac(vxlan, addr, src_vni);
+ f = __vxlan_find_mac(vxlan, addr, src_vni);
if (!f)
return err;
@@ -1459,9 +1462,13 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
ifindex = src_ifindex;
#endif
- f = vxlan_find_mac(vxlan, src_mac, vni);
+ f = __vxlan_find_mac(vxlan, src_mac, vni);
if (likely(f)) {
struct vxlan_rdst *rdst = first_remote_rcu(f);
+ unsigned long now = jiffies;
+
+ if (READ_ONCE(f->updated) != now)
+ WRITE_ONCE(f->updated, now);
if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
rdst->remote_ifindex == ifindex))
@@ -1481,7 +1488,6 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
src_mac, &rdst->remote_ip.sa, &src_ip->sa);
rdst->remote_ip = *src_ip;
- f->updated = jiffies;
vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
} else {
u32 hash_index = fdb_head_index(vxlan, src_mac, vni);
@@ -1664,7 +1670,6 @@ static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph,
return err <= 1;
}
-/* Callback from net/ipv4/udp.c to receive packets */
static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
{
struct vxlan_vni_node *vninode = NULL;
@@ -1834,7 +1839,6 @@ drop:
return 0;
}
-/* Callback from net/ipv{4,6}/udp.c to check that we have a VNI for errors */
static int vxlan_err_lookup(struct sock *sk, struct sk_buff *skb)
{
struct vxlan_dev *vxlan;
@@ -2852,7 +2856,7 @@ static void vxlan_cleanup(struct timer_list *t)
if (f->flags & NTF_EXT_LEARNED)
continue;
- timeout = f->used + vxlan->cfg.age_interval * HZ;
+ timeout = READ_ONCE(f->updated) + vxlan->cfg.age_interval * HZ;
if (time_before_eq(timeout, jiffies)) {
netdev_dbg(vxlan->dev,
"garbage collect %pM\n",
@@ -3932,7 +3936,7 @@ static void vxlan_config_apply(struct net_device *dev,
}
static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
- struct vxlan_config *conf, bool changelink,
+ struct vxlan_config *conf,
struct netlink_ext_ack *extack)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
@@ -3943,7 +3947,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
if (ret)
return ret;
- vxlan_config_apply(dev, conf, lowerdev, src_net, changelink);
+ vxlan_config_apply(dev, conf, lowerdev, src_net, false);
return 0;
}
@@ -3961,7 +3965,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
int err;
dst = &vxlan->default_dst;
- err = vxlan_dev_configure(net, dev, conf, false, extack);
+ err = vxlan_dev_configure(net, dev, conf, extack);
if (err)
return err;
@@ -4415,6 +4419,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
+ bool rem_ip_changed, change_igmp;
struct net_device *lowerdev;
struct vxlan_config conf;
struct vxlan_rdst *dst;
@@ -4438,8 +4443,13 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
if (err)
return err;
+ rem_ip_changed = !vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip);
+ change_igmp = vxlan->dev->flags & IFF_UP &&
+ (rem_ip_changed ||
+ dst->remote_ifindex != conf.remote_ifindex);
+
/* handle default dst entry */
- if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) {
+ if (rem_ip_changed) {
u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni);
spin_lock_bh(&vxlan->hash_lock[hash_index]);
@@ -4483,6 +4493,9 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
}
}
+ if (change_igmp && vxlan_addr_multicast(&dst->remote_ip))
+ err = vxlan_multicast_leave(vxlan);
+
if (conf.age_interval != vxlan->cfg.age_interval)
mod_timer(&vxlan->age_timer, jiffies);
@@ -4490,7 +4503,12 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
if (lowerdev && lowerdev != dst->remote_dev)
dst->remote_dev = lowerdev;
vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true);
- return 0;
+
+ if (!err && change_igmp &&
+ vxlan_addr_multicast(&dst->remote_ip))
+ err = vxlan_multicast_join(vxlan);
+
+ return err;
}
static void vxlan_dellink(struct net_device *dev, struct list_head *head)
@@ -4768,7 +4786,7 @@ vxlan_fdb_offloaded_set(struct net_device *dev,
spin_lock_bh(&vxlan->hash_lock[hash_index]);
- f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
+ f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
if (!f)
goto out;
@@ -4824,7 +4842,7 @@ vxlan_fdb_external_learn_del(struct net_device *dev,
hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
spin_lock_bh(&vxlan->hash_lock[hash_index]);
- f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
+ f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
if (!f)
err = -ENOENT;
else if (f->flags & NTF_EXT_LEARNED)
diff --git a/drivers/net/wwan/t7xx/t7xx_pci.c b/drivers/net/wwan/t7xx/t7xx_pci.c
index 02f2ec7cf4ce..8bf63f2dcbbf 100644
--- a/drivers/net/wwan/t7xx/t7xx_pci.c
+++ b/drivers/net/wwan/t7xx/t7xx_pci.c
@@ -32,7 +32,6 @@
#include <linux/pci.h>
#include <linux/pm.h>
#include <linux/pm_runtime.h>
-#include <linux/pm_wakeup.h>
#include <linux/spinlock.h>
#include "t7xx_mhccif.h"
diff --git a/drivers/of/base.c b/drivers/of/base.c
index af6c68bbb427..e37b088f1fad 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -824,6 +824,33 @@ struct device_node *of_get_child_by_name(const struct device_node *node,
}
EXPORT_SYMBOL(of_get_child_by_name);
+/**
+ * of_get_available_child_by_name - Find the available child node by name for a given parent
+ * @node: parent node
+ * @name: child name to look for.
+ *
+ * This function looks for child node for given matching name and checks the
+ * device's availability for use.
+ *
+ * Return: A node pointer if found, with refcount incremented, use
+ * of_node_put() on it when done.
+ * Returns NULL if node is not found.
+ */
+struct device_node *of_get_available_child_by_name(const struct device_node *node,
+ const char *name)
+{
+ struct device_node *child;
+
+ child = of_get_child_by_name(node, name);
+ if (child && !of_device_is_available(child)) {
+ of_node_put(child);
+ return NULL;
+ }
+
+ return child;
+}
+EXPORT_SYMBOL(of_get_available_child_by_name);
+
struct device_node *__of_find_node_by_path(const struct device_node *parent,
const char *path)
{
diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig
index c61e6427384c..9eb9e3c49f81 100644
--- a/drivers/s390/net/Kconfig
+++ b/drivers/s390/net/Kconfig
@@ -2,15 +2,6 @@
menu "S/390 network device drivers"
depends on NETDEVICES && S390
-config LCS
- def_tristate m
- prompt "Lan Channel Station Interface"
- depends on CCW && NETDEVICES && ETHERNET
- help
- Select this option if you want to use LCS networking on IBM System z.
- To compile as a module, choose M. The module name is lcs.
- If you do not use LCS, choose N.
-
config CTCM
def_tristate m
prompt "CTC and MPC SNA device support"
@@ -98,7 +89,7 @@ config QETH_OSX
config CCWGROUP
tristate
- default (LCS || CTCM || QETH || SMC)
+ default (CTCM || QETH || SMC)
config ISM
tristate "Support for ISM vPCI Adapter"
diff --git a/drivers/s390/net/Makefile b/drivers/s390/net/Makefile
index bc55ec316adb..b5aaba290127 100644
--- a/drivers/s390/net/Makefile
+++ b/drivers/s390/net/Makefile
@@ -8,7 +8,6 @@ obj-$(CONFIG_CTCM) += ctcm.o fsm.o
obj-$(CONFIG_NETIUCV) += netiucv.o fsm.o
obj-$(CONFIG_SMSGIUCV) += smsgiucv.o
obj-$(CONFIG_SMSGIUCV_EVENT) += smsgiucv_app.o
-obj-$(CONFIG_LCS) += lcs.o
qeth-y += qeth_core_sys.o qeth_core_main.o qeth_core_mpc.o qeth_ethtool.o
obj-$(CONFIG_QETH) += qeth.o
qeth_l2-y += qeth_l2_main.o qeth_l2_sys.o
diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
deleted file mode 100644
index 88db8378325a..000000000000
--- a/drivers/s390/net/lcs.c
+++ /dev/null
@@ -1,2385 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Linux for S/390 LAN channel station device driver
- *
- * Copyright IBM Corp. 1999, 2009
- * Author(s): Original Code written by
- * DJ Barrow <djbarrow@de.ibm.com,barrow_dj@yahoo.com>
- * Rewritten by
- * Frank Pavlic <fpavlic@de.ibm.com> and
- * Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-
-#define KMSG_COMPONENT "lcs"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/module.h>
-#include <linux/if.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/in.h>
-#include <linux/igmp.h>
-#include <linux/delay.h>
-#include <linux/kthread.h>
-#include <linux/slab.h>
-#include <net/arp.h>
-#include <net/ip.h>
-
-#include <asm/debug.h>
-#include <asm/idals.h>
-#include <asm/timex.h>
-#include <linux/device.h>
-#include <asm/ccwgroup.h>
-
-#include "lcs.h"
-
-
-/*
- * initialization string for output
- */
-
-static char version[] __initdata = "LCS driver";
-
-/*
- * the root device for lcs group devices
- */
-static struct device *lcs_root_dev;
-
-/*
- * Some prototypes.
- */
-static void lcs_tasklet(unsigned long);
-static void lcs_start_kernel_thread(struct work_struct *);
-static void lcs_get_frames_cb(struct lcs_channel *, struct lcs_buffer *);
-#ifdef CONFIG_IP_MULTICAST
-static int lcs_send_delipm(struct lcs_card *, struct lcs_ipm_list *);
-#endif /* CONFIG_IP_MULTICAST */
-static int lcs_recovery(void *ptr);
-
-/*
- * Debug Facility Stuff
- */
-static char debug_buffer[255];
-static debug_info_t *lcs_dbf_setup;
-static debug_info_t *lcs_dbf_trace;
-
-/*
- * LCS Debug Facility functions
- */
-static void
-lcs_unregister_debug_facility(void)
-{
- debug_unregister(lcs_dbf_setup);
- debug_unregister(lcs_dbf_trace);
-}
-
-static int
-lcs_register_debug_facility(void)
-{
- lcs_dbf_setup = debug_register("lcs_setup", 2, 1, 8);
- lcs_dbf_trace = debug_register("lcs_trace", 4, 1, 8);
- if (lcs_dbf_setup == NULL || lcs_dbf_trace == NULL) {
- pr_err("Not enough memory for debug facility.\n");
- lcs_unregister_debug_facility();
- return -ENOMEM;
- }
- debug_register_view(lcs_dbf_setup, &debug_hex_ascii_view);
- debug_set_level(lcs_dbf_setup, 2);
- debug_register_view(lcs_dbf_trace, &debug_hex_ascii_view);
- debug_set_level(lcs_dbf_trace, 2);
- return 0;
-}
-
-/*
- * Allocate io buffers.
- */
-static int
-lcs_alloc_channel(struct lcs_channel *channel)
-{
- int cnt;
-
- LCS_DBF_TEXT(2, setup, "ichalloc");
- for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) {
- /* alloc memory fo iobuffer */
- channel->iob[cnt].data =
- kzalloc(LCS_IOBUFFERSIZE, GFP_DMA | GFP_KERNEL);
- if (channel->iob[cnt].data == NULL)
- break;
- channel->iob[cnt].state = LCS_BUF_STATE_EMPTY;
- }
- if (cnt < LCS_NUM_BUFFS) {
- /* Not all io buffers could be allocated. */
- LCS_DBF_TEXT(2, setup, "echalloc");
- while (cnt-- > 0)
- kfree(channel->iob[cnt].data);
- return -ENOMEM;
- }
- return 0;
-}
-
-/*
- * Free io buffers.
- */
-static void
-lcs_free_channel(struct lcs_channel *channel)
-{
- int cnt;
-
- LCS_DBF_TEXT(2, setup, "ichfree");
- for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) {
- kfree(channel->iob[cnt].data);
- channel->iob[cnt].data = NULL;
- }
-}
-
-/*
- * Cleanup channel.
- */
-static void
-lcs_cleanup_channel(struct lcs_channel *channel)
-{
- LCS_DBF_TEXT(3, setup, "cleanch");
- /* Kill write channel tasklets. */
- tasklet_kill(&channel->irq_tasklet);
- /* Free channel buffers. */
- lcs_free_channel(channel);
-}
-
-/*
- * LCS free memory for card and channels.
- */
-static void
-lcs_free_card(struct lcs_card *card)
-{
- LCS_DBF_TEXT(2, setup, "remcard");
- LCS_DBF_HEX(2, setup, &card, sizeof(void*));
- kfree(card);
-}
-
-/*
- * LCS alloc memory for card and channels
- */
-static struct lcs_card *
-lcs_alloc_card(void)
-{
- struct lcs_card *card;
- int rc;
-
- LCS_DBF_TEXT(2, setup, "alloclcs");
-
- card = kzalloc(sizeof(struct lcs_card), GFP_KERNEL | GFP_DMA);
- if (card == NULL)
- return NULL;
- card->lan_type = LCS_FRAME_TYPE_AUTO;
- card->pkt_seq = 0;
- card->lancmd_timeout = LCS_LANCMD_TIMEOUT_DEFAULT;
- /* Allocate io buffers for the read channel. */
- rc = lcs_alloc_channel(&card->read);
- if (rc){
- LCS_DBF_TEXT(2, setup, "iccwerr");
- lcs_free_card(card);
- return NULL;
- }
- /* Allocate io buffers for the write channel. */
- rc = lcs_alloc_channel(&card->write);
- if (rc) {
- LCS_DBF_TEXT(2, setup, "iccwerr");
- lcs_cleanup_channel(&card->read);
- lcs_free_card(card);
- return NULL;
- }
-
-#ifdef CONFIG_IP_MULTICAST
- INIT_LIST_HEAD(&card->ipm_list);
-#endif
- LCS_DBF_HEX(2, setup, &card, sizeof(void*));
- return card;
-}
-
-/*
- * Setup read channel.
- */
-static void
-lcs_setup_read_ccws(struct lcs_card *card)
-{
- int cnt;
-
- LCS_DBF_TEXT(2, setup, "ireadccw");
- /* Setup read ccws. */
- memset(card->read.ccws, 0, sizeof (struct ccw1) * (LCS_NUM_BUFFS + 1));
- for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) {
- card->read.ccws[cnt].cmd_code = LCS_CCW_READ;
- card->read.ccws[cnt].count = LCS_IOBUFFERSIZE;
- card->read.ccws[cnt].flags =
- CCW_FLAG_CC | CCW_FLAG_SLI | CCW_FLAG_PCI;
- /*
- * Note: we have allocated the buffer with GFP_DMA, so
- * we do not need to do set_normalized_cda.
- */
- card->read.ccws[cnt].cda =
- virt_to_dma32(card->read.iob[cnt].data);
- ((struct lcs_header *)
- card->read.iob[cnt].data)->offset = LCS_ILLEGAL_OFFSET;
- card->read.iob[cnt].callback = lcs_get_frames_cb;
- card->read.iob[cnt].state = LCS_BUF_STATE_READY;
- card->read.iob[cnt].count = LCS_IOBUFFERSIZE;
- }
- card->read.ccws[0].flags &= ~CCW_FLAG_PCI;
- card->read.ccws[LCS_NUM_BUFFS - 1].flags &= ~CCW_FLAG_PCI;
- card->read.ccws[LCS_NUM_BUFFS - 1].flags |= CCW_FLAG_SUSPEND;
- /* Last ccw is a tic (transfer in channel). */
- card->read.ccws[LCS_NUM_BUFFS].cmd_code = LCS_CCW_TRANSFER;
- card->read.ccws[LCS_NUM_BUFFS].cda = virt_to_dma32(card->read.ccws);
- /* Setg initial state of the read channel. */
- card->read.state = LCS_CH_STATE_INIT;
-
- card->read.io_idx = 0;
- card->read.buf_idx = 0;
-}
-
-static void
-lcs_setup_read(struct lcs_card *card)
-{
- LCS_DBF_TEXT(3, setup, "initread");
-
- lcs_setup_read_ccws(card);
- /* Initialize read channel tasklet. */
- card->read.irq_tasklet.data = (unsigned long) &card->read;
- card->read.irq_tasklet.func = lcs_tasklet;
- /* Initialize waitqueue. */
- init_waitqueue_head(&card->read.wait_q);
-}
-
-/*
- * Setup write channel.
- */
-static void
-lcs_setup_write_ccws(struct lcs_card *card)
-{
- int cnt;
-
- LCS_DBF_TEXT(3, setup, "iwritccw");
- /* Setup write ccws. */
- memset(card->write.ccws, 0, sizeof(struct ccw1) * (LCS_NUM_BUFFS + 1));
- for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) {
- card->write.ccws[cnt].cmd_code = LCS_CCW_WRITE;
- card->write.ccws[cnt].count = 0;
- card->write.ccws[cnt].flags =
- CCW_FLAG_SUSPEND | CCW_FLAG_CC | CCW_FLAG_SLI;
- /*
- * Note: we have allocated the buffer with GFP_DMA, so
- * we do not need to do set_normalized_cda.
- */
- card->write.ccws[cnt].cda =
- virt_to_dma32(card->write.iob[cnt].data);
- }
- /* Last ccw is a tic (transfer in channel). */
- card->write.ccws[LCS_NUM_BUFFS].cmd_code = LCS_CCW_TRANSFER;
- card->write.ccws[LCS_NUM_BUFFS].cda = virt_to_dma32(card->write.ccws);
- /* Set initial state of the write channel. */
- card->read.state = LCS_CH_STATE_INIT;
-
- card->write.io_idx = 0;
- card->write.buf_idx = 0;
-}
-
-static void
-lcs_setup_write(struct lcs_card *card)
-{
- LCS_DBF_TEXT(3, setup, "initwrit");
-
- lcs_setup_write_ccws(card);
- /* Initialize write channel tasklet. */
- card->write.irq_tasklet.data = (unsigned long) &card->write;
- card->write.irq_tasklet.func = lcs_tasklet;
- /* Initialize waitqueue. */
- init_waitqueue_head(&card->write.wait_q);
-}
-
-static void
-lcs_set_allowed_threads(struct lcs_card *card, unsigned long threads)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&card->mask_lock, flags);
- card->thread_allowed_mask = threads;
- spin_unlock_irqrestore(&card->mask_lock, flags);
- wake_up(&card->wait_q);
-}
-static int lcs_threads_running(struct lcs_card *card, unsigned long threads)
-{
- unsigned long flags;
- int rc = 0;
-
- spin_lock_irqsave(&card->mask_lock, flags);
- rc = (card->thread_running_mask & threads);
- spin_unlock_irqrestore(&card->mask_lock, flags);
- return rc;
-}
-
-static int
-lcs_wait_for_threads(struct lcs_card *card, unsigned long threads)
-{
- return wait_event_interruptible(card->wait_q,
- lcs_threads_running(card, threads) == 0);
-}
-
-static int lcs_set_thread_start_bit(struct lcs_card *card, unsigned long thread)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&card->mask_lock, flags);
- if ( !(card->thread_allowed_mask & thread) ||
- (card->thread_start_mask & thread) ) {
- spin_unlock_irqrestore(&card->mask_lock, flags);
- return -EPERM;
- }
- card->thread_start_mask |= thread;
- spin_unlock_irqrestore(&card->mask_lock, flags);
- return 0;
-}
-
-static void
-lcs_clear_thread_running_bit(struct lcs_card *card, unsigned long thread)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&card->mask_lock, flags);
- card->thread_running_mask &= ~thread;
- spin_unlock_irqrestore(&card->mask_lock, flags);
- wake_up(&card->wait_q);
-}
-
-static int __lcs_do_run_thread(struct lcs_card *card, unsigned long thread)
-{
- unsigned long flags;
- int rc = 0;
-
- spin_lock_irqsave(&card->mask_lock, flags);
- if (card->thread_start_mask & thread){
- if ((card->thread_allowed_mask & thread) &&
- !(card->thread_running_mask & thread)){
- rc = 1;
- card->thread_start_mask &= ~thread;
- card->thread_running_mask |= thread;
- } else
- rc = -EPERM;
- }
- spin_unlock_irqrestore(&card->mask_lock, flags);
- return rc;
-}
-
-static int
-lcs_do_run_thread(struct lcs_card *card, unsigned long thread)
-{
- int rc = 0;
- wait_event(card->wait_q,
- (rc = __lcs_do_run_thread(card, thread)) >= 0);
- return rc;
-}
-
-static int
-lcs_do_start_thread(struct lcs_card *card, unsigned long thread)
-{
- unsigned long flags;
- int rc = 0;
-
- spin_lock_irqsave(&card->mask_lock, flags);
- LCS_DBF_TEXT_(4, trace, " %02x%02x%02x",
- (u8) card->thread_start_mask,
- (u8) card->thread_allowed_mask,
- (u8) card->thread_running_mask);
- rc = (card->thread_start_mask & thread);
- spin_unlock_irqrestore(&card->mask_lock, flags);
- return rc;
-}
-
-/*
- * Initialize channels,card and state machines.
- */
-static void
-lcs_setup_card(struct lcs_card *card)
-{
- LCS_DBF_TEXT(2, setup, "initcard");
- LCS_DBF_HEX(2, setup, &card, sizeof(void*));
-
- lcs_setup_read(card);
- lcs_setup_write(card);
- /* Set cards initial state. */
- card->state = DEV_STATE_DOWN;
- card->tx_buffer = NULL;
- card->tx_emitted = 0;
-
- init_waitqueue_head(&card->wait_q);
- spin_lock_init(&card->lock);
- spin_lock_init(&card->ipm_lock);
- spin_lock_init(&card->mask_lock);
-#ifdef CONFIG_IP_MULTICAST
- INIT_LIST_HEAD(&card->ipm_list);
-#endif
- INIT_LIST_HEAD(&card->lancmd_waiters);
-}
-
-static void lcs_clear_multicast_list(struct lcs_card *card)
-{
-#ifdef CONFIG_IP_MULTICAST
- struct lcs_ipm_list *ipm;
- unsigned long flags;
-
- /* Free multicast list. */
- LCS_DBF_TEXT(3, setup, "clmclist");
- spin_lock_irqsave(&card->ipm_lock, flags);
- while (!list_empty(&card->ipm_list)){
- ipm = list_entry(card->ipm_list.next,
- struct lcs_ipm_list, list);
- list_del(&ipm->list);
- if (ipm->ipm_state != LCS_IPM_STATE_SET_REQUIRED){
- spin_unlock_irqrestore(&card->ipm_lock, flags);
- lcs_send_delipm(card, ipm);
- spin_lock_irqsave(&card->ipm_lock, flags);
- }
- kfree(ipm);
- }
- spin_unlock_irqrestore(&card->ipm_lock, flags);
-#endif
-}
-
-/*
- * Cleanup channels,card and state machines.
- */
-static void
-lcs_cleanup_card(struct lcs_card *card)
-{
-
- LCS_DBF_TEXT(3, setup, "cleancrd");
- LCS_DBF_HEX(2,setup,&card,sizeof(void*));
-
- if (card->dev != NULL)
- free_netdev(card->dev);
- /* Cleanup channels. */
- lcs_cleanup_channel(&card->write);
- lcs_cleanup_channel(&card->read);
-}
-
-/*
- * Start channel.
- */
-static int
-lcs_start_channel(struct lcs_channel *channel)
-{
- unsigned long flags;
- int rc;
-
- LCS_DBF_TEXT_(4, trace,"ssch%s", dev_name(&channel->ccwdev->dev));
- spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
- rc = ccw_device_start(channel->ccwdev,
- channel->ccws + channel->io_idx, 0, 0,
- DOIO_DENY_PREFETCH | DOIO_ALLOW_SUSPEND);
- if (rc == 0)
- channel->state = LCS_CH_STATE_RUNNING;
- spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
- if (rc) {
- LCS_DBF_TEXT_(4,trace,"essh%s",
- dev_name(&channel->ccwdev->dev));
- dev_err(&channel->ccwdev->dev,
- "Starting an LCS device resulted in an error,"
- " rc=%d!\n", rc);
- }
- return rc;
-}
-
-static int
-lcs_clear_channel(struct lcs_channel *channel)
-{
- unsigned long flags;
- int rc;
-
- LCS_DBF_TEXT(4,trace,"clearch");
- LCS_DBF_TEXT_(4, trace, "%s", dev_name(&channel->ccwdev->dev));
- spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
- rc = ccw_device_clear(channel->ccwdev, 0);
- spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
- if (rc) {
- LCS_DBF_TEXT_(4, trace, "ecsc%s",
- dev_name(&channel->ccwdev->dev));
- return rc;
- }
- wait_event(channel->wait_q, (channel->state == LCS_CH_STATE_CLEARED));
- channel->state = LCS_CH_STATE_STOPPED;
- return rc;
-}
-
-
-/*
- * Stop channel.
- */
-static int
-lcs_stop_channel(struct lcs_channel *channel)
-{
- unsigned long flags;
- int rc;
-
- if (channel->state == LCS_CH_STATE_STOPPED)
- return 0;
- LCS_DBF_TEXT(4,trace,"haltsch");
- LCS_DBF_TEXT_(4, trace, "%s", dev_name(&channel->ccwdev->dev));
- channel->state = LCS_CH_STATE_INIT;
- spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
- rc = ccw_device_halt(channel->ccwdev, 0);
- spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
- if (rc) {
- LCS_DBF_TEXT_(4, trace, "ehsc%s",
- dev_name(&channel->ccwdev->dev));
- return rc;
- }
- /* Asynchronous halt initialted. Wait for its completion. */
- wait_event(channel->wait_q, (channel->state == LCS_CH_STATE_HALTED));
- lcs_clear_channel(channel);
- return 0;
-}
-
-/*
- * start read and write channel
- */
-static int
-lcs_start_channels(struct lcs_card *card)
-{
- int rc;
-
- LCS_DBF_TEXT(2, trace, "chstart");
- /* start read channel */
- rc = lcs_start_channel(&card->read);
- if (rc)
- return rc;
- /* start write channel */
- rc = lcs_start_channel(&card->write);
- if (rc)
- lcs_stop_channel(&card->read);
- return rc;
-}
-
-/*
- * stop read and write channel
- */
-static int
-lcs_stop_channels(struct lcs_card *card)
-{
- LCS_DBF_TEXT(2, trace, "chhalt");
- lcs_stop_channel(&card->read);
- lcs_stop_channel(&card->write);
- return 0;
-}
-
-/*
- * Get empty buffer.
- */
-static struct lcs_buffer *
-__lcs_get_buffer(struct lcs_channel *channel)
-{
- int index;
-
- LCS_DBF_TEXT(5, trace, "_getbuff");
- index = channel->io_idx;
- do {
- if (channel->iob[index].state == LCS_BUF_STATE_EMPTY) {
- channel->iob[index].state = LCS_BUF_STATE_LOCKED;
- return channel->iob + index;
- }
- index = (index + 1) & (LCS_NUM_BUFFS - 1);
- } while (index != channel->io_idx);
- return NULL;
-}
-
-static struct lcs_buffer *
-lcs_get_buffer(struct lcs_channel *channel)
-{
- struct lcs_buffer *buffer;
- unsigned long flags;
-
- LCS_DBF_TEXT(5, trace, "getbuff");
- spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
- buffer = __lcs_get_buffer(channel);
- spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
- return buffer;
-}
-
-/*
- * Resume channel program if the channel is suspended.
- */
-static int
-__lcs_resume_channel(struct lcs_channel *channel)
-{
- int rc;
-
- if (channel->state != LCS_CH_STATE_SUSPENDED)
- return 0;
- if (channel->ccws[channel->io_idx].flags & CCW_FLAG_SUSPEND)
- return 0;
- LCS_DBF_TEXT_(5, trace, "rsch%s", dev_name(&channel->ccwdev->dev));
- rc = ccw_device_resume(channel->ccwdev);
- if (rc) {
- LCS_DBF_TEXT_(4, trace, "ersc%s",
- dev_name(&channel->ccwdev->dev));
- dev_err(&channel->ccwdev->dev,
- "Sending data from the LCS device to the LAN failed"
- " with rc=%d\n",rc);
- } else
- channel->state = LCS_CH_STATE_RUNNING;
- return rc;
-
-}
-
-/*
- * Make a buffer ready for processing.
- */
-static void __lcs_ready_buffer_bits(struct lcs_channel *channel, int index)
-{
- int prev, next;
-
- LCS_DBF_TEXT(5, trace, "rdybits");
- prev = (index - 1) & (LCS_NUM_BUFFS - 1);
- next = (index + 1) & (LCS_NUM_BUFFS - 1);
- /* Check if we may clear the suspend bit of this buffer. */
- if (channel->ccws[next].flags & CCW_FLAG_SUSPEND) {
- /* Check if we have to set the PCI bit. */
- if (!(channel->ccws[prev].flags & CCW_FLAG_SUSPEND))
- /* Suspend bit of the previous buffer is not set. */
- channel->ccws[index].flags |= CCW_FLAG_PCI;
- /* Suspend bit of the next buffer is set. */
- channel->ccws[index].flags &= ~CCW_FLAG_SUSPEND;
- }
-}
-
-static int
-lcs_ready_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer)
-{
- unsigned long flags;
- int index, rc;
-
- LCS_DBF_TEXT(5, trace, "rdybuff");
- BUG_ON(buffer->state != LCS_BUF_STATE_LOCKED &&
- buffer->state != LCS_BUF_STATE_PROCESSED);
- spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
- buffer->state = LCS_BUF_STATE_READY;
- index = buffer - channel->iob;
- /* Set length. */
- channel->ccws[index].count = buffer->count;
- /* Check relevant PCI/suspend bits. */
- __lcs_ready_buffer_bits(channel, index);
- rc = __lcs_resume_channel(channel);
- spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
- return rc;
-}
-
-/*
- * Mark the buffer as processed. Take care of the suspend bit
- * of the previous buffer. This function is called from
- * interrupt context, so the lock must not be taken.
- */
-static int
-__lcs_processed_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer)
-{
- int index, prev, next;
-
- LCS_DBF_TEXT(5, trace, "prcsbuff");
- BUG_ON(buffer->state != LCS_BUF_STATE_READY);
- buffer->state = LCS_BUF_STATE_PROCESSED;
- index = buffer - channel->iob;
- prev = (index - 1) & (LCS_NUM_BUFFS - 1);
- next = (index + 1) & (LCS_NUM_BUFFS - 1);
- /* Set the suspend bit and clear the PCI bit of this buffer. */
- channel->ccws[index].flags |= CCW_FLAG_SUSPEND;
- channel->ccws[index].flags &= ~CCW_FLAG_PCI;
- /* Check the suspend bit of the previous buffer. */
- if (channel->iob[prev].state == LCS_BUF_STATE_READY) {
- /*
- * Previous buffer is in state ready. It might have
- * happened in lcs_ready_buffer that the suspend bit
- * has not been cleared to avoid an endless loop.
- * Do it now.
- */
- __lcs_ready_buffer_bits(channel, prev);
- }
- /* Clear PCI bit of next buffer. */
- channel->ccws[next].flags &= ~CCW_FLAG_PCI;
- return __lcs_resume_channel(channel);
-}
-
-/*
- * Put a processed buffer back to state empty.
- */
-static void
-lcs_release_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer)
-{
- unsigned long flags;
-
- LCS_DBF_TEXT(5, trace, "relbuff");
- BUG_ON(buffer->state != LCS_BUF_STATE_LOCKED &&
- buffer->state != LCS_BUF_STATE_PROCESSED);
- spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
- buffer->state = LCS_BUF_STATE_EMPTY;
- spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
-}
-
-/*
- * Get buffer for a lan command.
- */
-static struct lcs_buffer *
-lcs_get_lancmd(struct lcs_card *card, int count)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
-
- LCS_DBF_TEXT(4, trace, "getlncmd");
- /* Get buffer and wait if none is available. */
- wait_event(card->write.wait_q,
- ((buffer = lcs_get_buffer(&card->write)) != NULL));
- count += sizeof(struct lcs_header);
- *(__u16 *)(buffer->data + count) = 0;
- buffer->count = count + sizeof(__u16);
- buffer->callback = lcs_release_buffer;
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->offset = count;
- cmd->type = LCS_FRAME_TYPE_CONTROL;
- cmd->slot = 0;
- return buffer;
-}
-
-
-static void
-lcs_get_reply(struct lcs_reply *reply)
-{
- refcount_inc(&reply->refcnt);
-}
-
-static void
-lcs_put_reply(struct lcs_reply *reply)
-{
- if (refcount_dec_and_test(&reply->refcnt))
- kfree(reply);
-}
-
-static struct lcs_reply *
-lcs_alloc_reply(struct lcs_cmd *cmd)
-{
- struct lcs_reply *reply;
-
- LCS_DBF_TEXT(4, trace, "getreply");
-
- reply = kzalloc(sizeof(struct lcs_reply), GFP_ATOMIC);
- if (!reply)
- return NULL;
- refcount_set(&reply->refcnt, 1);
- reply->sequence_no = cmd->sequence_no;
- reply->received = 0;
- reply->rc = 0;
- init_waitqueue_head(&reply->wait_q);
-
- return reply;
-}
-
-/*
- * Notifier function for lancmd replies. Called from read irq.
- */
-static void
-lcs_notify_lancmd_waiters(struct lcs_card *card, struct lcs_cmd *cmd)
-{
- struct list_head *l, *n;
- struct lcs_reply *reply;
-
- LCS_DBF_TEXT(4, trace, "notiwait");
- spin_lock(&card->lock);
- list_for_each_safe(l, n, &card->lancmd_waiters) {
- reply = list_entry(l, struct lcs_reply, list);
- if (reply->sequence_no == cmd->sequence_no) {
- lcs_get_reply(reply);
- list_del_init(&reply->list);
- if (reply->callback != NULL)
- reply->callback(card, cmd);
- reply->received = 1;
- reply->rc = cmd->return_code;
- wake_up(&reply->wait_q);
- lcs_put_reply(reply);
- break;
- }
- }
- spin_unlock(&card->lock);
-}
-
-/*
- * Emit buffer of a lan command.
- */
-static void
-lcs_lancmd_timeout(struct timer_list *t)
-{
- struct lcs_reply *reply = from_timer(reply, t, timer);
- struct lcs_reply *list_reply, *r;
- unsigned long flags;
-
- LCS_DBF_TEXT(4, trace, "timeout");
- spin_lock_irqsave(&reply->card->lock, flags);
- list_for_each_entry_safe(list_reply, r,
- &reply->card->lancmd_waiters,list) {
- if (reply == list_reply) {
- lcs_get_reply(reply);
- list_del_init(&reply->list);
- spin_unlock_irqrestore(&reply->card->lock, flags);
- reply->received = 1;
- reply->rc = -ETIME;
- wake_up(&reply->wait_q);
- lcs_put_reply(reply);
- return;
- }
- }
- spin_unlock_irqrestore(&reply->card->lock, flags);
-}
-
-static int
-lcs_send_lancmd(struct lcs_card *card, struct lcs_buffer *buffer,
- void (*reply_callback)(struct lcs_card *, struct lcs_cmd *))
-{
- struct lcs_reply *reply;
- struct lcs_cmd *cmd;
- unsigned long flags;
- int rc;
-
- LCS_DBF_TEXT(4, trace, "sendcmd");
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->return_code = 0;
- cmd->sequence_no = card->sequence_no++;
- reply = lcs_alloc_reply(cmd);
- if (!reply)
- return -ENOMEM;
- reply->callback = reply_callback;
- reply->card = card;
- spin_lock_irqsave(&card->lock, flags);
- list_add_tail(&reply->list, &card->lancmd_waiters);
- spin_unlock_irqrestore(&card->lock, flags);
-
- buffer->callback = lcs_release_buffer;
- rc = lcs_ready_buffer(&card->write, buffer);
- if (rc)
- return rc;
- timer_setup(&reply->timer, lcs_lancmd_timeout, 0);
- mod_timer(&reply->timer, jiffies + HZ * card->lancmd_timeout);
- wait_event(reply->wait_q, reply->received);
- del_timer_sync(&reply->timer);
- LCS_DBF_TEXT_(4, trace, "rc:%d",reply->rc);
- rc = reply->rc;
- lcs_put_reply(reply);
- return rc ? -EIO : 0;
-}
-
-/*
- * LCS startup command
- */
-static int
-lcs_send_startup(struct lcs_card *card, __u8 initiator)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
-
- LCS_DBF_TEXT(2, trace, "startup");
- buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE);
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->cmd_code = LCS_CMD_STARTUP;
- cmd->initiator = initiator;
- cmd->cmd.lcs_startup.buff_size = LCS_IOBUFFERSIZE;
- return lcs_send_lancmd(card, buffer, NULL);
-}
-
-/*
- * LCS shutdown command
- */
-static int
-lcs_send_shutdown(struct lcs_card *card)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
-
- LCS_DBF_TEXT(2, trace, "shutdown");
- buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE);
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->cmd_code = LCS_CMD_SHUTDOWN;
- cmd->initiator = LCS_INITIATOR_TCPIP;
- return lcs_send_lancmd(card, buffer, NULL);
-}
-
-/*
- * LCS lanstat command
- */
-static void
-__lcs_lanstat_cb(struct lcs_card *card, struct lcs_cmd *cmd)
-{
- LCS_DBF_TEXT(2, trace, "statcb");
- memcpy(card->mac, cmd->cmd.lcs_lanstat_cmd.mac_addr, LCS_MAC_LENGTH);
-}
-
-static int
-lcs_send_lanstat(struct lcs_card *card)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
-
- LCS_DBF_TEXT(2,trace, "cmdstat");
- buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE);
- cmd = (struct lcs_cmd *) buffer->data;
- /* Setup lanstat command. */
- cmd->cmd_code = LCS_CMD_LANSTAT;
- cmd->initiator = LCS_INITIATOR_TCPIP;
- cmd->cmd.lcs_std_cmd.lan_type = card->lan_type;
- cmd->cmd.lcs_std_cmd.portno = card->portno;
- return lcs_send_lancmd(card, buffer, __lcs_lanstat_cb);
-}
-
-/*
- * send stoplan command
- */
-static int
-lcs_send_stoplan(struct lcs_card *card, __u8 initiator)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
-
- LCS_DBF_TEXT(2, trace, "cmdstpln");
- buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE);
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->cmd_code = LCS_CMD_STOPLAN;
- cmd->initiator = initiator;
- cmd->cmd.lcs_std_cmd.lan_type = card->lan_type;
- cmd->cmd.lcs_std_cmd.portno = card->portno;
- return lcs_send_lancmd(card, buffer, NULL);
-}
-
-/*
- * send startlan command
- */
-static void
-__lcs_send_startlan_cb(struct lcs_card *card, struct lcs_cmd *cmd)
-{
- LCS_DBF_TEXT(2, trace, "srtlancb");
- card->lan_type = cmd->cmd.lcs_std_cmd.lan_type;
- card->portno = cmd->cmd.lcs_std_cmd.portno;
-}
-
-static int
-lcs_send_startlan(struct lcs_card *card, __u8 initiator)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
-
- LCS_DBF_TEXT(2, trace, "cmdstaln");
- buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE);
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->cmd_code = LCS_CMD_STARTLAN;
- cmd->initiator = initiator;
- cmd->cmd.lcs_std_cmd.lan_type = card->lan_type;
- cmd->cmd.lcs_std_cmd.portno = card->portno;
- return lcs_send_lancmd(card, buffer, __lcs_send_startlan_cb);
-}
-
-#ifdef CONFIG_IP_MULTICAST
-/*
- * send setipm command (Multicast)
- */
-static int
-lcs_send_setipm(struct lcs_card *card,struct lcs_ipm_list *ipm_list)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
-
- LCS_DBF_TEXT(2, trace, "cmdsetim");
- buffer = lcs_get_lancmd(card, LCS_MULTICAST_CMD_SIZE);
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->cmd_code = LCS_CMD_SETIPM;
- cmd->initiator = LCS_INITIATOR_TCPIP;
- cmd->cmd.lcs_qipassist.lan_type = card->lan_type;
- cmd->cmd.lcs_qipassist.portno = card->portno;
- cmd->cmd.lcs_qipassist.version = 4;
- cmd->cmd.lcs_qipassist.num_ip_pairs = 1;
- memcpy(cmd->cmd.lcs_qipassist.lcs_ipass_ctlmsg.ip_mac_pair,
- &ipm_list->ipm, sizeof (struct lcs_ip_mac_pair));
- LCS_DBF_TEXT_(2, trace, "%x",ipm_list->ipm.ip_addr);
- return lcs_send_lancmd(card, buffer, NULL);
-}
-
-/*
- * send delipm command (Multicast)
- */
-static int
-lcs_send_delipm(struct lcs_card *card,struct lcs_ipm_list *ipm_list)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
-
- LCS_DBF_TEXT(2, trace, "cmddelim");
- buffer = lcs_get_lancmd(card, LCS_MULTICAST_CMD_SIZE);
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->cmd_code = LCS_CMD_DELIPM;
- cmd->initiator = LCS_INITIATOR_TCPIP;
- cmd->cmd.lcs_qipassist.lan_type = card->lan_type;
- cmd->cmd.lcs_qipassist.portno = card->portno;
- cmd->cmd.lcs_qipassist.version = 4;
- cmd->cmd.lcs_qipassist.num_ip_pairs = 1;
- memcpy(cmd->cmd.lcs_qipassist.lcs_ipass_ctlmsg.ip_mac_pair,
- &ipm_list->ipm, sizeof (struct lcs_ip_mac_pair));
- LCS_DBF_TEXT_(2, trace, "%x",ipm_list->ipm.ip_addr);
- return lcs_send_lancmd(card, buffer, NULL);
-}
-
-/*
- * check if multicast is supported by LCS
- */
-static void
-__lcs_check_multicast_cb(struct lcs_card *card, struct lcs_cmd *cmd)
-{
- LCS_DBF_TEXT(2, trace, "chkmccb");
- card->ip_assists_supported =
- cmd->cmd.lcs_qipassist.ip_assists_supported;
- card->ip_assists_enabled =
- cmd->cmd.lcs_qipassist.ip_assists_enabled;
-}
-
-static int
-lcs_check_multicast_support(struct lcs_card *card)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
- int rc;
-
- LCS_DBF_TEXT(2, trace, "cmdqipa");
- /* Send query ipassist. */
- buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE);
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->cmd_code = LCS_CMD_QIPASSIST;
- cmd->initiator = LCS_INITIATOR_TCPIP;
- cmd->cmd.lcs_qipassist.lan_type = card->lan_type;
- cmd->cmd.lcs_qipassist.portno = card->portno;
- cmd->cmd.lcs_qipassist.version = 4;
- cmd->cmd.lcs_qipassist.num_ip_pairs = 1;
- rc = lcs_send_lancmd(card, buffer, __lcs_check_multicast_cb);
- if (rc != 0) {
- pr_err("Query IPAssist failed. Assuming unsupported!\n");
- return -EOPNOTSUPP;
- }
- if (card->ip_assists_supported & LCS_IPASS_MULTICAST_SUPPORT)
- return 0;
- return -EOPNOTSUPP;
-}
-
-/*
- * set or del multicast address on LCS card
- */
-static void
-lcs_fix_multicast_list(struct lcs_card *card)
-{
- struct list_head failed_list;
- struct lcs_ipm_list *ipm, *tmp;
- unsigned long flags;
- int rc;
-
- LCS_DBF_TEXT(4,trace, "fixipm");
- INIT_LIST_HEAD(&failed_list);
- spin_lock_irqsave(&card->ipm_lock, flags);
-list_modified:
- list_for_each_entry_safe(ipm, tmp, &card->ipm_list, list){
- switch (ipm->ipm_state) {
- case LCS_IPM_STATE_SET_REQUIRED:
- /* del from ipm_list so no one else can tamper with
- * this entry */
- list_del_init(&ipm->list);
- spin_unlock_irqrestore(&card->ipm_lock, flags);
- rc = lcs_send_setipm(card, ipm);
- spin_lock_irqsave(&card->ipm_lock, flags);
- if (rc) {
- pr_info("Adding multicast address failed."
- " Table possibly full!\n");
- /* store ipm in failed list -> will be added
- * to ipm_list again, so a retry will be done
- * during the next call of this function */
- list_add_tail(&ipm->list, &failed_list);
- } else {
- ipm->ipm_state = LCS_IPM_STATE_ON_CARD;
- /* re-insert into ipm_list */
- list_add_tail(&ipm->list, &card->ipm_list);
- }
- goto list_modified;
- case LCS_IPM_STATE_DEL_REQUIRED:
- list_del(&ipm->list);
- spin_unlock_irqrestore(&card->ipm_lock, flags);
- lcs_send_delipm(card, ipm);
- spin_lock_irqsave(&card->ipm_lock, flags);
- kfree(ipm);
- goto list_modified;
- case LCS_IPM_STATE_ON_CARD:
- break;
- }
- }
- /* re-insert all entries from the failed_list into ipm_list */
- list_for_each_entry_safe(ipm, tmp, &failed_list, list)
- list_move_tail(&ipm->list, &card->ipm_list);
-
- spin_unlock_irqrestore(&card->ipm_lock, flags);
-}
-
-/*
- * get mac address for the relevant Multicast address
- */
-static void
-lcs_get_mac_for_ipm(__be32 ipm, char *mac, struct net_device *dev)
-{
- LCS_DBF_TEXT(4,trace, "getmac");
- ip_eth_mc_map(ipm, mac);
-}
-
-/*
- * function called by net device to handle multicast address relevant things
- */
-static void lcs_remove_mc_addresses(struct lcs_card *card,
- struct in_device *in4_dev)
-{
- struct ip_mc_list *im4;
- struct list_head *l;
- struct lcs_ipm_list *ipm;
- unsigned long flags;
- char buf[MAX_ADDR_LEN];
-
- LCS_DBF_TEXT(4, trace, "remmclst");
- spin_lock_irqsave(&card->ipm_lock, flags);
- list_for_each(l, &card->ipm_list) {
- ipm = list_entry(l, struct lcs_ipm_list, list);
- for (im4 = rcu_dereference(in4_dev->mc_list);
- im4 != NULL; im4 = rcu_dereference(im4->next_rcu)) {
- lcs_get_mac_for_ipm(im4->multiaddr, buf, card->dev);
- if ( (ipm->ipm.ip_addr == im4->multiaddr) &&
- (memcmp(buf, &ipm->ipm.mac_addr,
- LCS_MAC_LENGTH) == 0) )
- break;
- }
- if (im4 == NULL)
- ipm->ipm_state = LCS_IPM_STATE_DEL_REQUIRED;
- }
- spin_unlock_irqrestore(&card->ipm_lock, flags);
-}
-
-static struct lcs_ipm_list *lcs_check_addr_entry(struct lcs_card *card,
- struct ip_mc_list *im4,
- char *buf)
-{
- struct lcs_ipm_list *tmp, *ipm = NULL;
- struct list_head *l;
- unsigned long flags;
-
- LCS_DBF_TEXT(4, trace, "chkmcent");
- spin_lock_irqsave(&card->ipm_lock, flags);
- list_for_each(l, &card->ipm_list) {
- tmp = list_entry(l, struct lcs_ipm_list, list);
- if ( (tmp->ipm.ip_addr == im4->multiaddr) &&
- (memcmp(buf, &tmp->ipm.mac_addr,
- LCS_MAC_LENGTH) == 0) ) {
- ipm = tmp;
- break;
- }
- }
- spin_unlock_irqrestore(&card->ipm_lock, flags);
- return ipm;
-}
-
-static void lcs_set_mc_addresses(struct lcs_card *card,
- struct in_device *in4_dev)
-{
-
- struct ip_mc_list *im4;
- struct lcs_ipm_list *ipm;
- char buf[MAX_ADDR_LEN];
- unsigned long flags;
-
- LCS_DBF_TEXT(4, trace, "setmclst");
- for (im4 = rcu_dereference(in4_dev->mc_list); im4 != NULL;
- im4 = rcu_dereference(im4->next_rcu)) {
- lcs_get_mac_for_ipm(im4->multiaddr, buf, card->dev);
- ipm = lcs_check_addr_entry(card, im4, buf);
- if (ipm != NULL)
- continue; /* Address already in list. */
- ipm = kzalloc(sizeof(struct lcs_ipm_list), GFP_ATOMIC);
- if (ipm == NULL) {
- pr_info("Not enough memory to add"
- " new multicast entry!\n");
- break;
- }
- memcpy(&ipm->ipm.mac_addr, buf, LCS_MAC_LENGTH);
- ipm->ipm.ip_addr = im4->multiaddr;
- ipm->ipm_state = LCS_IPM_STATE_SET_REQUIRED;
- spin_lock_irqsave(&card->ipm_lock, flags);
- LCS_DBF_HEX(2,trace,&ipm->ipm.ip_addr,4);
- list_add(&ipm->list, &card->ipm_list);
- spin_unlock_irqrestore(&card->ipm_lock, flags);
- }
-}
-
-static int
-lcs_register_mc_addresses(void *data)
-{
- struct lcs_card *card;
- struct in_device *in4_dev;
-
- card = (struct lcs_card *) data;
-
- if (!lcs_do_run_thread(card, LCS_SET_MC_THREAD))
- return 0;
- LCS_DBF_TEXT(4, trace, "regmulti");
-
- in4_dev = in_dev_get(card->dev);
- if (in4_dev == NULL)
- goto out;
- rcu_read_lock();
- lcs_remove_mc_addresses(card,in4_dev);
- lcs_set_mc_addresses(card, in4_dev);
- rcu_read_unlock();
- in_dev_put(in4_dev);
-
- netif_carrier_off(card->dev);
- netif_tx_disable(card->dev);
- wait_event(card->write.wait_q,
- (card->write.state != LCS_CH_STATE_RUNNING));
- lcs_fix_multicast_list(card);
- if (card->state == DEV_STATE_UP) {
- netif_carrier_on(card->dev);
- netif_wake_queue(card->dev);
- }
-out:
- lcs_clear_thread_running_bit(card, LCS_SET_MC_THREAD);
- return 0;
-}
-#endif /* CONFIG_IP_MULTICAST */
-
-/*
- * function called by net device to
- * handle multicast address relevant things
- */
-static void
-lcs_set_multicast_list(struct net_device *dev)
-{
-#ifdef CONFIG_IP_MULTICAST
- struct lcs_card *card;
-
- LCS_DBF_TEXT(4, trace, "setmulti");
- card = (struct lcs_card *) dev->ml_priv;
-
- if (!lcs_set_thread_start_bit(card, LCS_SET_MC_THREAD))
- schedule_work(&card->kernel_thread_starter);
-#endif /* CONFIG_IP_MULTICAST */
-}
-
-static long
-lcs_check_irb_error(struct ccw_device *cdev, struct irb *irb)
-{
- if (!IS_ERR(irb))
- return 0;
-
- switch (PTR_ERR(irb)) {
- case -EIO:
- dev_warn(&cdev->dev,
- "An I/O-error occurred on the LCS device\n");
- LCS_DBF_TEXT(2, trace, "ckirberr");
- LCS_DBF_TEXT_(2, trace, " rc%d", -EIO);
- break;
- case -ETIMEDOUT:
- dev_warn(&cdev->dev,
- "A command timed out on the LCS device\n");
- LCS_DBF_TEXT(2, trace, "ckirberr");
- LCS_DBF_TEXT_(2, trace, " rc%d", -ETIMEDOUT);
- break;
- default:
- dev_warn(&cdev->dev,
- "An error occurred on the LCS device, rc=%ld\n",
- PTR_ERR(irb));
- LCS_DBF_TEXT(2, trace, "ckirberr");
- LCS_DBF_TEXT(2, trace, " rc???");
- }
- return PTR_ERR(irb);
-}
-
-static int
-lcs_get_problem(struct ccw_device *cdev, struct irb *irb)
-{
- int dstat, cstat;
- char *sense;
-
- sense = (char *) irb->ecw;
- cstat = irb->scsw.cmd.cstat;
- dstat = irb->scsw.cmd.dstat;
-
- if (cstat & (SCHN_STAT_CHN_CTRL_CHK | SCHN_STAT_INTF_CTRL_CHK |
- SCHN_STAT_CHN_DATA_CHK | SCHN_STAT_CHAIN_CHECK |
- SCHN_STAT_PROT_CHECK | SCHN_STAT_PROG_CHECK)) {
- LCS_DBF_TEXT(2, trace, "CGENCHK");
- return 1;
- }
- if (dstat & DEV_STAT_UNIT_CHECK) {
- if (sense[LCS_SENSE_BYTE_1] &
- LCS_SENSE_RESETTING_EVENT) {
- LCS_DBF_TEXT(2, trace, "REVIND");
- return 1;
- }
- if (sense[LCS_SENSE_BYTE_0] &
- LCS_SENSE_CMD_REJECT) {
- LCS_DBF_TEXT(2, trace, "CMDREJ");
- return 0;
- }
- if ((!sense[LCS_SENSE_BYTE_0]) &&
- (!sense[LCS_SENSE_BYTE_1]) &&
- (!sense[LCS_SENSE_BYTE_2]) &&
- (!sense[LCS_SENSE_BYTE_3])) {
- LCS_DBF_TEXT(2, trace, "ZEROSEN");
- return 0;
- }
- LCS_DBF_TEXT(2, trace, "DGENCHK");
- return 1;
- }
- return 0;
-}
-
-static void
-lcs_schedule_recovery(struct lcs_card *card)
-{
- LCS_DBF_TEXT(2, trace, "startrec");
- if (!lcs_set_thread_start_bit(card, LCS_RECOVERY_THREAD))
- schedule_work(&card->kernel_thread_starter);
-}
-
-/*
- * IRQ Handler for LCS channels
- */
-static void
-lcs_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb)
-{
- struct lcs_card *card;
- struct lcs_channel *channel;
- int rc, index;
- int cstat, dstat;
-
- if (lcs_check_irb_error(cdev, irb))
- return;
-
- card = CARD_FROM_DEV(cdev);
- if (card->read.ccwdev == cdev)
- channel = &card->read;
- else
- channel = &card->write;
-
- cstat = irb->scsw.cmd.cstat;
- dstat = irb->scsw.cmd.dstat;
- LCS_DBF_TEXT_(5, trace, "Rint%s", dev_name(&cdev->dev));
- LCS_DBF_TEXT_(5, trace, "%4x%4x", irb->scsw.cmd.cstat,
- irb->scsw.cmd.dstat);
- LCS_DBF_TEXT_(5, trace, "%4x%4x", irb->scsw.cmd.fctl,
- irb->scsw.cmd.actl);
-
- /* Check for channel and device errors presented */
- rc = lcs_get_problem(cdev, irb);
- if (rc || (dstat & DEV_STAT_UNIT_EXCEP)) {
- dev_warn(&cdev->dev,
- "The LCS device stopped because of an error,"
- " dstat=0x%X, cstat=0x%X \n",
- dstat, cstat);
- if (rc) {
- channel->state = LCS_CH_STATE_ERROR;
- }
- }
- if (channel->state == LCS_CH_STATE_ERROR) {
- lcs_schedule_recovery(card);
- wake_up(&card->wait_q);
- return;
- }
- /* How far in the ccw chain have we processed? */
- if ((channel->state != LCS_CH_STATE_INIT) &&
- (irb->scsw.cmd.fctl & SCSW_FCTL_START_FUNC) &&
- (irb->scsw.cmd.cpa != 0)) {
- index = (struct ccw1 *)dma32_to_virt(irb->scsw.cmd.cpa)
- - channel->ccws;
- if ((irb->scsw.cmd.actl & SCSW_ACTL_SUSPENDED) ||
- (irb->scsw.cmd.cstat & SCHN_STAT_PCI))
- /* Bloody io subsystem tells us lies about cpa... */
- index = (index - 1) & (LCS_NUM_BUFFS - 1);
- while (channel->io_idx != index) {
- __lcs_processed_buffer(channel,
- channel->iob + channel->io_idx);
- channel->io_idx =
- (channel->io_idx + 1) & (LCS_NUM_BUFFS - 1);
- }
- }
-
- if ((irb->scsw.cmd.dstat & DEV_STAT_DEV_END) ||
- (irb->scsw.cmd.dstat & DEV_STAT_CHN_END) ||
- (irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK))
- /* Mark channel as stopped. */
- channel->state = LCS_CH_STATE_STOPPED;
- else if (irb->scsw.cmd.actl & SCSW_ACTL_SUSPENDED)
- /* CCW execution stopped on a suspend bit. */
- channel->state = LCS_CH_STATE_SUSPENDED;
- if (irb->scsw.cmd.fctl & SCSW_FCTL_HALT_FUNC) {
- if (irb->scsw.cmd.cc != 0) {
- ccw_device_halt(channel->ccwdev, 0);
- return;
- }
- /* The channel has been stopped by halt_IO. */
- channel->state = LCS_CH_STATE_HALTED;
- }
- if (irb->scsw.cmd.fctl & SCSW_FCTL_CLEAR_FUNC)
- channel->state = LCS_CH_STATE_CLEARED;
- /* Do the rest in the tasklet. */
- tasklet_schedule(&channel->irq_tasklet);
-}
-
-/*
- * Tasklet for IRQ handler
- */
-static void
-lcs_tasklet(unsigned long data)
-{
- unsigned long flags;
- struct lcs_channel *channel;
- struct lcs_buffer *iob;
- int buf_idx;
-
- channel = (struct lcs_channel *) data;
- LCS_DBF_TEXT_(5, trace, "tlet%s", dev_name(&channel->ccwdev->dev));
-
- /* Check for processed buffers. */
- iob = channel->iob;
- buf_idx = channel->buf_idx;
- while (iob[buf_idx].state == LCS_BUF_STATE_PROCESSED) {
- /* Do the callback thing. */
- if (iob[buf_idx].callback != NULL)
- iob[buf_idx].callback(channel, iob + buf_idx);
- buf_idx = (buf_idx + 1) & (LCS_NUM_BUFFS - 1);
- }
- channel->buf_idx = buf_idx;
-
- if (channel->state == LCS_CH_STATE_STOPPED)
- lcs_start_channel(channel);
- spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
- if (channel->state == LCS_CH_STATE_SUSPENDED &&
- channel->iob[channel->io_idx].state == LCS_BUF_STATE_READY)
- __lcs_resume_channel(channel);
- spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
-
- /* Something happened on the channel. Wake up waiters. */
- wake_up(&channel->wait_q);
-}
-
-/*
- * Finish current tx buffer and make it ready for transmit.
- */
-static void
-__lcs_emit_txbuffer(struct lcs_card *card)
-{
- LCS_DBF_TEXT(5, trace, "emittx");
- *(__u16 *)(card->tx_buffer->data + card->tx_buffer->count) = 0;
- card->tx_buffer->count += 2;
- lcs_ready_buffer(&card->write, card->tx_buffer);
- card->tx_buffer = NULL;
- card->tx_emitted++;
-}
-
-/*
- * Callback for finished tx buffers.
- */
-static void
-lcs_txbuffer_cb(struct lcs_channel *channel, struct lcs_buffer *buffer)
-{
- struct lcs_card *card;
-
- LCS_DBF_TEXT(5, trace, "txbuffcb");
- /* Put buffer back to pool. */
- lcs_release_buffer(channel, buffer);
- card = container_of(channel, struct lcs_card, write);
- if (netif_queue_stopped(card->dev) && netif_carrier_ok(card->dev))
- netif_wake_queue(card->dev);
- spin_lock(&card->lock);
- card->tx_emitted--;
- if (card->tx_emitted <= 0 && card->tx_buffer != NULL)
- /*
- * Last running tx buffer has finished. Submit partially
- * filled current buffer.
- */
- __lcs_emit_txbuffer(card);
- spin_unlock(&card->lock);
-}
-
-/*
- * Packet transmit function called by network stack
- */
-static netdev_tx_t __lcs_start_xmit(struct lcs_card *card, struct sk_buff *skb,
- struct net_device *dev)
-{
- struct lcs_header *header;
- int rc = NETDEV_TX_OK;
-
- LCS_DBF_TEXT(5, trace, "hardxmit");
- if (skb == NULL) {
- card->stats.tx_dropped++;
- card->stats.tx_errors++;
- return NETDEV_TX_OK;
- }
- if (card->state != DEV_STATE_UP) {
- dev_kfree_skb(skb);
- card->stats.tx_dropped++;
- card->stats.tx_errors++;
- card->stats.tx_carrier_errors++;
- return NETDEV_TX_OK;
- }
- if (skb->protocol == htons(ETH_P_IPV6)) {
- dev_kfree_skb(skb);
- return NETDEV_TX_OK;
- }
- netif_stop_queue(card->dev);
- spin_lock(&card->lock);
- if (card->tx_buffer != NULL &&
- card->tx_buffer->count + sizeof(struct lcs_header) +
- skb->len + sizeof(u16) > LCS_IOBUFFERSIZE)
- /* skb too big for current tx buffer. */
- __lcs_emit_txbuffer(card);
- if (card->tx_buffer == NULL) {
- /* Get new tx buffer */
- card->tx_buffer = lcs_get_buffer(&card->write);
- if (card->tx_buffer == NULL) {
- card->stats.tx_dropped++;
- rc = NETDEV_TX_BUSY;
- goto out;
- }
- card->tx_buffer->callback = lcs_txbuffer_cb;
- card->tx_buffer->count = 0;
- }
- header = (struct lcs_header *)
- (card->tx_buffer->data + card->tx_buffer->count);
- card->tx_buffer->count += skb->len + sizeof(struct lcs_header);
- header->offset = card->tx_buffer->count;
- header->type = card->lan_type;
- header->slot = card->portno;
- skb_copy_from_linear_data(skb, header + 1, skb->len);
- spin_unlock(&card->lock);
- card->stats.tx_bytes += skb->len;
- card->stats.tx_packets++;
- dev_kfree_skb(skb);
- netif_wake_queue(card->dev);
- spin_lock(&card->lock);
- if (card->tx_emitted <= 0 && card->tx_buffer != NULL)
- /* If this is the first tx buffer emit it immediately. */
- __lcs_emit_txbuffer(card);
-out:
- spin_unlock(&card->lock);
- return rc;
-}
-
-static netdev_tx_t lcs_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- struct lcs_card *card;
- int rc;
-
- LCS_DBF_TEXT(5, trace, "pktxmit");
- card = (struct lcs_card *) dev->ml_priv;
- rc = __lcs_start_xmit(card, skb, dev);
- return rc;
-}
-
-/*
- * send startlan and lanstat command to make LCS device ready
- */
-static int
-lcs_startlan_auto(struct lcs_card *card)
-{
- int rc;
-
- LCS_DBF_TEXT(2, trace, "strtauto");
- card->lan_type = LCS_FRAME_TYPE_ENET;
- rc = lcs_send_startlan(card, LCS_INITIATOR_TCPIP);
- if (rc == 0)
- return 0;
-
- return -EIO;
-}
-
-static int
-lcs_startlan(struct lcs_card *card)
-{
- int rc, i;
-
- LCS_DBF_TEXT(2, trace, "startlan");
- rc = 0;
- if (card->portno != LCS_INVALID_PORT_NO) {
- if (card->lan_type == LCS_FRAME_TYPE_AUTO)
- rc = lcs_startlan_auto(card);
- else
- rc = lcs_send_startlan(card, LCS_INITIATOR_TCPIP);
- } else {
- for (i = 0; i <= 16; i++) {
- card->portno = i;
- if (card->lan_type != LCS_FRAME_TYPE_AUTO)
- rc = lcs_send_startlan(card,
- LCS_INITIATOR_TCPIP);
- else
- /* autodetecting lan type */
- rc = lcs_startlan_auto(card);
- if (rc == 0)
- break;
- }
- }
- if (rc == 0)
- return lcs_send_lanstat(card);
- return rc;
-}
-
-/*
- * LCS detect function
- * setup channels and make them I/O ready
- */
-static int
-lcs_detect(struct lcs_card *card)
-{
- int rc = 0;
-
- LCS_DBF_TEXT(2, setup, "lcsdetct");
- /* start/reset card */
- if (card->dev)
- netif_stop_queue(card->dev);
- rc = lcs_stop_channels(card);
- if (rc == 0) {
- rc = lcs_start_channels(card);
- if (rc == 0) {
- rc = lcs_send_startup(card, LCS_INITIATOR_TCPIP);
- if (rc == 0)
- rc = lcs_startlan(card);
- }
- }
- if (rc == 0) {
- card->state = DEV_STATE_UP;
- } else {
- card->state = DEV_STATE_DOWN;
- card->write.state = LCS_CH_STATE_INIT;
- card->read.state = LCS_CH_STATE_INIT;
- }
- return rc;
-}
-
-/*
- * LCS Stop card
- */
-static int
-lcs_stopcard(struct lcs_card *card)
-{
- int rc;
-
- LCS_DBF_TEXT(3, setup, "stopcard");
-
- if (card->read.state != LCS_CH_STATE_STOPPED &&
- card->write.state != LCS_CH_STATE_STOPPED &&
- card->read.state != LCS_CH_STATE_ERROR &&
- card->write.state != LCS_CH_STATE_ERROR &&
- card->state == DEV_STATE_UP) {
- lcs_clear_multicast_list(card);
- rc = lcs_send_stoplan(card,LCS_INITIATOR_TCPIP);
- rc = lcs_send_shutdown(card);
- }
- rc = lcs_stop_channels(card);
- card->state = DEV_STATE_DOWN;
-
- return rc;
-}
-
-/*
- * Kernel Thread helper functions for LGW initiated commands
- */
-static void
-lcs_start_kernel_thread(struct work_struct *work)
-{
- struct lcs_card *card = container_of(work, struct lcs_card, kernel_thread_starter);
- LCS_DBF_TEXT(5, trace, "krnthrd");
- if (lcs_do_start_thread(card, LCS_RECOVERY_THREAD))
- kthread_run(lcs_recovery, card, "lcs_recover");
-#ifdef CONFIG_IP_MULTICAST
- if (lcs_do_start_thread(card, LCS_SET_MC_THREAD))
- kthread_run(lcs_register_mc_addresses, card, "regipm");
-#endif
-}
-
-/*
- * Process control frames.
- */
-static void
-lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd)
-{
- LCS_DBF_TEXT(5, trace, "getctrl");
- if (cmd->initiator == LCS_INITIATOR_LGW) {
- switch(cmd->cmd_code) {
- case LCS_CMD_STARTUP:
- case LCS_CMD_STARTLAN:
- lcs_schedule_recovery(card);
- break;
- case LCS_CMD_STOPLAN:
- if (card->dev) {
- pr_warn("Stoplan for %s initiated by LGW\n",
- card->dev->name);
- netif_carrier_off(card->dev);
- }
- break;
- default:
- LCS_DBF_TEXT(5, trace, "noLGWcmd");
- break;
- }
- } else
- lcs_notify_lancmd_waiters(card, cmd);
-}
-
-/*
- * Unpack network packet.
- */
-static void
-lcs_get_skb(struct lcs_card *card, char *skb_data, unsigned int skb_len)
-{
- struct sk_buff *skb;
-
- LCS_DBF_TEXT(5, trace, "getskb");
- if (card->dev == NULL ||
- card->state != DEV_STATE_UP)
- /* The card isn't up. Ignore the packet. */
- return;
-
- skb = dev_alloc_skb(skb_len);
- if (skb == NULL) {
- dev_err(&card->dev->dev,
- " Allocating a socket buffer to interface %s failed\n",
- card->dev->name);
- card->stats.rx_dropped++;
- return;
- }
- skb_put_data(skb, skb_data, skb_len);
- skb->protocol = card->lan_type_trans(skb, card->dev);
- card->stats.rx_bytes += skb_len;
- card->stats.rx_packets++;
- if (skb->protocol == htons(ETH_P_802_2))
- *((__u32 *)skb->cb) = ++card->pkt_seq;
- netif_rx(skb);
-}
-
-/*
- * LCS main routine to get packets and lancmd replies from the buffers
- */
-static void
-lcs_get_frames_cb(struct lcs_channel *channel, struct lcs_buffer *buffer)
-{
- struct lcs_card *card;
- struct lcs_header *lcs_hdr;
- __u16 offset;
-
- LCS_DBF_TEXT(5, trace, "lcsgtpkt");
- lcs_hdr = (struct lcs_header *) buffer->data;
- if (lcs_hdr->offset == LCS_ILLEGAL_OFFSET) {
- LCS_DBF_TEXT(4, trace, "-eiogpkt");
- return;
- }
- card = container_of(channel, struct lcs_card, read);
- offset = 0;
- while (lcs_hdr->offset != 0) {
- if (lcs_hdr->offset <= 0 ||
- lcs_hdr->offset > LCS_IOBUFFERSIZE ||
- lcs_hdr->offset < offset) {
- /* Offset invalid. */
- card->stats.rx_length_errors++;
- card->stats.rx_errors++;
- return;
- }
- if (lcs_hdr->type == LCS_FRAME_TYPE_CONTROL)
- lcs_get_control(card, (struct lcs_cmd *) lcs_hdr);
- else if (lcs_hdr->type == LCS_FRAME_TYPE_ENET)
- lcs_get_skb(card, (char *)(lcs_hdr + 1),
- lcs_hdr->offset - offset -
- sizeof(struct lcs_header));
- else
- dev_info_once(&card->dev->dev,
- "Unknown frame type %d\n",
- lcs_hdr->type);
- offset = lcs_hdr->offset;
- lcs_hdr->offset = LCS_ILLEGAL_OFFSET;
- lcs_hdr = (struct lcs_header *) (buffer->data + offset);
- }
- /* The buffer is now empty. Make it ready again. */
- lcs_ready_buffer(&card->read, buffer);
-}
-
-/*
- * get network statistics for ifconfig and other user programs
- */
-static struct net_device_stats *
-lcs_getstats(struct net_device *dev)
-{
- struct lcs_card *card;
-
- LCS_DBF_TEXT(4, trace, "netstats");
- card = (struct lcs_card *) dev->ml_priv;
- return &card->stats;
-}
-
-/*
- * stop lcs device
- * This function will be called by user doing ifconfig xxx down
- */
-static int
-lcs_stop_device(struct net_device *dev)
-{
- struct lcs_card *card;
- int rc;
-
- LCS_DBF_TEXT(2, trace, "stopdev");
- card = (struct lcs_card *) dev->ml_priv;
- netif_carrier_off(dev);
- netif_tx_disable(dev);
- dev->flags &= ~IFF_UP;
- wait_event(card->write.wait_q,
- (card->write.state != LCS_CH_STATE_RUNNING));
- rc = lcs_stopcard(card);
- if (rc)
- dev_err(&card->dev->dev,
- " Shutting down the LCS device failed\n");
- return rc;
-}
-
-/*
- * start lcs device and make it runnable
- * This function will be called by user doing ifconfig xxx up
- */
-static int
-lcs_open_device(struct net_device *dev)
-{
- struct lcs_card *card;
- int rc;
-
- LCS_DBF_TEXT(2, trace, "opendev");
- card = (struct lcs_card *) dev->ml_priv;
- /* initialize statistics */
- rc = lcs_detect(card);
- if (rc) {
- pr_err("Error in opening device!\n");
-
- } else {
- dev->flags |= IFF_UP;
- netif_carrier_on(dev);
- netif_wake_queue(dev);
- card->state = DEV_STATE_UP;
- }
- return rc;
-}
-
-/*
- * show function for portno called by cat or similar things
- */
-static ssize_t
-lcs_portno_show (struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct lcs_card *card;
-
- card = dev_get_drvdata(dev);
-
- if (!card)
- return 0;
-
- return sysfs_emit(buf, "%d\n", card->portno);
-}
-
-/*
- * store the value which is piped to file portno
- */
-static ssize_t
-lcs_portno_store (struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
-{
- struct lcs_card *card;
- int rc;
- s16 value;
-
- card = dev_get_drvdata(dev);
-
- if (!card)
- return 0;
-
- rc = kstrtos16(buf, 0, &value);
- if (rc)
- return -EINVAL;
- /* TODO: sanity checks */
- card->portno = value;
- if (card->dev)
- card->dev->dev_port = card->portno;
-
- return count;
-
-}
-
-static DEVICE_ATTR(portno, 0644, lcs_portno_show, lcs_portno_store);
-
-static const char *lcs_type[] = {
- "not a channel",
- "2216 parallel",
- "2216 channel",
- "OSA LCS card",
- "unknown channel type",
- "unsupported channel type",
-};
-
-static ssize_t
-lcs_type_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct ccwgroup_device *cgdev;
-
- cgdev = to_ccwgroupdev(dev);
- if (!cgdev)
- return -ENODEV;
-
- return sysfs_emit(buf, "%s\n",
- lcs_type[cgdev->cdev[0]->id.driver_info]);
-}
-
-static DEVICE_ATTR(type, 0444, lcs_type_show, NULL);
-
-static ssize_t
-lcs_timeout_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct lcs_card *card;
-
- card = dev_get_drvdata(dev);
-
- return card ? sysfs_emit(buf, "%u\n", card->lancmd_timeout) : 0;
-}
-
-static ssize_t
-lcs_timeout_store (struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
-{
- struct lcs_card *card;
- unsigned int value;
- int rc;
-
- card = dev_get_drvdata(dev);
-
- if (!card)
- return 0;
-
- rc = kstrtouint(buf, 0, &value);
- if (rc)
- return -EINVAL;
- /* TODO: sanity checks */
- card->lancmd_timeout = value;
-
- return count;
-
-}
-
-static DEVICE_ATTR(lancmd_timeout, 0644, lcs_timeout_show, lcs_timeout_store);
-
-static ssize_t
-lcs_dev_recover_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct lcs_card *card = dev_get_drvdata(dev);
- char *tmp;
- int i;
-
- if (!card)
- return -EINVAL;
- if (card->state != DEV_STATE_UP)
- return -EPERM;
- i = simple_strtoul(buf, &tmp, 16);
- if (i == 1)
- lcs_schedule_recovery(card);
- return count;
-}
-
-static DEVICE_ATTR(recover, 0200, NULL, lcs_dev_recover_store);
-
-static struct attribute * lcs_attrs[] = {
- &dev_attr_portno.attr,
- &dev_attr_type.attr,
- &dev_attr_lancmd_timeout.attr,
- &dev_attr_recover.attr,
- NULL,
-};
-static struct attribute_group lcs_attr_group = {
- .attrs = lcs_attrs,
-};
-static const struct attribute_group *lcs_attr_groups[] = {
- &lcs_attr_group,
- NULL,
-};
-static const struct device_type lcs_devtype = {
- .name = "lcs",
- .groups = lcs_attr_groups,
-};
-
-/*
- * lcs_probe_device is called on establishing a new ccwgroup_device.
- */
-static int
-lcs_probe_device(struct ccwgroup_device *ccwgdev)
-{
- struct lcs_card *card;
-
- if (!get_device(&ccwgdev->dev))
- return -ENODEV;
-
- LCS_DBF_TEXT(2, setup, "add_dev");
- card = lcs_alloc_card();
- if (!card) {
- LCS_DBF_TEXT_(2, setup, " rc%d", -ENOMEM);
- put_device(&ccwgdev->dev);
- return -ENOMEM;
- }
- dev_set_drvdata(&ccwgdev->dev, card);
- ccwgdev->cdev[0]->handler = lcs_irq;
- ccwgdev->cdev[1]->handler = lcs_irq;
- card->gdev = ccwgdev;
- INIT_WORK(&card->kernel_thread_starter, lcs_start_kernel_thread);
- card->thread_start_mask = 0;
- card->thread_allowed_mask = 0;
- card->thread_running_mask = 0;
- ccwgdev->dev.type = &lcs_devtype;
-
- return 0;
-}
-
-static int
-lcs_register_netdev(struct ccwgroup_device *ccwgdev)
-{
- struct lcs_card *card;
-
- LCS_DBF_TEXT(2, setup, "regnetdv");
- card = dev_get_drvdata(&ccwgdev->dev);
- if (card->dev->reg_state != NETREG_UNINITIALIZED)
- return 0;
- SET_NETDEV_DEV(card->dev, &ccwgdev->dev);
- return register_netdev(card->dev);
-}
-
-/*
- * lcs_new_device will be called by setting the group device online.
- */
-static const struct net_device_ops lcs_netdev_ops = {
- .ndo_open = lcs_open_device,
- .ndo_stop = lcs_stop_device,
- .ndo_get_stats = lcs_getstats,
- .ndo_start_xmit = lcs_start_xmit,
-};
-
-static const struct net_device_ops lcs_mc_netdev_ops = {
- .ndo_open = lcs_open_device,
- .ndo_stop = lcs_stop_device,
- .ndo_get_stats = lcs_getstats,
- .ndo_start_xmit = lcs_start_xmit,
- .ndo_set_rx_mode = lcs_set_multicast_list,
-};
-
-static int
-lcs_new_device(struct ccwgroup_device *ccwgdev)
-{
- struct lcs_card *card;
- struct net_device *dev=NULL;
- enum lcs_dev_states recover_state;
- int rc;
-
- card = dev_get_drvdata(&ccwgdev->dev);
- if (!card)
- return -ENODEV;
-
- LCS_DBF_TEXT(2, setup, "newdev");
- LCS_DBF_HEX(3, setup, &card, sizeof(void*));
- card->read.ccwdev = ccwgdev->cdev[0];
- card->write.ccwdev = ccwgdev->cdev[1];
-
- recover_state = card->state;
- rc = ccw_device_set_online(card->read.ccwdev);
- if (rc)
- goto out_err;
- rc = ccw_device_set_online(card->write.ccwdev);
- if (rc)
- goto out_werr;
-
- LCS_DBF_TEXT(3, setup, "lcsnewdv");
-
- lcs_setup_card(card);
- rc = lcs_detect(card);
- if (rc) {
- LCS_DBF_TEXT(2, setup, "dtctfail");
- dev_err(&ccwgdev->dev,
- "Detecting a network adapter for LCS devices"
- " failed with rc=%d (0x%x)\n", rc, rc);
- lcs_stopcard(card);
- goto out;
- }
- if (card->dev) {
- LCS_DBF_TEXT(2, setup, "samedev");
- LCS_DBF_HEX(3, setup, &card, sizeof(void*));
- goto netdev_out;
- }
- switch (card->lan_type) {
- case LCS_FRAME_TYPE_ENET:
- card->lan_type_trans = eth_type_trans;
- dev = alloc_etherdev(0);
- break;
- default:
- LCS_DBF_TEXT(3, setup, "errinit");
- pr_err(" Initialization failed\n");
- goto out;
- }
- if (!dev)
- goto out;
- card->dev = dev;
- card->dev->ml_priv = card;
- card->dev->netdev_ops = &lcs_netdev_ops;
- card->dev->dev_port = card->portno;
- eth_hw_addr_set(card->dev, card->mac);
-#ifdef CONFIG_IP_MULTICAST
- if (!lcs_check_multicast_support(card))
- card->dev->netdev_ops = &lcs_mc_netdev_ops;
-#endif
-netdev_out:
- lcs_set_allowed_threads(card,0xffffffff);
- if (recover_state == DEV_STATE_RECOVER) {
- lcs_set_multicast_list(card->dev);
- card->dev->flags |= IFF_UP;
- netif_carrier_on(card->dev);
- netif_wake_queue(card->dev);
- card->state = DEV_STATE_UP;
- } else {
- lcs_stopcard(card);
- }
-
- if (lcs_register_netdev(ccwgdev) != 0)
- goto out;
-
- /* Print out supported assists: IPv6 */
- pr_info("LCS device %s %s IPv6 support\n", card->dev->name,
- (card->ip_assists_supported & LCS_IPASS_IPV6_SUPPORT) ?
- "with" : "without");
- /* Print out supported assist: Multicast */
- pr_info("LCS device %s %s Multicast support\n", card->dev->name,
- (card->ip_assists_supported & LCS_IPASS_MULTICAST_SUPPORT) ?
- "with" : "without");
- return 0;
-out:
-
- ccw_device_set_offline(card->write.ccwdev);
-out_werr:
- ccw_device_set_offline(card->read.ccwdev);
-out_err:
- return -ENODEV;
-}
-
-/*
- * lcs_shutdown_device, called when setting the group device offline.
- */
-static int
-__lcs_shutdown_device(struct ccwgroup_device *ccwgdev, int recovery_mode)
-{
- struct lcs_card *card;
- enum lcs_dev_states recover_state;
- int ret = 0, ret2 = 0, ret3 = 0;
-
- LCS_DBF_TEXT(3, setup, "shtdndev");
- card = dev_get_drvdata(&ccwgdev->dev);
- if (!card)
- return -ENODEV;
- if (recovery_mode == 0) {
- lcs_set_allowed_threads(card, 0);
- if (lcs_wait_for_threads(card, LCS_SET_MC_THREAD))
- return -ERESTARTSYS;
- }
- LCS_DBF_HEX(3, setup, &card, sizeof(void*));
- recover_state = card->state;
-
- ret = lcs_stop_device(card->dev);
- ret2 = ccw_device_set_offline(card->read.ccwdev);
- ret3 = ccw_device_set_offline(card->write.ccwdev);
- if (!ret)
- ret = (ret2) ? ret2 : ret3;
- if (ret)
- LCS_DBF_TEXT_(3, setup, "1err:%d", ret);
- if (recover_state == DEV_STATE_UP) {
- card->state = DEV_STATE_RECOVER;
- }
- return 0;
-}
-
-static int
-lcs_shutdown_device(struct ccwgroup_device *ccwgdev)
-{
- return __lcs_shutdown_device(ccwgdev, 0);
-}
-
-/*
- * drive lcs recovery after startup and startlan initiated by Lan Gateway
- */
-static int
-lcs_recovery(void *ptr)
-{
- struct lcs_card *card;
- struct ccwgroup_device *gdev;
- int rc;
-
- card = (struct lcs_card *) ptr;
-
- LCS_DBF_TEXT(4, trace, "recover1");
- if (!lcs_do_run_thread(card, LCS_RECOVERY_THREAD))
- return 0;
- LCS_DBF_TEXT(4, trace, "recover2");
- gdev = card->gdev;
- dev_warn(&gdev->dev,
- "A recovery process has been started for the LCS device\n");
- rc = __lcs_shutdown_device(gdev, 1);
- rc = lcs_new_device(gdev);
- if (!rc)
- pr_info("Device %s successfully recovered!\n",
- card->dev->name);
- else
- pr_info("Device %s could not be recovered!\n",
- card->dev->name);
- lcs_clear_thread_running_bit(card, LCS_RECOVERY_THREAD);
- return 0;
-}
-
-/*
- * lcs_remove_device, free buffers and card
- */
-static void
-lcs_remove_device(struct ccwgroup_device *ccwgdev)
-{
- struct lcs_card *card;
-
- card = dev_get_drvdata(&ccwgdev->dev);
- if (!card)
- return;
-
- LCS_DBF_TEXT(3, setup, "remdev");
- LCS_DBF_HEX(3, setup, &card, sizeof(void*));
- if (ccwgdev->state == CCWGROUP_ONLINE) {
- lcs_shutdown_device(ccwgdev);
- }
- if (card->dev)
- unregister_netdev(card->dev);
- lcs_cleanup_card(card);
- lcs_free_card(card);
- dev_set_drvdata(&ccwgdev->dev, NULL);
- put_device(&ccwgdev->dev);
-}
-
-static struct ccw_device_id lcs_ids[] = {
- {CCW_DEVICE(0x3088, 0x08), .driver_info = lcs_channel_type_parallel},
- {CCW_DEVICE(0x3088, 0x1f), .driver_info = lcs_channel_type_2216},
- {CCW_DEVICE(0x3088, 0x60), .driver_info = lcs_channel_type_osa2},
- {},
-};
-MODULE_DEVICE_TABLE(ccw, lcs_ids);
-
-static struct ccw_driver lcs_ccw_driver = {
- .driver = {
- .owner = THIS_MODULE,
- .name = "lcs",
- },
- .ids = lcs_ids,
- .probe = ccwgroup_probe_ccwdev,
- .remove = ccwgroup_remove_ccwdev,
- .int_class = IRQIO_LCS,
-};
-
-/*
- * LCS ccwgroup driver registration
- */
-static struct ccwgroup_driver lcs_group_driver = {
- .driver = {
- .owner = THIS_MODULE,
- .name = "lcs",
- },
- .ccw_driver = &lcs_ccw_driver,
- .setup = lcs_probe_device,
- .remove = lcs_remove_device,
- .set_online = lcs_new_device,
- .set_offline = lcs_shutdown_device,
-};
-
-static ssize_t group_store(struct device_driver *ddrv, const char *buf,
- size_t count)
-{
- int err;
- err = ccwgroup_create_dev(lcs_root_dev, &lcs_group_driver, 2, buf);
- return err ? err : count;
-}
-static DRIVER_ATTR_WO(group);
-
-static struct attribute *lcs_drv_attrs[] = {
- &driver_attr_group.attr,
- NULL,
-};
-static struct attribute_group lcs_drv_attr_group = {
- .attrs = lcs_drv_attrs,
-};
-static const struct attribute_group *lcs_drv_attr_groups[] = {
- &lcs_drv_attr_group,
- NULL,
-};
-
-/*
- * LCS Module/Kernel initialization function
- */
-static int
-__init lcs_init_module(void)
-{
- int rc;
-
- pr_info("Loading %s\n", version);
- rc = lcs_register_debug_facility();
- LCS_DBF_TEXT(0, setup, "lcsinit");
- if (rc)
- goto out_err;
- lcs_root_dev = root_device_register("lcs");
- rc = PTR_ERR_OR_ZERO(lcs_root_dev);
- if (rc)
- goto register_err;
- rc = ccw_driver_register(&lcs_ccw_driver);
- if (rc)
- goto ccw_err;
- lcs_group_driver.driver.groups = lcs_drv_attr_groups;
- rc = ccwgroup_driver_register(&lcs_group_driver);
- if (rc)
- goto ccwgroup_err;
- return 0;
-
-ccwgroup_err:
- ccw_driver_unregister(&lcs_ccw_driver);
-ccw_err:
- root_device_unregister(lcs_root_dev);
-register_err:
- lcs_unregister_debug_facility();
-out_err:
- pr_err("Initializing the lcs device driver failed\n");
- return rc;
-}
-
-
-/*
- * LCS module cleanup function
- */
-static void
-__exit lcs_cleanup_module(void)
-{
- pr_info("Terminating lcs module.\n");
- LCS_DBF_TEXT(0, trace, "cleanup");
- ccwgroup_driver_unregister(&lcs_group_driver);
- ccw_driver_unregister(&lcs_ccw_driver);
- root_device_unregister(lcs_root_dev);
- lcs_unregister_debug_facility();
-}
-
-module_init(lcs_init_module);
-module_exit(lcs_cleanup_module);
-
-MODULE_AUTHOR("Frank Pavlic <fpavlic@de.ibm.com>");
-MODULE_DESCRIPTION("S/390 LAN channel station device driver");
-MODULE_LICENSE("GPL");
-
diff --git a/drivers/s390/net/lcs.h b/drivers/s390/net/lcs.h
deleted file mode 100644
index a2699b70b050..000000000000
--- a/drivers/s390/net/lcs.h
+++ /dev/null
@@ -1,342 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*lcs.h*/
-
-#include <linux/interrupt.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/workqueue.h>
-#include <linux/refcount.h>
-#include <asm/ccwdev.h>
-
-#define LCS_DBF_TEXT(level, name, text) \
- do { \
- debug_text_event(lcs_dbf_##name, level, text); \
- } while (0)
-
-#define LCS_DBF_HEX(level,name,addr,len) \
-do { \
- debug_event(lcs_dbf_##name,level,(void*)(addr),len); \
-} while (0)
-
-#define LCS_DBF_TEXT_(level,name,text...) \
- do { \
- if (debug_level_enabled(lcs_dbf_##name, level)) { \
- scnprintf(debug_buffer, sizeof(debug_buffer), text); \
- debug_text_event(lcs_dbf_##name, level, debug_buffer); \
- } \
- } while (0)
-
-/**
- * sysfs related stuff
- */
-#define CARD_FROM_DEV(cdev) \
- (struct lcs_card *) dev_get_drvdata( \
- &((struct ccwgroup_device *)dev_get_drvdata(&cdev->dev))->dev);
-
-/**
- * Enum for classifying detected devices.
- */
-enum lcs_channel_types {
- /* Device is not a channel */
- lcs_channel_type_none,
-
- /* Device is a 2216 channel */
- lcs_channel_type_parallel,
-
- /* Device is a 2216 channel */
- lcs_channel_type_2216,
-
- /* Device is a OSA2 card */
- lcs_channel_type_osa2
-};
-
-/**
- * CCW commands used in this driver
- */
-#define LCS_CCW_WRITE 0x01
-#define LCS_CCW_READ 0x02
-#define LCS_CCW_TRANSFER 0x08
-
-/**
- * LCS device status primitives
- */
-#define LCS_CMD_STARTLAN 0x01
-#define LCS_CMD_STOPLAN 0x02
-#define LCS_CMD_LANSTAT 0x04
-#define LCS_CMD_STARTUP 0x07
-#define LCS_CMD_SHUTDOWN 0x08
-#define LCS_CMD_QIPASSIST 0xb2
-#define LCS_CMD_SETIPM 0xb4
-#define LCS_CMD_DELIPM 0xb5
-
-#define LCS_INITIATOR_TCPIP 0x00
-#define LCS_INITIATOR_LGW 0x01
-#define LCS_STD_CMD_SIZE 16
-#define LCS_MULTICAST_CMD_SIZE 404
-
-/**
- * LCS IPASSIST MASKS,only used when multicast is switched on
- */
-/* Not supported by LCS */
-#define LCS_IPASS_ARP_PROCESSING 0x0001
-#define LCS_IPASS_IN_CHECKSUM_SUPPORT 0x0002
-#define LCS_IPASS_OUT_CHECKSUM_SUPPORT 0x0004
-#define LCS_IPASS_IP_FRAG_REASSEMBLY 0x0008
-#define LCS_IPASS_IP_FILTERING 0x0010
-/* Supported by lcs 3172 */
-#define LCS_IPASS_IPV6_SUPPORT 0x0020
-#define LCS_IPASS_MULTICAST_SUPPORT 0x0040
-
-/**
- * LCS sense byte definitions
- */
-#define LCS_SENSE_BYTE_0 0
-#define LCS_SENSE_BYTE_1 1
-#define LCS_SENSE_BYTE_2 2
-#define LCS_SENSE_BYTE_3 3
-#define LCS_SENSE_INTERFACE_DISCONNECT 0x01
-#define LCS_SENSE_EQUIPMENT_CHECK 0x10
-#define LCS_SENSE_BUS_OUT_CHECK 0x20
-#define LCS_SENSE_INTERVENTION_REQUIRED 0x40
-#define LCS_SENSE_CMD_REJECT 0x80
-#define LCS_SENSE_RESETTING_EVENT 0x80
-#define LCS_SENSE_DEVICE_ONLINE 0x20
-
-/**
- * LCS packet type definitions
- */
-#define LCS_FRAME_TYPE_CONTROL 0
-#define LCS_FRAME_TYPE_ENET 1
-#define LCS_FRAME_TYPE_TR 2
-#define LCS_FRAME_TYPE_FDDI 7
-#define LCS_FRAME_TYPE_AUTO -1
-
-/**
- * some more definitions,we will sort them later
- */
-#define LCS_ILLEGAL_OFFSET 0xffff
-#define LCS_IOBUFFERSIZE 0x5000
-#define LCS_NUM_BUFFS 32 /* needs to be power of 2 */
-#define LCS_MAC_LENGTH 6
-#define LCS_INVALID_PORT_NO -1
-#define LCS_LANCMD_TIMEOUT_DEFAULT 5
-
-/**
- * Multicast state
- */
-#define LCS_IPM_STATE_SET_REQUIRED 0
-#define LCS_IPM_STATE_DEL_REQUIRED 1
-#define LCS_IPM_STATE_ON_CARD 2
-
-/**
- * LCS IP Assist declarations
- * seems to be only used for multicast
- */
-#define LCS_IPASS_ARP_PROCESSING 0x0001
-#define LCS_IPASS_INBOUND_CSUM_SUPP 0x0002
-#define LCS_IPASS_OUTBOUND_CSUM_SUPP 0x0004
-#define LCS_IPASS_IP_FRAG_REASSEMBLY 0x0008
-#define LCS_IPASS_IP_FILTERING 0x0010
-#define LCS_IPASS_IPV6_SUPPORT 0x0020
-#define LCS_IPASS_MULTICAST_SUPPORT 0x0040
-
-/**
- * LCS Buffer states
- */
-enum lcs_buffer_states {
- LCS_BUF_STATE_EMPTY, /* buffer is empty */
- LCS_BUF_STATE_LOCKED, /* buffer is locked, don't touch */
- LCS_BUF_STATE_READY, /* buffer is ready for read/write */
- LCS_BUF_STATE_PROCESSED,
-};
-
-/**
- * LCS Channel State Machine declarations
- */
-enum lcs_channel_states {
- LCS_CH_STATE_INIT,
- LCS_CH_STATE_HALTED,
- LCS_CH_STATE_STOPPED,
- LCS_CH_STATE_RUNNING,
- LCS_CH_STATE_SUSPENDED,
- LCS_CH_STATE_CLEARED,
- LCS_CH_STATE_ERROR,
-};
-
-/**
- * LCS device state machine
- */
-enum lcs_dev_states {
- DEV_STATE_DOWN,
- DEV_STATE_UP,
- DEV_STATE_RECOVER,
-};
-
-enum lcs_threads {
- LCS_SET_MC_THREAD = 1,
- LCS_RECOVERY_THREAD = 2,
-};
-
-/**
- * LCS struct declarations
- */
-struct lcs_header {
- __u16 offset;
- __u8 type;
- __u8 slot;
-} __attribute__ ((packed));
-
-struct lcs_ip_mac_pair {
- __be32 ip_addr;
- __u8 mac_addr[LCS_MAC_LENGTH];
- __u8 reserved[2];
-} __attribute__ ((packed));
-
-struct lcs_ipm_list {
- struct list_head list;
- struct lcs_ip_mac_pair ipm;
- __u8 ipm_state;
-};
-
-struct lcs_cmd {
- __u16 offset;
- __u8 type;
- __u8 slot;
- __u8 cmd_code;
- __u8 initiator;
- __u16 sequence_no;
- __u16 return_code;
- union {
- struct {
- __u8 lan_type;
- __u8 portno;
- __u16 parameter_count;
- __u8 operator_flags[3];
- __u8 reserved[3];
- } lcs_std_cmd;
- struct {
- __u16 unused1;
- __u16 buff_size;
- __u8 unused2[6];
- } lcs_startup;
- struct {
- __u8 lan_type;
- __u8 portno;
- __u8 unused[10];
- __u8 mac_addr[LCS_MAC_LENGTH];
- __u32 num_packets_deblocked;
- __u32 num_packets_blocked;
- __u32 num_packets_tx_on_lan;
- __u32 num_tx_errors_detected;
- __u32 num_tx_packets_disgarded;
- __u32 num_packets_rx_from_lan;
- __u32 num_rx_errors_detected;
- __u32 num_rx_discarded_nobuffs_avail;
- __u32 num_rx_packets_too_large;
- } lcs_lanstat_cmd;
-#ifdef CONFIG_IP_MULTICAST
- struct {
- __u8 lan_type;
- __u8 portno;
- __u16 num_ip_pairs;
- __u16 ip_assists_supported;
- __u16 ip_assists_enabled;
- __u16 version;
- struct {
- struct lcs_ip_mac_pair
- ip_mac_pair[32];
- __u32 response_data;
- } lcs_ipass_ctlmsg __attribute ((packed));
- } lcs_qipassist __attribute__ ((packed));
-#endif /*CONFIG_IP_MULTICAST */
- } cmd __attribute__ ((packed));
-} __attribute__ ((packed));
-
-/**
- * Forward declarations.
- */
-struct lcs_card;
-struct lcs_channel;
-
-/**
- * Definition of an lcs buffer.
- */
-struct lcs_buffer {
- enum lcs_buffer_states state;
- void *data;
- int count;
- /* Callback for completion notification. */
- void (*callback)(struct lcs_channel *, struct lcs_buffer *);
-};
-
-struct lcs_reply {
- struct list_head list;
- __u16 sequence_no;
- refcount_t refcnt;
- /* Callback for completion notification. */
- void (*callback)(struct lcs_card *, struct lcs_cmd *);
- wait_queue_head_t wait_q;
- struct lcs_card *card;
- struct timer_list timer;
- int received;
- int rc;
-};
-
-/**
- * Definition of an lcs channel
- */
-struct lcs_channel {
- enum lcs_channel_states state;
- struct ccw_device *ccwdev;
- struct ccw1 ccws[LCS_NUM_BUFFS + 1];
- wait_queue_head_t wait_q;
- struct tasklet_struct irq_tasklet;
- struct lcs_buffer iob[LCS_NUM_BUFFS];
- int io_idx;
- int buf_idx;
-};
-
-
-/**
- * definition of the lcs card
- */
-struct lcs_card {
- spinlock_t lock;
- spinlock_t ipm_lock;
- enum lcs_dev_states state;
- struct net_device *dev;
- struct net_device_stats stats;
- __be16 (*lan_type_trans)(struct sk_buff *skb,
- struct net_device *dev);
- struct ccwgroup_device *gdev;
- struct lcs_channel read;
- struct lcs_channel write;
- struct lcs_buffer *tx_buffer;
- int tx_emitted;
- struct list_head lancmd_waiters;
- int lancmd_timeout;
-
- struct work_struct kernel_thread_starter;
- spinlock_t mask_lock;
- unsigned long thread_start_mask;
- unsigned long thread_running_mask;
- unsigned long thread_allowed_mask;
- wait_queue_head_t wait_q;
-
-#ifdef CONFIG_IP_MULTICAST
- struct list_head ipm_list;
-#endif
- __u8 mac[LCS_MAC_LENGTH];
- __u16 ip_assists_supported;
- __u16 ip_assists_enabled;
- __s8 lan_type;
- __u32 pkt_seq;
- __u16 sequence_no;
- __s16 portno;
- /* Some info copied from probeinfo */
- u8 device_forced;
- u8 max_port_no;
- u8 hint_port_no;
- s16 port_protocol_no;
-} __attribute__ ((aligned(8)));
-
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 7c0980db77b3..2fecf66661e9 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -447,7 +447,7 @@ static bool ep_busy_loop(struct eventpoll *ep, int nonblock)
if (!budget)
budget = BUSY_POLL_BUDGET;
- if (napi_id >= MIN_NAPI_ID && ep_busy_loop_on(ep)) {
+ if (napi_id_valid(napi_id) && ep_busy_loop_on(ep)) {
napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end,
ep, prefer_busy_poll, budget);
if (ep_events_available(ep))
@@ -492,7 +492,7 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
* or
* Nothing to do if we already have this ID
*/
- if (napi_id < MIN_NAPI_ID || napi_id == ep->napi_id)
+ if (!napi_id_valid(napi_id) || napi_id == ep->napi_id)
return;
/* record NAPI ID for use in next busy poll */
@@ -546,7 +546,7 @@ static void ep_suspend_napi_irqs(struct eventpoll *ep)
{
unsigned int napi_id = READ_ONCE(ep->napi_id);
- if (napi_id >= MIN_NAPI_ID && READ_ONCE(ep->prefer_busy_poll))
+ if (napi_id_valid(napi_id) && READ_ONCE(ep->prefer_busy_poll))
napi_suspend_irqs(napi_id);
}
@@ -554,7 +554,7 @@ static void ep_resume_napi_irqs(struct eventpoll *ep)
{
unsigned int napi_id = READ_ONCE(ep->napi_id);
- if (napi_id >= MIN_NAPI_ID && READ_ONCE(ep->prefer_busy_poll))
+ if (napi_id_valid(napi_id) && READ_ONCE(ep->prefer_busy_poll))
napi_resume_irqs(napi_id);
}
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 13a11f3c09b8..4811b9a14604 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -154,7 +154,10 @@ enum virtchnl_ops {
VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 = 55,
VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 = 56,
VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57,
- /* opcode 57 - 65 are reserved */
+ /* opcode 58 and 59 are reserved */
+ VIRTCHNL_OP_1588_PTP_GET_CAPS = 60,
+ VIRTCHNL_OP_1588_PTP_GET_TIME = 61,
+ /* opcode 62 - 65 are reserved */
VIRTCHNL_OP_GET_QOS_CAPS = 66,
/* opcode 68 through 111 are reserved */
VIRTCHNL_OP_CONFIG_QUEUE_BW = 112,
@@ -270,6 +273,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
#define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF BIT(27)
#define VIRTCHNL_VF_OFFLOAD_FDIR_PF BIT(28)
#define VIRTCHNL_VF_OFFLOAD_QOS BIT(29)
+#define VIRTCHNL_VF_CAP_PTP BIT(31)
#define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \
VIRTCHNL_VF_OFFLOAD_VLAN | \
@@ -309,6 +313,60 @@ struct virtchnl_txq_info {
VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info);
+/* RX descriptor IDs (range from 0 to 63) */
+enum virtchnl_rx_desc_ids {
+ VIRTCHNL_RXDID_0_16B_BASE = 0,
+ VIRTCHNL_RXDID_1_32B_BASE = 1,
+ VIRTCHNL_RXDID_2_FLEX_SQ_NIC = 2,
+ VIRTCHNL_RXDID_3_FLEX_SQ_SW = 3,
+ VIRTCHNL_RXDID_4_FLEX_SQ_NIC_VEB = 4,
+ VIRTCHNL_RXDID_5_FLEX_SQ_NIC_ACL = 5,
+ VIRTCHNL_RXDID_6_FLEX_SQ_NIC_2 = 6,
+ VIRTCHNL_RXDID_7_HW_RSVD = 7,
+ /* 8 through 15 are reserved */
+ VIRTCHNL_RXDID_16_COMMS_GENERIC = 16,
+ VIRTCHNL_RXDID_17_COMMS_AUX_VLAN = 17,
+ VIRTCHNL_RXDID_18_COMMS_AUX_IPV4 = 18,
+ VIRTCHNL_RXDID_19_COMMS_AUX_IPV6 = 19,
+ VIRTCHNL_RXDID_20_COMMS_AUX_FLOW = 20,
+ VIRTCHNL_RXDID_21_COMMS_AUX_TCP = 21,
+ /* 22 through 63 are reserved */
+};
+
+#define VIRTCHNL_RXDID_BIT(x) BIT_ULL(VIRTCHNL_RXDID_##x)
+
+/* RX descriptor ID bitmasks */
+enum virtchnl_rx_desc_id_bitmasks {
+ VIRTCHNL_RXDID_0_16B_BASE_M = VIRTCHNL_RXDID_BIT(0_16B_BASE),
+ VIRTCHNL_RXDID_1_32B_BASE_M = VIRTCHNL_RXDID_BIT(1_32B_BASE),
+ VIRTCHNL_RXDID_2_FLEX_SQ_NIC_M = VIRTCHNL_RXDID_BIT(2_FLEX_SQ_NIC),
+ VIRTCHNL_RXDID_3_FLEX_SQ_SW_M = VIRTCHNL_RXDID_BIT(3_FLEX_SQ_SW),
+ VIRTCHNL_RXDID_4_FLEX_SQ_NIC_VEB_M = VIRTCHNL_RXDID_BIT(4_FLEX_SQ_NIC_VEB),
+ VIRTCHNL_RXDID_5_FLEX_SQ_NIC_ACL_M = VIRTCHNL_RXDID_BIT(5_FLEX_SQ_NIC_ACL),
+ VIRTCHNL_RXDID_6_FLEX_SQ_NIC_2_M = VIRTCHNL_RXDID_BIT(6_FLEX_SQ_NIC_2),
+ VIRTCHNL_RXDID_7_HW_RSVD_M = VIRTCHNL_RXDID_BIT(7_HW_RSVD),
+ /* 8 through 15 are reserved */
+ VIRTCHNL_RXDID_16_COMMS_GENERIC_M = VIRTCHNL_RXDID_BIT(16_COMMS_GENERIC),
+ VIRTCHNL_RXDID_17_COMMS_AUX_VLAN_M = VIRTCHNL_RXDID_BIT(17_COMMS_AUX_VLAN),
+ VIRTCHNL_RXDID_18_COMMS_AUX_IPV4_M = VIRTCHNL_RXDID_BIT(18_COMMS_AUX_IPV4),
+ VIRTCHNL_RXDID_19_COMMS_AUX_IPV6_M = VIRTCHNL_RXDID_BIT(19_COMMS_AUX_IPV6),
+ VIRTCHNL_RXDID_20_COMMS_AUX_FLOW_M = VIRTCHNL_RXDID_BIT(20_COMMS_AUX_FLOW),
+ VIRTCHNL_RXDID_21_COMMS_AUX_TCP_M = VIRTCHNL_RXDID_BIT(21_COMMS_AUX_TCP),
+ /* 22 through 63 are reserved */
+};
+
+/* virtchnl_rxq_info_flags - definition of bits in the flags field of the
+ * virtchnl_rxq_info structure.
+ *
+ * @VIRTCHNL_PTP_RX_TSTAMP: request to enable Rx timestamping
+ *
+ * Other flag bits are currently reserved and they may be extended in the
+ * future.
+ */
+enum virtchnl_rxq_info_flags {
+ VIRTCHNL_PTP_RX_TSTAMP = BIT(0),
+};
+
/* VIRTCHNL_OP_CONFIG_RX_QUEUE
* VF sends this message to set up parameters for one RX queue.
* External data buffer contains one instance of virtchnl_rxq_info.
@@ -331,8 +389,14 @@ struct virtchnl_rxq_info {
u32 databuffer_size;
u32 max_pkt_size;
u8 crc_disable;
- u8 rxdid;
- u8 pad1[2];
+ /* see enum virtchnl_rx_desc_ids;
+ * only used when VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC is supported. Note
+ * that when the offload is not supported, the descriptor format aligns
+ * with VIRTCHNL_RXDID_1_32B_BASE.
+ */
+ enum virtchnl_rx_desc_ids rxdid:8;
+ enum virtchnl_rxq_info_flags flags:8; /* see virtchnl_rxq_info_flags */
+ u8 pad1;
u64 dma_ring_addr;
/* see enum virtchnl_rx_hsplit; deprecated with AVF 1.0 */
@@ -1032,10 +1096,6 @@ struct virtchnl_filter {
VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
-struct virtchnl_supported_rxdids {
- u64 supported_rxdids;
-};
-
/* VIRTCHNL_OP_EVENT
* PF sends this message to inform the VF driver of events that may affect it.
* No direct response is expected from the VF, though it may generate other
@@ -1425,6 +1485,61 @@ struct virtchnl_fdir_del {
VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del);
+#define VIRTCHNL_1588_PTP_CAP_RX_TSTAMP BIT(1)
+#define VIRTCHNL_1588_PTP_CAP_READ_PHC BIT(2)
+
+/**
+ * struct virtchnl_ptp_caps - Defines the PTP caps available to the VF.
+ * @caps: On send, VF sets what capabilities it requests. On reply, PF
+ * indicates what has been enabled for this VF. The PF shall not set
+ * bits which were not requested by the VF.
+ * @rsvd: Reserved bits for future extension.
+ *
+ * Structure that defines the PTP capabilities available to the VF. The VF
+ * sends VIRTCHNL_OP_1588_PTP_GET_CAPS, and must fill in the ptp_caps field
+ * indicating what capabilities it is requesting. The PF will respond with the
+ * same message with the virtchnl_ptp_caps structure indicating what is
+ * enabled for the VF.
+ *
+ * VIRTCHNL_1588_PTP_CAP_RX_TSTAMP indicates that the VF receive queues have
+ * receive timestamps enabled in the flexible descriptors. Note that this
+ * requires a VF to also negotiate to enable advanced flexible descriptors in
+ * the receive path instead of the default legacy descriptor format.
+ *
+ * VIRTCHNL_1588_PTP_CAP_READ_PHC indicates that the VF may read the PHC time
+ * via the VIRTCHNL_OP_1588_PTP_GET_TIME command.
+ *
+ * Note that in the future, additional capability flags may be added which
+ * indicate additional extended support. All fields marked as reserved by this
+ * header will be set to zero. VF implementations should verify this to ensure
+ * that future extensions do not break compatibility.
+ */
+struct virtchnl_ptp_caps {
+ u32 caps;
+ u8 rsvd[44];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(48, virtchnl_ptp_caps);
+
+/**
+ * struct virtchnl_phc_time - Contains the 64bits of PHC clock time in ns.
+ * @time: PHC time in nanoseconds
+ * @rsvd: Reserved for future extension
+ *
+ * Structure received with VIRTCHNL_OP_1588_PTP_GET_TIME. Contains the 64bits
+ * of PHC clock time in nanoseconds.
+ *
+ * VIRTCHNL_OP_1588_PTP_GET_TIME may be sent to request the current time of
+ * the PHC. This op is available in case direct access via the PHC registers
+ * is not available.
+ */
+struct virtchnl_phc_time {
+ u64 time;
+ u8 rsvd[8];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_phc_time);
+
struct virtchnl_shaper_bw {
/* Unit is Kbps */
u32 committed;
@@ -1757,6 +1872,12 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
}
}
break;
+ case VIRTCHNL_OP_1588_PTP_GET_CAPS:
+ valid_len = sizeof(struct virtchnl_ptp_caps);
+ break;
+ case VIRTCHNL_OP_1588_PTP_GET_TIME:
+ valid_len = sizeof(struct virtchnl_phc_time);
+ break;
/* These are always errors coming from the VF. */
case VIRTCHNL_OP_EVENT:
case VIRTCHNL_OP_UNKNOWN:
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 27f42f713c89..f016263e1fcf 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1135,7 +1135,7 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
struct mlx4_buf *buf);
-int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order);
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, unsigned int order);
void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db);
int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
@@ -1415,7 +1415,6 @@ int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port,
bool *vlan_offload_disabled);
void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
struct _rule_hw *eth_header);
-int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index af86097641b0..46bd7550adf8 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -54,7 +54,6 @@
#include <linux/mlx5/doorbell.h>
#include <linux/mlx5/eq.h>
#include <linux/timecounter.h>
-#include <linux/ptp_clock_kernel.h>
#include <net/devlink.h>
#define MLX5_ADEV_NAME "mlx5_core"
@@ -679,33 +678,8 @@ struct mlx5_rsvd_gids {
struct ida ida;
};
-#define MAX_PIN_NUM 8
-struct mlx5_pps {
- u8 pin_caps[MAX_PIN_NUM];
- struct work_struct out_work;
- u64 start[MAX_PIN_NUM];
- u8 enabled;
- u64 min_npps_period;
- u64 min_out_pulse_duration_ns;
-};
-
-struct mlx5_timer {
- struct cyclecounter cycles;
- struct timecounter tc;
- u32 nominal_c_mult;
- unsigned long overflow_period;
-};
-
-struct mlx5_clock {
- struct mlx5_nb pps_nb;
- seqlock_t lock;
- struct hwtstamp_config hwtstamp_config;
- struct ptp_clock *ptp;
- struct ptp_clock_info ptp_info;
- struct mlx5_pps pps_info;
- struct mlx5_timer timer;
-};
-
+struct mlx5_clock;
+struct mlx5_clock_dev_state;
struct mlx5_dm;
struct mlx5_fw_tracer;
struct mlx5_vxlan;
@@ -789,7 +763,8 @@ struct mlx5_core_dev {
#ifdef CONFIG_MLX5_FPGA
struct mlx5_fpga_device *fpga;
#endif
- struct mlx5_clock clock;
+ struct mlx5_clock *clock;
+ struct mlx5_clock_dev_state *clock_state;
struct mlx5_ib_clock_info *clock_info;
struct mlx5_fw_tracer *tracer;
struct mlx5_rsc_dump *rsc_dump;
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index e68d42b8ce65..fd625e0dd869 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -115,9 +115,12 @@ enum mlx5e_ext_link_mode {
MLX5E_100GAUI_1_100GBASE_CR_KR = 11,
MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12,
MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13,
+ MLX5E_200GAUI_1_200GBASE_CR1_KR1 = 14,
MLX5E_400GAUI_8_400GBASE_CR8 = 15,
MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16,
+ MLX5E_400GAUI_2_400GBASE_CR2_KR2 = 17,
MLX5E_800GAUI_8_800GBASE_CR8_KR8 = 19,
+ MLX5E_800GAUI_4_800GBASE_CR4_KR4 = 20,
MLX5E_EXT_LINK_MODES_NUMBER,
};
diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h
index 1c1332e4df26..13274c3def66 100644
--- a/include/linux/net/intel/iidc.h
+++ b/include/linux/net/intel/iidc.h
@@ -78,6 +78,8 @@ int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type);
int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable);
void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos);
+int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
+void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
/* Structure representing auxiliary driver tailored information about the core
* PCI dev, each auxiliary driver using the IIDC interface will have an
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ab550a89b9bf..9a387d456592 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -658,6 +658,7 @@ struct netdev_queue {
struct Qdisc __rcu *qdisc_sleeping;
#ifdef CONFIG_SYSFS
struct kobject kobj;
+ const struct attribute_group **groups;
#endif
unsigned long tx_maxrate;
/*
diff --git a/include/linux/of.h b/include/linux/of.h
index eaf0e2a2b75c..9d6b8a61607f 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -301,6 +301,8 @@ extern struct device_node *of_get_compatible_child(const struct device_node *par
const char *compatible);
extern struct device_node *of_get_child_by_name(const struct device_node *node,
const char *name);
+extern struct device_node *of_get_available_child_by_name(const struct device_node *node,
+ const char *name);
/* cache lookup */
extern struct device_node *of_find_next_cache_node(const struct device_node *);
@@ -578,6 +580,13 @@ static inline struct device_node *of_get_child_by_name(
return NULL;
}
+static inline struct device_node *of_get_available_child_by_name(
+ const struct device_node *node,
+ const char *name)
+{
+ return NULL;
+}
+
static inline int of_device_is_compatible(const struct device_node *device,
const char *name)
{
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 733f4ddd2ef1..e40f554ff717 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -50,8 +50,7 @@ struct dw_xpcs;
struct phylink_pcs *xpcs_to_phylink_pcs(struct dw_xpcs *xpcs);
int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface);
-int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns,
- int enable);
+void xpcs_config_eee_mult_fact(struct dw_xpcs *xpcs, u8 mult_fact);
struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr);
struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode);
void xpcs_destroy(struct dw_xpcs *xpcs);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 19f076a71f94..584710e084eb 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -54,11 +54,6 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap2_features) __ro_after_init;
#define PHY_EEE_CAP2_FEATURES ((unsigned long *)&phy_eee_cap2_features)
extern const int phy_basic_ports_array[3];
-extern const int phy_10_100_features_array[4];
-extern const int phy_basic_t1_features_array[3];
-extern const int phy_basic_t1s_p2mp_features_array[2];
-extern const int phy_gbit_features_array[2];
-extern const int phy_10gbit_features_array[1];
/*
* Set phydev->irq to PHY_POLL if interrupts are not supported,
@@ -303,9 +298,6 @@ static inline long rgmii_clock(int speed)
}
}
-#define PHY_INIT_TIMEOUT 100000
-#define PHY_FORCE_TIMEOUT 10
-
#define PHY_MAX_ADDR 32
/* Used when trying to connect to a specific phy (mii bus id:phy device id) */
@@ -611,7 +603,7 @@ struct macsec_ops;
* @eee_cfg: User configuration of EEE
* @lp_advertising: Current link partner advertised linkmodes
* @host_interfaces: PHY interface modes supported by host
- * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited
+ * @eee_disabled_modes: Energy efficient ethernet modes not to be advertised
* @autoneg: Flag autoneg being used
* @rate_matching: Current rate matching mode
* @link: Current link state
@@ -727,7 +719,7 @@ struct phy_device {
__ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee);
__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee);
/* Energy efficient ethernet modes which should be prohibited */
- __ETHTOOL_DECLARE_LINK_MODE_MASK(eee_broken_modes);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(eee_disabled_modes);
bool enable_tx_lpi;
bool eee_active;
struct eee_config eee_cfg;
@@ -1273,13 +1265,23 @@ struct phy_driver {
*/
int (*led_polarity_set)(struct phy_device *dev, int index,
unsigned long modes);
+
+ /**
+ * @get_next_update_time: Get the time until the next update event
+ * @dev: PHY device
+ *
+ * Callback to determine the time (in jiffies) until the next
+ * update event for the PHY state machine. Allows PHY drivers to
+ * dynamically adjust polling intervals based on link state or other
+ * conditions.
+ *
+ * Returns the time in jiffies until the next update event.
+ */
+ unsigned int (*get_next_update_time)(struct phy_device *dev);
};
#define to_phy_driver(d) container_of_const(to_mdio_common_driver(d), \
struct phy_driver, mdiodrv)
-#define PHY_ANY_ID "MATCH ANY PHY"
-#define PHY_ANY_UID 0xffffffff
-
#define PHY_ID_MATCH_EXACT(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 0)
#define PHY_ID_MATCH_MODEL(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 4)
#define PHY_ID_MATCH_VENDOR(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 10)
@@ -1312,15 +1314,6 @@ static inline bool phydev_id_compare(struct phy_device *phydev, u32 id)
return phy_id_compare(id, phydev->phy_id, phydev->drv->phy_id_mask);
}
-/* A Structure for boards to register fixups with the PHY Lib */
-struct phy_fixup {
- struct list_head list;
- char bus_id[MII_BUS_ID_SIZE + 3];
- u32 phy_uid;
- u32 phy_uid_mask;
- int (*run)(struct phy_device *phydev);
-};
-
const char *phy_speed_to_str(int speed);
const char *phy_duplex_to_str(unsigned int duplex);
const char *phy_rate_matching_to_str(int rate_matching);
@@ -1347,22 +1340,25 @@ void of_set_phy_timing_role(struct phy_device *phydev);
int phy_speed_down_core(struct phy_device *phydev);
/**
- * phy_set_eee_broken - Mark an EEE mode as broken so that it isn't advertised.
+ * phy_is_started - Convenience function to check whether PHY is started
* @phydev: The phy_device struct
- * @link_mode: The broken EEE mode
*/
-static inline void phy_set_eee_broken(struct phy_device *phydev, u32 link_mode)
+static inline bool phy_is_started(struct phy_device *phydev)
{
- linkmode_set_bit(link_mode, phydev->eee_broken_modes);
+ return phydev->state >= PHY_UP;
}
/**
- * phy_is_started - Convenience function to check whether PHY is started
+ * phy_disable_eee_mode - Don't advertise an EEE mode.
* @phydev: The phy_device struct
+ * @link_mode: The EEE mode to be disabled
*/
-static inline bool phy_is_started(struct phy_device *phydev)
+static inline void phy_disable_eee_mode(struct phy_device *phydev, u32 link_mode)
{
- return phydev->state >= PHY_UP;
+ WARN_ON(phy_is_started(phydev));
+
+ linkmode_set_bit(link_mode, phydev->eee_disabled_modes);
+ linkmode_clear_bit(link_mode, phydev->advertising_eee);
}
void phy_resolve_aneg_pause(struct phy_device *phydev);
@@ -1747,15 +1743,6 @@ static inline bool phy_is_default_hwtstamp(struct phy_device *phydev)
}
/**
- * phy_is_internal - Convenience function for testing if a PHY is internal
- * @phydev: the phy_device struct
- */
-static inline bool phy_is_internal(struct phy_device *phydev)
-{
- return phydev->is_internal;
-}
-
-/**
* phy_on_sfp - Convenience function for testing if a PHY is on an SFP module
* @phydev: the phy_device struct
*/
@@ -2045,8 +2032,7 @@ int genphy_c45_plca_set_cfg(struct phy_device *phydev,
const struct phy_plca_cfg *plca_cfg);
int genphy_c45_plca_get_status(struct phy_device *phydev,
struct phy_plca_status *plca_st);
-int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv,
- unsigned long *lp);
+int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *lp);
int genphy_c45_ethtool_get_eee(struct phy_device *phydev,
struct ethtool_keee *data);
int genphy_c45_ethtool_set_eee(struct phy_device *phydev,
@@ -2078,7 +2064,6 @@ int phy_drivers_register(struct phy_driver *new_driver, int n,
struct module *owner);
void phy_error(struct phy_device *phydev);
void phy_state_machine(struct work_struct *work);
-void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies);
void phy_trigger_machine(struct phy_device *phydev);
void phy_mac_interrupt(struct phy_device *phydev);
void phy_start_machine(struct phy_device *phydev);
@@ -2114,11 +2099,13 @@ void phy_get_pause(struct phy_device *phydev, bool *tx_pause, bool *rx_pause);
s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev,
const int *delay_values, int size, bool is_rx);
+int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev,
+ enum ethtool_link_mode_bit_indices linkmode,
+ u32 *val);
+
void phy_resolve_pause(unsigned long *local_adv, unsigned long *partner_adv,
bool *tx_pause, bool *rx_pause);
-int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
- int (*run)(struct phy_device *));
int phy_register_fixup_for_id(const char *bus_id,
int (*run)(struct phy_device *));
int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask,
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 898b00451bbf..08df65f6867a 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -442,7 +442,6 @@ struct phylink_pcs_ops;
* are supported by this PCS.
* @ops: a pointer to the &struct phylink_pcs_ops structure
* @phylink: pointer to &struct phylink_config
- * @neg_mode: provide PCS neg mode via "mode" argument
* @poll: poll the PCS for link changes
* @rxc_always_on: The MAC driver requires the reference clock
* to always be on. Standalone PCS drivers which
@@ -459,7 +458,6 @@ struct phylink_pcs {
DECLARE_PHY_INTERFACE_MASK(supported_interfaces);
const struct phylink_pcs_ops *ops;
struct phylink *phylink;
- bool neg_mode;
bool poll;
bool rxc_always_on;
};
@@ -477,6 +475,10 @@ struct phylink_pcs {
* @pcs_an_restart: restart 802.3z BaseX autonegotiation.
* @pcs_link_up: program the PCS for the resolved link configuration
* (where necessary).
+ * @pcs_disable_eee: optional notification to PCS that EEE has been disabled
+ * at the MAC.
+ * @pcs_enable_eee: optional notification to PCS that EEE will be enabled at
+ * the MAC.
* @pcs_pre_init: configure PCS components necessary for MAC hardware
* initialization e.g. RX clock for stmmac.
*/
@@ -500,6 +502,8 @@ struct phylink_pcs_ops {
void (*pcs_an_restart)(struct phylink_pcs *pcs);
void (*pcs_link_up)(struct phylink_pcs *pcs, unsigned int neg_mode,
phy_interface_t interface, int speed, int duplex);
+ void (*pcs_disable_eee)(struct phylink_pcs *pcs);
+ void (*pcs_enable_eee)(struct phylink_pcs *pcs);
int (*pcs_pre_init)(struct phylink_pcs *pcs);
};
@@ -626,6 +630,22 @@ void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode,
phy_interface_t interface, int speed, int duplex);
/**
+ * pcs_disable_eee() - Disable EEE at the PCS
+ * @pcs: a pointer to a &struct phylink_pcs
+ *
+ * Optional method informing the PCS that EEE has been disabled at the MAC.
+ */
+void pcs_disable_eee(struct phylink_pcs *pcs);
+
+/**
+ * pcs_enable_eee() - Enable EEE at the PCS
+ * @pcs: a pointer to a &struct phylink_pcs
+ *
+ * Optional method informing the PCS that EEE is about to be enabled at the MAC.
+ */
+void pcs_enable_eee(struct phylink_pcs *pcs);
+
+/**
* pcs_pre_init() - Configure PCS components necessary for MAC initialization
* @pcs: a pointer to a &struct phylink_pcs.
*
@@ -737,6 +757,18 @@ static inline int phylink_get_link_timer_ns(phy_interface_t interface)
}
}
+/**
+ * phylink_mac_implements_lpi() - determine if MAC implements LPI ops
+ * @ops: phylink_mac_ops structure
+ *
+ * Returns true if the phylink MAC operations structure indicates that the
+ * LPI operations have been implemented, false otherwise.
+ */
+static inline bool phylink_mac_implements_lpi(const struct phylink_mac_ops *ops)
+{
+ return ops && ops->mac_disable_tx_lpi && ops->mac_enable_tx_lpi;
+}
+
void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state,
unsigned int neg_mode, u16 bmsr, u16 lpa);
void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs,
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 4bc2ee0b10b0..ccaaf4c7d5f6 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -43,6 +43,7 @@ extern void rtnl_lock(void);
extern void rtnl_unlock(void);
extern int rtnl_trylock(void);
extern int rtnl_is_locked(void);
+extern int rtnl_lock_interruptible(void);
extern int rtnl_lock_killable(void);
extern bool refcount_dec_and_rtnl_lock(refcount_t *r);
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 836a7e200f39..812011d8b67e 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -222,7 +222,6 @@ struct sctp_datahdr {
__be16 stream;
__be16 ssn;
__u32 ppid;
- /* __u8 payload[]; */
};
struct sctp_data_chunk {
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index c9878a612e53..6d2aa77ea963 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -231,7 +231,7 @@ struct plat_stmmacenet_data {
u8 tx_sched_algorithm;
struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES];
struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES];
- void (*fix_mac_speed)(void *priv, unsigned int speed, unsigned int mode);
+ void (*fix_mac_speed)(void *priv, int speed, unsigned int mode);
int (*fix_soc_reset)(void *priv, void __iomem *ioaddr);
int (*serdes_powerup)(struct net_device *ndev, void *priv);
void (*serdes_powerdown)(struct net_device *ndev, void *priv);
@@ -254,6 +254,8 @@ struct plat_stmmacenet_data {
struct clk *clk_ptp_ref;
unsigned long clk_ptp_rate;
unsigned long clk_ref_rate;
+ struct clk_bulk_data *clks;
+ int num_clks;
unsigned int mult_fact_100ns;
s32 ptp_max_adj;
u32 cdc_error_adj;
diff --git a/include/linux/unroll.h b/include/linux/unroll.h
index d42fd6366373..863fb69f6a7e 100644
--- a/include/linux/unroll.h
+++ b/include/linux/unroll.h
@@ -9,6 +9,50 @@
#include <linux/args.h>
+#ifdef CONFIG_CC_IS_CLANG
+#define __pick_unrolled(x, y) _Pragma(#x)
+#elif CONFIG_GCC_VERSION >= 80000
+#define __pick_unrolled(x, y) _Pragma(#y)
+#else
+#define __pick_unrolled(x, y) /* not supported */
+#endif
+
+/**
+ * unrolled - loop attributes to ask the compiler to unroll it
+ *
+ * Usage:
+ *
+ * #define BATCH 8
+ *
+ * unrolled_count(BATCH)
+ * for (u32 i = 0; i < BATCH; i++)
+ * // loop body without cross-iteration dependencies
+ *
+ * This is only a hint and the compiler is free to disable unrolling if it
+ * thinks the count is suboptimal and may hurt performance and/or hugely
+ * increase object code size.
+ * Not having any cross-iteration dependencies (i.e. when iter x + 1 depends
+ * on what iter x will do with variables) is not a strict requirement, but
+ * provides best performance and object code size.
+ * Available only on Clang and GCC 8.x onwards.
+ */
+
+/* Ask the compiler to pick an optimal unroll count, Clang only */
+#define unrolled \
+ __pick_unrolled(clang loop unroll(enable), /* nothing */)
+
+/* Unroll each @n iterations of the loop */
+#define unrolled_count(n) \
+ __pick_unrolled(clang loop unroll_count(n), GCC unroll n)
+
+/* Unroll the whole loop */
+#define unrolled_full \
+ __pick_unrolled(clang loop unroll(full), GCC unroll 65534)
+
+/* Never unroll the loop */
+#define unrolled_none \
+ __pick_unrolled(clang loop unroll(disable), GCC unroll 1)
+
#define UNROLL(N, MACRO, args...) CONCATENATE(__UNROLL_, N)(MACRO, args)
#define __UNROLL_0(MACRO, args...)
diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h
index 33a4c146dc19..2ca60828f28b 100644
--- a/include/linux/usb/r8152.h
+++ b/include/linux/usb/r8152.h
@@ -30,6 +30,7 @@
#define VENDOR_ID_NVIDIA 0x0955
#define VENDOR_ID_TPLINK 0x2357
#define VENDOR_ID_DLINK 0x2001
+#define VENDOR_ID_DELL 0x413c
#define VENDOR_ID_ASUS 0x0b05
#if IS_REACHABLE(CONFIG_USB_RTL8152)
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index c39a426ebf52..cab6146a510a 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -24,6 +24,11 @@
*/
#define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1))
+static inline bool napi_id_valid(unsigned int napi_id)
+{
+ return napi_id >= MIN_NAPI_ID;
+}
+
#define BUSY_POLL_BUDGET 8
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -114,7 +119,7 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int napi_id = READ_ONCE(sk->sk_napi_id);
- if (napi_id >= MIN_NAPI_ID)
+ if (napi_id_valid(napi_id))
napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk,
READ_ONCE(sk->sk_prefer_busy_poll),
READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET);
@@ -129,7 +134,7 @@ static inline void skb_mark_napi_id(struct sk_buff *skb,
/* If the skb was already marked with a valid NAPI ID, avoid overwriting
* it.
*/
- if (skb->napi_id < MIN_NAPI_ID)
+ if (!napi_id_valid(skb->napi_id))
skb->napi_id = napi->napi_id;
#endif
}
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 04383d90a1e3..5927910ec06e 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -43,6 +43,8 @@ struct fib_rule {
struct fib_kuid_range uid_range;
struct fib_rule_port_range sport_range;
struct fib_rule_port_range dport_range;
+ u16 sport_mask;
+ u16 dport_mask;
struct rcu_head rcu;
};
@@ -146,6 +148,17 @@ static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a,
ntohs(port) <= a->end;
}
+static inline bool fib_rule_port_match(const struct fib_rule_port_range *range,
+ u16 port_mask, __be16 port)
+{
+ if ((range->start ^ ntohs(port)) & port_mask)
+ return false;
+ if (!port_mask && fib_rule_port_range_set(range) &&
+ !fib_rule_port_inrange(range, port))
+ return false;
+ return true;
+}
+
static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a)
{
return a->start != 0 && a->end != 0 && a->end < 0xffff &&
@@ -159,6 +172,12 @@ static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a,
a->end == b->end;
}
+static inline bool
+fib_rule_port_is_range(const struct fib_rule_port_range *range)
+{
+ return range->start != range->end;
+}
+
static inline bool fib_rule_requires_fldissect(struct fib_rule *rule)
{
return rule->iifindex != LOOPBACK_IFINDEX && (rule->ip_proto ||
@@ -178,10 +197,10 @@ int fib_rules_dump(struct net *net, struct notifier_block *nb, int family,
struct netlink_ext_ack *extack);
unsigned int fib_rules_seq_read(const struct net *net, int family);
-int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack);
-int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack);
+int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack, bool rtnl_held);
+int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack, bool rtnl_held);
INDIRECT_CALLABLE_DECLARE(int fib6_rule_match(struct fib_rule *rule,
struct flowi *fl, int flags));
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c7f42844c79a..d9978ffacc97 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -90,6 +90,7 @@ struct inet_connection_sock {
struct timer_list icsk_delack_timer;
__u32 icsk_rto;
__u32 icsk_rto_min;
+ u32 icsk_rto_max;
__u32 icsk_delack_max;
__u32 icsk_pmtu_cookie;
const struct tcp_congestion_ops *icsk_ca_ops;
@@ -189,9 +190,6 @@ static inline void inet_csk_delack_init(struct sock *sk)
memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack));
}
-void inet_csk_delete_keepalive_timer(struct sock *sk);
-void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
-
static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
{
struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/include/net/ip.h b/include/net/ip.h
index ba7b43447775..ce5e59957dd5 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -92,11 +92,12 @@ static inline void ipcm_init(struct ipcm_cookie *ipcm)
static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
const struct inet_sock *inet)
{
- ipcm_init(ipcm);
+ *ipcm = (struct ipcm_cookie) {
+ .tos = READ_ONCE(inet->tos),
+ };
+
+ sockcm_init(&ipcm->sockc, &inet->sk);
- ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
- ipcm->sockc.priority = READ_ONCE(inet->sk.sk_priority);
- ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags);
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
ipcm->protocol = inet->inet_num;
@@ -257,13 +258,6 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet,
return RT_SCOPE_UNIVERSE;
}
-static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet)
-{
- u8 dsfield = ipc->tos != -1 ? ipc->tos : READ_ONCE(inet->tos);
-
- return dsfield & INET_DSCP_MASK;
-}
-
/* datagram.c */
int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
@@ -673,6 +667,14 @@ static inline void ip_ipgre_mc_map(__be32 naddr, const unsigned char *broadcast,
memcpy(buf, &naddr, sizeof(naddr));
}
+#if IS_MODULE(CONFIG_IPV6)
+#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X)
+#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X)
+#else
+#define EXPORT_IPV6_MOD(X)
+#define EXPORT_IPV6_MOD_GPL(X)
+#endif
+
#if IS_ENABLED(CONFIG_IPV6)
#include <linux/ipv6.h>
#endif
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index f5c43ad1565e..9614006f483c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -363,15 +363,6 @@ struct ipcm6_cookie {
struct ipv6_txoptions *opt;
};
-static inline void ipcm6_init(struct ipcm6_cookie *ipc6)
-{
- *ipc6 = (struct ipcm6_cookie) {
- .hlimit = -1,
- .tclass = -1,
- .dontfrag = -1,
- };
-}
-
static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6,
const struct sock *sk)
{
@@ -380,6 +371,8 @@ static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6,
.tclass = inet6_sk(sk)->tclass,
.dontfrag = inet6_test_bit(DONTFRAG, sk),
};
+
+ sockcm_init(&ipc6->sockc, sk);
}
static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h
index 43574bd6612f..ab05024be518 100644
--- a/include/net/libeth/rx.h
+++ b/include/net/libeth/rx.h
@@ -198,6 +198,53 @@ struct libeth_rx_pt {
enum xdp_rss_hash_type hash_type:16;
};
+/**
+ * struct libeth_rx_csum - checksum offload bits decoded from the Rx descriptor
+ * @l3l4p: detectable L3 and L4 integrity check is processed by the hardware
+ * @ipe: IP checksum error
+ * @eipe: external (outermost) IP header (only for tunels)
+ * @eudpe: external (outermost) UDP checksum error (only for tunels)
+ * @ipv6exadd: IPv6 header with extension headers
+ * @l4e: L4 integrity error
+ * @pprs: set for packets that skip checksum calculation in the HW pre parser
+ * @nat: the packet is a UDP tunneled packet
+ * @raw_csum_valid: set if raw checksum is valid
+ * @pad: padding to naturally align raw_csum field
+ * @raw_csum: raw checksum
+ */
+struct libeth_rx_csum {
+ u32 l3l4p:1;
+ u32 ipe:1;
+ u32 eipe:1;
+ u32 eudpe:1;
+ u32 ipv6exadd:1;
+ u32 l4e:1;
+ u32 pprs:1;
+ u32 nat:1;
+
+ u32 raw_csum_valid:1;
+ u32 pad:7;
+ u32 raw_csum:16;
+};
+
+/**
+ * struct libeth_rqe_info - receive queue element info
+ * @len: packet length
+ * @ptype: packet type based on types programmed into the device
+ * @eop: whether it's the last fragment of the packet
+ * @rxe: MAC errors: CRC, Alignment, Oversize, Undersizes, Length error
+ * @vlan: C-VLAN or S-VLAN tag depending on the VLAN offload configuration
+ */
+struct libeth_rqe_info {
+ u32 len;
+
+ u32 ptype:14;
+ u32 eop:1;
+ u32 rxe:1;
+
+ u32 vlan:16;
+};
+
void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt);
/**
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index b02bb9f109d5..825141d675e5 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -23,6 +23,7 @@ struct netdev_queue_stats_rx {
u64 hw_drops;
u64 hw_drop_overruns;
+ u64 csum_complete;
u64 csum_unnecessary;
u64 csum_none;
u64 csum_bad;
@@ -117,6 +118,10 @@ struct netdev_stat_ops {
*
* @ndo_queue_stop: Stop the RX queue at the specified index. The stopped
* queue's memory is written at the specified address.
+ *
+ * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while
+ * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only
+ * be called for an interface which is open.
*/
struct netdev_queue_mgmt_ops {
size_t ndo_queue_mem_size;
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index 596836abf7bf..af40842f229d 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -16,6 +16,7 @@ struct netdev_rx_queue {
struct rps_dev_flow_table __rcu *rps_flow_table;
#endif
struct kobject kobj;
+ const struct attribute_group **groups;
struct net_device *dev;
netdevice_tracker dev_tracker;
diff --git a/include/net/netlink.h b/include/net/netlink.h
index e015ffbed819..29e0db940382 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -118,6 +118,7 @@
* nla_nest_start(skb, type) start a nested attribute
* nla_nest_end(skb, nla) finalize a nested attribute
* nla_nest_cancel(skb, nla) cancel nested attribute construction
+ * nla_put_empty_nest(skb, type) create an empty nest
*
* Attribute Length Calculations:
* nla_attr_size(payload) length of attribute w/o padding
@@ -2241,6 +2242,20 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
}
/**
+ * nla_put_empty_nest - Create an empty nest
+ * @skb: socket buffer the message is stored in
+ * @attrtype: attribute type of the container
+ *
+ * This function is a helper for creating empty nests.
+ *
+ * Returns: 0 when successful or -EMSGSIZE on failure.
+ */
+static inline int nla_put_empty_nest(struct sk_buff *skb, int attrtype)
+{
+ return nla_nest_start(skb, attrtype) ? 0 : -EMSGSIZE;
+}
+
+/**
* __nla_validate_nested - Validate a stream of nested attributes
* @start: container attribute
* @maxtype: maximum attribute type to be expected
diff --git a/include/net/netmem.h b/include/net/netmem.h
index 1b58faa4f20f..c61d5b21e7b4 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -24,11 +24,20 @@ struct net_iov {
unsigned long __unused_padding;
unsigned long pp_magic;
struct page_pool *pp;
- struct dmabuf_genpool_chunk_owner *owner;
+ struct net_iov_area *owner;
unsigned long dma_addr;
atomic_long_t pp_ref_count;
};
+struct net_iov_area {
+ /* Array of net_iovs for this area. */
+ struct net_iov *niovs;
+ size_t num_niovs;
+
+ /* Offset into the dma-buf where this chunk starts. */
+ unsigned long base_virtual;
+};
+
/* These fields in struct page are used by the page_pool and net stack:
*
* struct {
@@ -54,6 +63,16 @@ NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
#undef NET_IOV_ASSERT_OFFSET
+static inline struct net_iov_area *net_iov_owner(const struct net_iov *niov)
+{
+ return niov->owner;
+}
+
+static inline unsigned int net_iov_idx(const struct net_iov *niov)
+{
+ return niov - net_iov_owner(niov)->niovs;
+}
+
/* netmem */
/**
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 46452da35206..45ac125e8aeb 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -181,6 +181,7 @@ struct netns_ipv4 {
u8 sysctl_tcp_window_scaling;
u8 sysctl_tcp_timestamps;
int sysctl_tcp_rto_min_us;
+ int sysctl_tcp_rto_max_ms;
u8 sysctl_tcp_recovery;
u8 sysctl_tcp_thin_linear_timeouts;
u8 sysctl_tcp_slow_start_after_idle;
diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h
new file mode 100644
index 000000000000..b3e665897767
--- /dev/null
+++ b/include/net/page_pool/memory_provider.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_PAGE_POOL_MEMORY_PROVIDER_H
+#define _NET_PAGE_POOL_MEMORY_PROVIDER_H
+
+#include <net/netmem.h>
+#include <net/page_pool/types.h>
+
+struct netdev_rx_queue;
+struct sk_buff;
+
+struct memory_provider_ops {
+ netmem_ref (*alloc_netmems)(struct page_pool *pool, gfp_t gfp);
+ bool (*release_netmem)(struct page_pool *pool, netmem_ref netmem);
+ int (*init)(struct page_pool *pool);
+ void (*destroy)(struct page_pool *pool);
+ int (*nl_fill)(void *mp_priv, struct sk_buff *rsp,
+ struct netdev_rx_queue *rxq);
+ void (*uninstall)(void *mp_priv, struct netdev_rx_queue *rxq);
+};
+
+bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr);
+void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov);
+void net_mp_niov_clear_page_pool(struct net_iov *niov);
+
+int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *p);
+void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *old_p);
+
+/**
+ * net_mp_netmem_place_in_cache() - give a netmem to a page pool
+ * @pool: the page pool to place the netmem into
+ * @netmem: netmem to give
+ *
+ * Push an accounted netmem into the page pool's allocation cache. The caller
+ * must ensure that there is space in the cache. It should only be called off
+ * the mp_ops->alloc_netmems() path.
+ */
+static inline void net_mp_netmem_place_in_cache(struct page_pool *pool,
+ netmem_ref netmem)
+{
+ pool->alloc.cache[pool->alloc.count++] = netmem;
+}
+
+#endif
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 7f405672b089..36eb57d73abc 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -152,8 +152,11 @@ struct page_pool_stats {
*/
#define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long))
+struct memory_provider_ops;
+
struct pp_memory_provider_params {
void *mp_priv;
+ const struct memory_provider_ops *mp_ops;
};
struct page_pool {
@@ -216,6 +219,7 @@ struct page_pool {
struct ptr_ring ring;
void *mp_priv;
+ const struct memory_provider_ops *mp_ops;
#ifdef CONFIG_PAGE_POOL_STATS
/* recycle stats are per-cpu to avoid locking */
diff --git a/include/net/sock.h b/include/net/sock.h
index 8036b3b79cd8..edbb870e3f86 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -954,6 +954,7 @@ enum sock_flags {
SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */
SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */
SOCK_RCVPRIORITY, /* Receive SO_PRIORITY ancillary data with packet */
+ SOCK_TIMESTAMPING_ANY, /* Copy of sk_tsflags & TSFLAGS_ANY */
};
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
@@ -1284,10 +1285,6 @@ struct proto {
unsigned int inuse_idx;
#endif
-#if IS_ENABLED(CONFIG_MPTCP)
- int (*forward_alloc_get)(const struct sock *sk);
-#endif
-
bool (*stream_memory_free)(const struct sock *sk, int wake);
bool (*sock_is_readable)(struct sock *sk);
/* Memory pressure */
@@ -1348,15 +1345,6 @@ int sock_load_diag_module(int family, int protocol);
INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake));
-static inline int sk_forward_alloc_get(const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_MPTCP)
- if (sk->sk_prot->forward_alloc_get)
- return sk->sk_prot->forward_alloc_get(sk);
-#endif
- return READ_ONCE(sk->sk_forward_alloc);
-}
-
static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf))
@@ -1828,6 +1816,7 @@ static inline void sockcm_init(struct sockcm_cookie *sockc,
const struct sock *sk)
{
*sockc = (struct sockcm_cookie) {
+ .mark = READ_ONCE(sk->sk_mark),
.tsflags = READ_ONCE(sk->sk_tsflags),
.priority = READ_ONCE(sk->sk_priority),
};
@@ -2664,13 +2653,13 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
{
#define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \
(1UL << SOCK_RCVTSTAMP) | \
- (1UL << SOCK_RCVMARK) |\
- (1UL << SOCK_RCVPRIORITY))
+ (1UL << SOCK_RCVMARK) | \
+ (1UL << SOCK_RCVPRIORITY) | \
+ (1UL << SOCK_TIMESTAMPING_ANY))
#define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \
SOF_TIMESTAMPING_RAW_HARDWARE)
- if (sk->sk_flags & FLAGS_RECV_CMSGS ||
- READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY)
+ if (READ_ONCE(sk->sk_flags) & FLAGS_RECV_CMSGS)
__sock_recv_cmsgs(msg, sk, skb);
else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
sock_write_timestamp(sk, skb->tstamp);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 930cda5b5eb9..ced32b924a9c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -144,8 +144,9 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX);
#define TCP_DELACK_MIN 4U
#define TCP_ATO_MIN 4U
#endif
-#define TCP_RTO_MAX ((unsigned)(120*HZ))
-#define TCP_RTO_MIN ((unsigned)(HZ/5))
+#define TCP_RTO_MAX_SEC 120
+#define TCP_RTO_MAX ((unsigned)(TCP_RTO_MAX_SEC * HZ))
+#define TCP_RTO_MIN ((unsigned)(HZ / 5))
#define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */
#define TCP_TIMEOUT_MIN_US (2*USEC_PER_MSEC) /* Min TCP timeout in microsecs */
@@ -416,6 +417,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
unsigned int optlen);
+void tcp_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
void tcp_set_keepalive(struct sock *sk, int val);
void tcp_syn_ack_timeout(const struct request_sock *req);
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
@@ -753,10 +755,14 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu);
int tcp_mss_to_mtu(struct sock *sk, int mss);
void tcp_mtup_init(struct sock *sk);
+static inline unsigned int tcp_rto_max(const struct sock *sk)
+{
+ return READ_ONCE(inet_csk(sk)->icsk_rto_max);
+}
+
static inline void tcp_bound_rto(struct sock *sk)
{
- if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
- inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
+ inet_csk(sk)->icsk_rto = min(inet_csk(sk)->icsk_rto, tcp_rto_max(sk));
}
static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
@@ -1437,10 +1443,12 @@ static inline unsigned long tcp_pacing_delay(const struct sock *sk)
static inline void tcp_reset_xmit_timer(struct sock *sk,
const int what,
unsigned long when,
- const unsigned long max_when)
+ bool pace_delay)
{
- inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk),
- max_when);
+ if (pace_delay)
+ when += tcp_pacing_delay(sk);
+ inet_csk_reset_xmit_timer(sk, what, when,
+ tcp_rto_max(sk));
}
/* Something is really bad, we could not queue an additional packet,
@@ -1469,7 +1477,7 @@ static inline void tcp_check_probe_timer(struct sock *sk)
{
if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
- tcp_probe0_base(sk), TCP_RTO_MAX);
+ tcp_probe0_base(sk), true);
}
static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 784cd34f5bba..15086dcf51d8 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -196,6 +196,23 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
return xp_raw_get_data(pool, addr);
}
+/**
+ * xsk_buff_raw_get_ctx - get &xdp_desc context
+ * @pool: XSk buff pool desc address belongs to
+ * @addr: desc address (from userspace)
+ *
+ * Wrapper for xp_raw_get_ctx() to be used in drivers, see its kdoc for
+ * details.
+ *
+ * Return: new &xdp_desc_ctx struct containing desc's DMA address and metadata
+ * pointer, if it is present and valid (initialized to %NULL otherwise).
+ */
+static inline struct xdp_desc_ctx
+xsk_buff_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr)
+{
+ return xp_raw_get_ctx(pool, addr);
+}
+
#define XDP_TXMD_FLAGS_VALID ( \
XDP_TXMD_FLAGS_TIMESTAMP | \
XDP_TXMD_FLAGS_CHECKSUM | \
@@ -207,20 +224,27 @@ xsk_buff_valid_tx_metadata(const struct xsk_tx_metadata *meta)
return !(meta->flags & ~XDP_TXMD_FLAGS_VALID);
}
-static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+static inline struct xsk_tx_metadata *
+__xsk_buff_get_metadata(const struct xsk_buff_pool *pool, void *data)
{
struct xsk_tx_metadata *meta;
if (!pool->tx_metadata_len)
return NULL;
- meta = xp_raw_get_data(pool, addr) - pool->tx_metadata_len;
+ meta = data - pool->tx_metadata_len;
if (unlikely(!xsk_buff_valid_tx_metadata(meta)))
return NULL; /* no way to signal the error to the user */
return meta;
}
+static inline struct xsk_tx_metadata *
+xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+{
+ return __xsk_buff_get_metadata(pool, xp_raw_get_data(pool, addr));
+}
+
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
@@ -388,12 +412,25 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
return NULL;
}
+static inline struct xdp_desc_ctx
+xsk_buff_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr)
+{
+ return (struct xdp_desc_ctx){ };
+}
+
static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
{
return false;
}
-static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+static inline struct xsk_tx_metadata *
+__xsk_buff_get_metadata(const struct xsk_buff_pool *pool, void *data)
+{
+ return NULL;
+}
+
+static inline struct xsk_tx_metadata *
+xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
{
return NULL;
}
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 50779406bc2d..1dcd4d71468a 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -141,6 +141,14 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max);
bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count);
void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr);
dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr);
+
+struct xdp_desc_ctx {
+ dma_addr_t dma;
+ struct xsk_tx_metadata *meta;
+};
+
+struct xdp_desc_ctx xp_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr);
+
static inline dma_addr_t xp_get_dma(struct xdp_buff_xsk *xskb)
{
return xskb->dma;
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index a27c4b619dff..1a40c41ff8c3 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -259,6 +259,12 @@ TRACE_EVENT(tcp_retransmit_synack,
__entry->saddr_v6, __entry->daddr_v6)
);
+DECLARE_TRACE(tcp_cwnd_reduction_tp,
+ TP_PROTO(const struct sock *sk, int newly_acked_sacked,
+ int newly_lost, int flag),
+ TP_ARGS(sk, newly_acked_sacked, newly_lost, flag)
+);
+
#include <trace/events/net_probe_common.h>
TRACE_EVENT(tcp_probe,
diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index e78cbd85ce7c..42abf0679fb4 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -182,7 +182,7 @@ struct canfd_frame {
/*
* defined bits for canxl_frame.flags
*
- * The canxl_frame.flags element contains two bits CANXL_XLF and CANXL_SEC
+ * The canxl_frame.flags element contains three bits CANXL_[XLF|SEC|RRS]
* and shares the relative position of the struct can[fd]_frame.len element.
* The CANXL_XLF bit ALWAYS needs to be set to indicate a valid CAN XL frame.
* As a side effect setting this bit intentionally breaks the length checks
@@ -192,6 +192,7 @@ struct canfd_frame {
*/
#define CANXL_XLF 0x80 /* mandatory CAN XL frame flag (must always be set!) */
#define CANXL_SEC 0x01 /* Simple Extended Content (security/segmentation) */
+#define CANXL_RRS 0x02 /* Remote Request Substitution */
/* the 8-bit VCID is optionally placed in the canxl_frame.prio element */
#define CANXL_VCID_OFFSET 16 /* bit offset of VCID in prio element */
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 9b18c4cfe56f..2feba0929a8a 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -2059,6 +2059,24 @@ enum ethtool_link_mode_bit_indices {
ETHTOOL_LINK_MODE_10baseT1S_Half_BIT = 100,
ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT = 101,
ETHTOOL_LINK_MODE_10baseT1BRR_Full_BIT = 102,
+ ETHTOOL_LINK_MODE_200000baseCR_Full_BIT = 103,
+ ETHTOOL_LINK_MODE_200000baseKR_Full_BIT = 104,
+ ETHTOOL_LINK_MODE_200000baseDR_Full_BIT = 105,
+ ETHTOOL_LINK_MODE_200000baseDR_2_Full_BIT = 106,
+ ETHTOOL_LINK_MODE_200000baseSR_Full_BIT = 107,
+ ETHTOOL_LINK_MODE_200000baseVR_Full_BIT = 108,
+ ETHTOOL_LINK_MODE_400000baseCR2_Full_BIT = 109,
+ ETHTOOL_LINK_MODE_400000baseKR2_Full_BIT = 110,
+ ETHTOOL_LINK_MODE_400000baseDR2_Full_BIT = 111,
+ ETHTOOL_LINK_MODE_400000baseDR2_2_Full_BIT = 112,
+ ETHTOOL_LINK_MODE_400000baseSR2_Full_BIT = 113,
+ ETHTOOL_LINK_MODE_400000baseVR2_Full_BIT = 114,
+ ETHTOOL_LINK_MODE_800000baseCR4_Full_BIT = 115,
+ ETHTOOL_LINK_MODE_800000baseKR4_Full_BIT = 116,
+ ETHTOOL_LINK_MODE_800000baseDR4_Full_BIT = 117,
+ ETHTOOL_LINK_MODE_800000baseDR4_2_Full_BIT = 118,
+ ETHTOOL_LINK_MODE_800000baseSR4_Full_BIT = 119,
+ ETHTOOL_LINK_MODE_800000baseVR4_Full_BIT = 120,
/* must be last entry */
__ETHTOOL_LINK_MODE_MASK_NBITS
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 00e9890ca3c0..95ec01b15c65 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -70,6 +70,8 @@ enum {
FRA_DSCP, /* dscp */
FRA_FLOWLABEL, /* flowlabel */
FRA_FLOWLABEL_MASK, /* flowlabel mask */
+ FRA_SPORT_MASK, /* sport mask */
+ FRA_DPORT_MASK, /* dport mask */
__FRA_MAX
};
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index e4be227d3ad6..4e82f3871473 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -87,6 +87,11 @@ enum {
};
enum {
+ __NETDEV_A_IO_URING_PROVIDER_INFO_MAX,
+ NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1)
+};
+
+enum {
NETDEV_A_PAGE_POOL_ID = 1,
NETDEV_A_PAGE_POOL_IFINDEX,
NETDEV_A_PAGE_POOL_NAPI_ID,
@@ -94,6 +99,7 @@ enum {
NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
NETDEV_A_PAGE_POOL_DETACH_TIME,
NETDEV_A_PAGE_POOL_DMABUF,
+ NETDEV_A_PAGE_POOL_IO_URING,
__NETDEV_A_PAGE_POOL_MAX,
NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1)
@@ -131,11 +137,18 @@ enum {
};
enum {
+ __NETDEV_A_XSK_INFO_MAX,
+ NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1)
+};
+
+enum {
NETDEV_A_QUEUE_ID = 1,
NETDEV_A_QUEUE_IFINDEX,
NETDEV_A_QUEUE_TYPE,
NETDEV_A_QUEUE_NAPI_ID,
NETDEV_A_QUEUE_DMABUF,
+ NETDEV_A_QUEUE_IO_URING,
+ NETDEV_A_QUEUE_XSK,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index dbf896f3146c..32a27b4a5020 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -136,6 +136,7 @@ enum {
#define TCP_AO_REPAIR 42 /* Get/Set SNEs and ISNs */
#define TCP_IS_MPTCP 43 /* Is MPTCP being used? */
+#define TCP_RTO_MAX_MS 44 /* max rto time in ms */
#define TCP_REPAIR_ON 1
#define TCP_REPAIR_OFF 0
diff --git a/io_uring/napi.c b/io_uring/napi.c
index b1ade3fda30f..4a10de03e426 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -44,7 +44,7 @@ int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
struct io_napi_entry *e;
/* Non-NAPI IDs can be rejected. */
- if (napi_id < MIN_NAPI_ID)
+ if (!napi_id_valid(napi_id))
return -EINVAL;
hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];
@@ -87,7 +87,7 @@ static int __io_napi_del_id(struct io_ring_ctx *ctx, unsigned int napi_id)
struct io_napi_entry *e;
/* Non-NAPI IDs can be rejected. */
- if (napi_id < MIN_NAPI_ID)
+ if (!napi_id_valid(napi_id))
return -EINVAL;
hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1af972a92d06..238321830993 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2557,15 +2557,6 @@ config TEST_BPF
If unsure, say N.
-config TEST_BLACKHOLE_DEV
- tristate "Test blackhole netdev functionality"
- depends on m && NET
- help
- This builds the "test_blackhole_dev" module that validates the
- data path through this blackhole netdev.
-
- If unsure, say N.
-
config FIND_BIT_BENCHMARK
tristate "Test find_bit functions"
help
@@ -2888,6 +2879,17 @@ config USERCOPY_KUNIT_TEST
on the copy_to/from_user infrastructure, making sure basic
user/kernel boundary testing is working.
+config BLACKHOLE_DEV_KUNIT_TEST
+ tristate "Test blackhole netdev functionality" if !KUNIT_ALL_TESTS
+ depends on NET
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ This builds the "blackhole_dev_kunit" module that validates the
+ data path through this blackhole netdev.
+
+ If unsure, say N.
+
config TEST_UDELAY
tristate "udelay test driver"
help
diff --git a/lib/Makefile b/lib/Makefile
index d5cfc7afbbb8..19ff6993c2bc 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -102,7 +102,6 @@ obj-$(CONFIG_TEST_RUNTIME) += tests/
obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o
obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o
obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o
-obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o
obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o
obj-$(CONFIG_TEST_LOCKUP) += test_lockup.o
obj-$(CONFIG_TEST_HMM) += test_hmm.o
@@ -393,6 +392,7 @@ obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o
obj-$(CONFIG_CRC_KUNIT_TEST) += crc_kunit.o
obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o
obj-$(CONFIG_USERCOPY_KUNIT_TEST) += usercopy_kunit.o
+obj-$(CONFIG_BLACKHOLE_DEV_KUNIT_TEST) += blackhole_dev_kunit.o
obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o
diff --git a/lib/test_blackhole_dev.c b/lib/blackhole_dev_kunit.c
index ec290ac2a0d9..06834ab35f43 100644
--- a/lib/test_blackhole_dev.c
+++ b/lib/blackhole_dev_kunit.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * This module tests the blackhole_dev that is created during the
+ * This tests the blackhole_dev that is created during the
* net subsystem initialization. The test this module performs is
* by injecting an skb into the stack with skb->dev as the
* blackhole_dev and expects kernel to behave in a sane manner
@@ -9,9 +9,8 @@
* Copyright (c) 2018, Mahesh Bandewar <maheshb@google.com>
*/
-#include <linux/init.h>
+#include <kunit/test.h>
#include <linux/module.h>
-#include <linux/printk.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/udp.h>
@@ -25,17 +24,15 @@
#define UDP_PORT 1234
-static int __init test_blackholedev_init(void)
+static void test_blackholedev(struct kunit *test)
{
struct ipv6hdr *ip6h;
struct sk_buff *skb;
struct udphdr *uh;
int data_len;
- int ret;
skb = alloc_skb(SKB_SIZE, GFP_KERNEL);
- if (!skb)
- return -ENOMEM;
+ KUNIT_ASSERT_NOT_NULL(test, skb);
/* Reserve head-room for the headers */
skb_reserve(skb, HEAD_SIZE);
@@ -55,7 +52,7 @@ static int __init test_blackholedev_init(void)
ip6h = (struct ipv6hdr *)skb_push(skb, sizeof(struct ipv6hdr));
skb_set_network_header(skb, 0);
ip6h->hop_limit = 32;
- ip6h->payload_len = data_len + sizeof(struct udphdr);
+ ip6h->payload_len = htons(data_len + sizeof(struct udphdr));
ip6h->nexthdr = IPPROTO_UDP;
ip6h->saddr = in6addr_loopback;
ip6h->daddr = in6addr_loopback;
@@ -68,32 +65,20 @@ static int __init test_blackholedev_init(void)
skb->dev = blackhole_netdev;
/* Now attempt to send the packet */
- ret = dev_queue_xmit(skb);
-
- switch (ret) {
- case NET_XMIT_SUCCESS:
- pr_warn("dev_queue_xmit() returned NET_XMIT_SUCCESS\n");
- break;
- case NET_XMIT_DROP:
- pr_warn("dev_queue_xmit() returned NET_XMIT_DROP\n");
- break;
- case NET_XMIT_CN:
- pr_warn("dev_queue_xmit() returned NET_XMIT_CN\n");
- break;
- default:
- pr_err("dev_queue_xmit() returned UNKNOWN(%d)\n", ret);
- }
-
- return 0;
+ KUNIT_EXPECT_EQ(test, dev_queue_xmit(skb), NET_XMIT_SUCCESS);
}
-static void __exit test_blackholedev_exit(void)
-{
- pr_warn("test_blackholedev module terminating.\n");
-}
+static struct kunit_case blackholedev_cases[] = {
+ KUNIT_CASE(test_blackholedev),
+ {},
+};
+
+static struct kunit_suite blackholedev_suite = {
+ .name = "blackholedev",
+ .test_cases = blackholedev_cases,
+};
-module_init(test_blackholedev_init);
-module_exit(test_blackholedev_exit);
+kunit_test_suite(blackholedev_suite);
MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>");
MODULE_DESCRIPTION("module test of the blackhole_dev");
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 1a52a0bca086..7e1ad229e133 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -1040,7 +1040,7 @@ static int br_mdb_add_group(const struct br_mdb_config *cfg,
/* host join */
if (!port) {
- if (mp->host_joined) {
+ if (mp->host_joined && !(cfg->nlflags & NLM_F_REPLACE)) {
NL_SET_ERR_MSG_MOD(extack, "Group is already joined by host");
return -EEXIST;
}
diff --git a/net/can/raw.c b/net/can/raw.c
index 46e8ed9d64da..9b1d5f036f57 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -963,7 +963,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
skb->dev = dev;
skb->priority = sockc.priority;
- skb->mark = READ_ONCE(sk->sk_mark);
+ skb->mark = sockc.mark;
skb->tstamp = sockc.transmit_time;
skb_setup_tx_timestamp(skb, &sockc);
diff --git a/net/core/dev.c b/net/core/dev.c
index 1b252e9459fd..18064be6cf3e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -159,6 +159,7 @@
#include <net/netdev_rx_queue.h>
#include <net/page_pool/types.h>
#include <net/page_pool/helpers.h>
+#include <net/page_pool/memory_provider.h>
#include <net/rps.h>
#include <linux/phy_link_topology.h>
@@ -1007,7 +1008,7 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
WARN_ON_ONCE(!rcu_read_lock_held());
- if (napi_id < MIN_NAPI_ID)
+ if (!napi_id_valid(napi_id))
return NULL;
napi = napi_by_id(napi_id);
@@ -6190,16 +6191,18 @@ EXPORT_SYMBOL(netif_receive_skb_list);
static void flush_backlog(struct work_struct *work)
{
struct sk_buff *skb, *tmp;
+ struct sk_buff_head list;
struct softnet_data *sd;
+ __skb_queue_head_init(&list);
local_bh_disable();
sd = this_cpu_ptr(&softnet_data);
backlog_lock_irq_disable(sd);
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
- if (skb->dev->reg_state == NETREG_UNREGISTERING) {
+ if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->input_pkt_queue);
- dev_kfree_skb_irq(skb);
+ __skb_queue_tail(&list, skb);
rps_input_queue_head_incr(sd);
}
}
@@ -6207,14 +6210,16 @@ static void flush_backlog(struct work_struct *work)
local_lock_nested_bh(&softnet_data.process_queue_bh_lock);
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
- if (skb->dev->reg_state == NETREG_UNREGISTERING) {
+ if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->process_queue);
- kfree_skb(skb);
+ __skb_queue_tail(&list, skb);
rps_input_queue_head_incr(sd);
}
}
local_unlock_nested_bh(&softnet_data.process_queue_bh_lock);
local_bh_enable();
+
+ __skb_queue_purge_reason(&list, SKB_DROP_REASON_DEV_READY);
}
static bool flush_required(int cpu)
@@ -6806,7 +6811,7 @@ static void napi_hash_add(struct napi_struct *napi)
/* 0..NR_CPUS range is reserved for sender_cpu use */
do {
- if (unlikely(++napi_gen_id < MIN_NAPI_ID))
+ if (unlikely(!napi_id_valid(++napi_gen_id)))
napi_gen_id = MIN_NAPI_ID;
} while (napi_by_id(napi_gen_id));
@@ -6977,7 +6982,7 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
higher = &dev->napi_list;
list_for_each_entry(pos, &dev->napi_list, dev_list) {
- if (pos->napi_id >= MIN_NAPI_ID)
+ if (napi_id_valid(pos->napi_id))
pos_id = pos->napi_id;
else if (pos->config)
pos_id = pos->config->napi_id;
@@ -7159,6 +7164,9 @@ void __netif_napi_del_locked(struct napi_struct *napi)
if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
return;
+ /* Make sure NAPI is disabled (or was never enabled). */
+ WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state));
+
if (napi->config) {
napi->index = -1;
napi->config = NULL;
@@ -9262,7 +9270,7 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags,
if ((flags ^ dev->gflags) & IFF_PROMISC) {
int inc = (flags & IFF_PROMISC) ? 1 : -1;
- unsigned int old_flags = dev->flags;
+ old_flags = dev->flags;
dev->gflags ^= IFF_PROMISC;
@@ -11826,6 +11834,19 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
}
EXPORT_SYMBOL(unregister_netdevice_queue);
+static void dev_memory_provider_uninstall(struct net_device *dev)
+{
+ unsigned int i;
+
+ for (i = 0; i < dev->real_num_rx_queues; i++) {
+ struct netdev_rx_queue *rxq = &dev->_rx[i];
+ struct pp_memory_provider_params *p = &rxq->mp_params;
+
+ if (p->mp_ops && p->mp_ops->uninstall)
+ p->mp_ops->uninstall(rxq->mp_params.mp_priv, rxq);
+ }
+}
+
void unregister_netdevice_many_notify(struct list_head *head,
u32 portid, const struct nlmsghdr *nlh)
{
@@ -11880,7 +11901,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_tcx_uninstall(dev);
dev_xdp_uninstall(dev);
bpf_dev_bound_netdev_unregister(dev);
- dev_dmabuf_uninstall(dev);
+ dev_memory_provider_uninstall(dev);
netdev_offload_xstats_disable_all(dev);
diff --git a/net/core/dev.h b/net/core/dev.h
index a5b166bbd169..caa13e431a6b 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -299,6 +299,18 @@ void xdp_do_check_flushed(struct napi_struct *napi);
static inline void xdp_do_check_flushed(struct napi_struct *napi) { }
#endif
+/* Best effort check that NAPI is not idle (can't be scheduled to run) */
+static inline void napi_assert_will_not_race(const struct napi_struct *napi)
+{
+ /* uninitialized instance, can't race */
+ if (!napi->poll_list.next)
+ return;
+
+ /* SCHED bit is set on disabled instances */
+ WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state));
+ WARN_ON(READ_ONCE(napi->list_owner) != -1);
+}
+
void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
#define XMIT_RECURSION_LIMIT 8
diff --git a/net/core/devmem.c b/net/core/devmem.c
index 3bba3f018df0..7c6e0b5b6acb 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -16,6 +16,7 @@
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
+#include <net/page_pool/memory_provider.h>
#include <trace/events/page_pool.h>
#include "devmem.h"
@@ -27,20 +28,28 @@
/* Protected by rtnl_lock() */
static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
+static const struct memory_provider_ops dmabuf_devmem_ops;
+
+bool net_is_devmem_iov(struct net_iov *niov)
+{
+ return niov->pp->mp_ops == &dmabuf_devmem_ops;
+}
+
static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
struct gen_pool_chunk *chunk,
void *not_used)
{
struct dmabuf_genpool_chunk_owner *owner = chunk->owner;
- kvfree(owner->niovs);
+ kvfree(owner->area.niovs);
kfree(owner);
}
static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
{
- struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);
+ struct dmabuf_genpool_chunk_owner *owner;
+ owner = net_devmem_iov_to_chunk_owner(niov);
return owner->base_dma_addr +
((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
}
@@ -83,7 +92,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
offset = dma_addr - owner->base_dma_addr;
index = offset / PAGE_SIZE;
- niov = &owner->niovs[index];
+ niov = &owner->area.niovs[index];
niov->pp_magic = 0;
niov->pp = NULL;
@@ -94,7 +103,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
void net_devmem_free_dmabuf(struct net_iov *niov)
{
- struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov);
+ struct net_devmem_dmabuf_binding *binding = net_devmem_iov_binding(niov);
unsigned long dma_addr = net_devmem_get_dma_addr(niov);
if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr,
@@ -117,6 +126,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
WARN_ON(rxq->mp_params.mp_priv != binding);
rxq->mp_params.mp_priv = NULL;
+ rxq->mp_params.mp_ops = NULL;
rxq_idx = get_netdev_rx_queue_index(rxq);
@@ -152,7 +162,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
}
rxq = __netif_get_rx_queue(dev, rxq_idx);
- if (rxq->mp_params.mp_priv) {
+ if (rxq->mp_params.mp_ops) {
NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
return -EEXIST;
}
@@ -170,6 +180,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
return err;
rxq->mp_params.mp_priv = binding;
+ rxq->mp_params.mp_ops = &dmabuf_devmem_ops;
err = netdev_rx_queue_restart(dev, rxq_idx);
if (err)
@@ -179,6 +190,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
err_xa_erase:
rxq->mp_params.mp_priv = NULL;
+ rxq->mp_params.mp_ops = NULL;
xa_erase(&binding->bound_rxqs, xa_idx);
return err;
@@ -261,9 +273,9 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
goto err_free_chunks;
}
- owner->base_virtual = virtual;
+ owner->area.base_virtual = virtual;
owner->base_dma_addr = dma_addr;
- owner->num_niovs = len / PAGE_SIZE;
+ owner->area.num_niovs = len / PAGE_SIZE;
owner->binding = binding;
err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
@@ -275,17 +287,17 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
goto err_free_chunks;
}
- owner->niovs = kvmalloc_array(owner->num_niovs,
- sizeof(*owner->niovs),
- GFP_KERNEL);
- if (!owner->niovs) {
+ owner->area.niovs = kvmalloc_array(owner->area.num_niovs,
+ sizeof(*owner->area.niovs),
+ GFP_KERNEL);
+ if (!owner->area.niovs) {
err = -ENOMEM;
goto err_free_chunks;
}
- for (i = 0; i < owner->num_niovs; i++) {
- niov = &owner->niovs[i];
- niov->owner = owner;
+ for (i = 0; i < owner->area.num_niovs; i++) {
+ niov = &owner->area.niovs[i];
+ niov->owner = &owner->area;
page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
net_devmem_get_dma_addr(niov));
}
@@ -313,26 +325,6 @@ err_put_dmabuf:
return ERR_PTR(err);
}
-void dev_dmabuf_uninstall(struct net_device *dev)
-{
- struct net_devmem_dmabuf_binding *binding;
- struct netdev_rx_queue *rxq;
- unsigned long xa_idx;
- unsigned int i;
-
- for (i = 0; i < dev->real_num_rx_queues; i++) {
- binding = dev->_rx[i].mp_params.mp_priv;
- if (!binding)
- continue;
-
- xa_for_each(&binding->bound_rxqs, xa_idx, rxq)
- if (rxq == &dev->_rx[i]) {
- xa_erase(&binding->bound_rxqs, xa_idx);
- break;
- }
- }
-}
-
/*** "Dmabuf devmem memory provider" ***/
int mp_dmabuf_devmem_init(struct page_pool *pool)
@@ -398,3 +390,36 @@ bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
/* We don't want the page pool put_page()ing our net_iovs. */
return false;
}
+
+static int mp_dmabuf_devmem_nl_fill(void *mp_priv, struct sk_buff *rsp,
+ struct netdev_rx_queue *rxq)
+{
+ const struct net_devmem_dmabuf_binding *binding = mp_priv;
+ int type = rxq ? NETDEV_A_QUEUE_DMABUF : NETDEV_A_PAGE_POOL_DMABUF;
+
+ return nla_put_u32(rsp, type, binding->id);
+}
+
+static void mp_dmabuf_devmem_uninstall(void *mp_priv,
+ struct netdev_rx_queue *rxq)
+{
+ struct net_devmem_dmabuf_binding *binding = mp_priv;
+ struct netdev_rx_queue *bound_rxq;
+ unsigned long xa_idx;
+
+ xa_for_each(&binding->bound_rxqs, xa_idx, bound_rxq) {
+ if (bound_rxq == rxq) {
+ xa_erase(&binding->bound_rxqs, xa_idx);
+ break;
+ }
+ }
+}
+
+static const struct memory_provider_ops dmabuf_devmem_ops = {
+ .init = mp_dmabuf_devmem_init,
+ .destroy = mp_dmabuf_devmem_destroy,
+ .alloc_netmems = mp_dmabuf_devmem_alloc_netmems,
+ .release_netmem = mp_dmabuf_devmem_release_page,
+ .nl_fill = mp_dmabuf_devmem_nl_fill,
+ .uninstall = mp_dmabuf_devmem_uninstall,
+};
diff --git a/net/core/devmem.h b/net/core/devmem.h
index 76099ef9c482..7fc158d52729 100644
--- a/net/core/devmem.h
+++ b/net/core/devmem.h
@@ -10,6 +10,8 @@
#ifndef _NET_DEVMEM_H
#define _NET_DEVMEM_H
+#include <net/netmem.h>
+
struct netlink_ext_ack;
struct net_devmem_dmabuf_binding {
@@ -51,17 +53,11 @@ struct net_devmem_dmabuf_binding {
* allocations from this chunk.
*/
struct dmabuf_genpool_chunk_owner {
- /* Offset into the dma-buf where this chunk starts. */
- unsigned long base_virtual;
+ struct net_iov_area area;
+ struct net_devmem_dmabuf_binding *binding;
/* dma_addr of the start of the chunk. */
dma_addr_t base_dma_addr;
-
- /* Array of net_iovs for this chunk. */
- struct net_iov *niovs;
- size_t num_niovs;
-
- struct net_devmem_dmabuf_binding *binding;
};
void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding);
@@ -72,38 +68,34 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding);
int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
struct net_devmem_dmabuf_binding *binding,
struct netlink_ext_ack *extack);
-void dev_dmabuf_uninstall(struct net_device *dev);
static inline struct dmabuf_genpool_chunk_owner *
-net_iov_owner(const struct net_iov *niov)
+net_devmem_iov_to_chunk_owner(const struct net_iov *niov)
{
- return niov->owner;
+ struct net_iov_area *owner = net_iov_owner(niov);
+
+ return container_of(owner, struct dmabuf_genpool_chunk_owner, area);
}
-static inline unsigned int net_iov_idx(const struct net_iov *niov)
+static inline struct net_devmem_dmabuf_binding *
+net_devmem_iov_binding(const struct net_iov *niov)
{
- return niov - net_iov_owner(niov)->niovs;
+ return net_devmem_iov_to_chunk_owner(niov)->binding;
}
-static inline struct net_devmem_dmabuf_binding *
-net_iov_binding(const struct net_iov *niov)
+static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov)
{
- return net_iov_owner(niov)->binding;
+ return net_devmem_iov_binding(niov)->id;
}
static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
{
- struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);
+ struct net_iov_area *owner = net_iov_owner(niov);
return owner->base_virtual +
((unsigned long)net_iov_idx(niov) << PAGE_SHIFT);
}
-static inline u32 net_iov_binding_id(const struct net_iov *niov)
-{
- return net_iov_owner(niov)->binding->id;
-}
-
static inline void
net_devmem_dmabuf_binding_get(struct net_devmem_dmabuf_binding *binding)
{
@@ -123,6 +115,8 @@ struct net_iov *
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding);
void net_devmem_free_dmabuf(struct net_iov *ppiov);
+bool net_is_devmem_iov(struct net_iov *niov);
+
#else
struct net_devmem_dmabuf_binding;
@@ -152,10 +146,6 @@ net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
return -EOPNOTSUPP;
}
-static inline void dev_dmabuf_uninstall(struct net_device *dev)
-{
-}
-
static inline struct net_iov *
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
@@ -171,10 +161,15 @@ static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
return 0;
}
-static inline u32 net_iov_binding_id(const struct net_iov *niov)
+static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov)
{
return 0;
}
+
+static inline bool net_is_devmem_iov(struct net_iov *niov)
+{
+ return false;
+}
#endif
#endif /* _NET_DEVMEM_H */
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 94a7872ab231..5ddd34cbe7f6 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -373,7 +373,8 @@ static int call_fib_rule_notifiers(struct net *net,
.rule = rule,
};
- ASSERT_RTNL();
+ ASSERT_RTNL_NET(net);
+
/* Paired with READ_ONCE() in fib_rules_seq() */
WRITE_ONCE(ops->fib_rules_seq, ops->fib_rules_seq + 1);
return call_fib_notifiers(net, event_type, &info.info);
@@ -461,9 +462,6 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,
if (rule->tun_id && r->tun_id != rule->tun_id)
continue;
- if (r->fr_net != rule->fr_net)
- continue;
-
if (rule->l3mdev && r->l3mdev != rule->l3mdev)
continue;
@@ -483,11 +481,17 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,
&rule->sport_range))
continue;
+ if (rule->sport_mask && r->sport_mask != rule->sport_mask)
+ continue;
+
if (fib_rule_port_range_set(&rule->dport_range) &&
!fib_rule_port_range_compare(&r->dport_range,
&rule->dport_range))
continue;
+ if (rule->dport_mask && r->dport_mask != rule->dport_mask)
+ continue;
+
if (!ops->compare(r, frh, tb))
continue;
return r;
@@ -517,14 +521,40 @@ static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule,
}
#endif
-static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
+static int fib_nl2rule_port_mask(const struct nlattr *mask_attr,
+ const struct fib_rule_port_range *range,
+ u16 *port_mask,
+ struct netlink_ext_ack *extack)
+{
+ if (!fib_rule_port_range_valid(range)) {
+ NL_SET_ERR_MSG_ATTR(extack, mask_attr,
+ "Cannot specify port mask without port value");
+ return -EINVAL;
+ }
+
+ if (fib_rule_port_is_range(range)) {
+ NL_SET_ERR_MSG_ATTR(extack, mask_attr,
+ "Cannot specify port mask for port range");
+ return -EINVAL;
+ }
+
+ if (range->start & ~nla_get_u16(mask_attr)) {
+ NL_SET_ERR_MSG_ATTR(extack, mask_attr, "Invalid port mask");
+ return -EINVAL;
+ }
+
+ *port_mask = nla_get_u16(mask_attr);
+
+ return 0;
+}
+
+static int fib_nl2rule(struct net *net, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack,
struct fib_rules_ops *ops,
struct nlattr *tb[],
struct fib_rule **rule,
bool *user_priority)
{
- struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
struct fib_rule *nlrule = NULL;
int err = -EINVAL;
@@ -556,30 +586,18 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[FRA_PRIORITY]) {
nlrule->pref = nla_get_u32(tb[FRA_PRIORITY]);
*user_priority = true;
- } else {
- nlrule->pref = fib_default_rule_pref(ops);
}
nlrule->proto = nla_get_u8_default(tb[FRA_PROTOCOL], RTPROT_UNSPEC);
if (tb[FRA_IIFNAME]) {
- struct net_device *dev;
-
nlrule->iifindex = -1;
nla_strscpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
- dev = __dev_get_by_name(net, nlrule->iifname);
- if (dev)
- nlrule->iifindex = dev->ifindex;
}
if (tb[FRA_OIFNAME]) {
- struct net_device *dev;
-
nlrule->oifindex = -1;
nla_strscpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
- dev = __dev_get_by_name(net, nlrule->oifname);
- if (dev)
- nlrule->oifindex = dev->ifindex;
}
if (tb[FRA_FWMARK]) {
@@ -621,11 +639,6 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
}
nlrule->target = nla_get_u32(tb[FRA_GOTO]);
- /* Backward jumps are prohibited to avoid endless loops */
- if (nlrule->target <= nlrule->pref) {
- NL_SET_ERR_MSG(extack, "Backward goto not supported");
- goto errout_free;
- }
} else if (nlrule->action == FR_ACT_GOTO) {
NL_SET_ERR_MSG(extack, "Missing goto target for action goto");
goto errout_free;
@@ -664,6 +677,16 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
NL_SET_ERR_MSG(extack, "Invalid sport range");
goto errout_free;
}
+ if (!fib_rule_port_is_range(&nlrule->sport_range))
+ nlrule->sport_mask = U16_MAX;
+ }
+
+ if (tb[FRA_SPORT_MASK]) {
+ err = fib_nl2rule_port_mask(tb[FRA_SPORT_MASK],
+ &nlrule->sport_range,
+ &nlrule->sport_mask, extack);
+ if (err)
+ goto errout_free;
}
if (tb[FRA_DPORT_RANGE]) {
@@ -673,6 +696,16 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
NL_SET_ERR_MSG(extack, "Invalid dport range");
goto errout_free;
}
+ if (!fib_rule_port_is_range(&nlrule->dport_range))
+ nlrule->dport_mask = U16_MAX;
+ }
+
+ if (tb[FRA_DPORT_MASK]) {
+ err = fib_nl2rule_port_mask(tb[FRA_DPORT_MASK],
+ &nlrule->dport_range,
+ &nlrule->dport_mask, extack);
+ if (err)
+ goto errout_free;
}
*rule = nlrule;
@@ -685,6 +718,39 @@ errout:
return err;
}
+static int fib_nl2rule_rtnl(struct fib_rule *nlrule,
+ struct fib_rules_ops *ops,
+ struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
+{
+ if (!tb[FRA_PRIORITY])
+ nlrule->pref = fib_default_rule_pref(ops);
+
+ /* Backward jumps are prohibited to avoid endless loops */
+ if (tb[FRA_GOTO] && nlrule->target <= nlrule->pref) {
+ NL_SET_ERR_MSG(extack, "Backward goto not supported");
+ return -EINVAL;
+ }
+
+ if (tb[FRA_IIFNAME]) {
+ struct net_device *dev;
+
+ dev = __dev_get_by_name(nlrule->fr_net, nlrule->iifname);
+ if (dev)
+ nlrule->iifindex = dev->ifindex;
+ }
+
+ if (tb[FRA_OIFNAME]) {
+ struct net_device *dev;
+
+ dev = __dev_get_by_name(nlrule->fr_net, nlrule->oifname);
+ if (dev)
+ nlrule->oifindex = dev->ifindex;
+ }
+
+ return 0;
+}
+
static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
struct nlattr **tb, struct fib_rule *rule)
{
@@ -721,9 +787,6 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
if (r->tun_id != rule->tun_id)
continue;
- if (r->fr_net != rule->fr_net)
- continue;
-
if (r->l3mdev != rule->l3mdev)
continue;
@@ -741,10 +804,16 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
&rule->sport_range))
continue;
+ if (r->sport_mask != rule->sport_mask)
+ continue;
+
if (!fib_rule_port_range_compare(&r->dport_range,
&rule->dport_range))
continue;
+ if (r->dport_mask != rule->dport_mask)
+ continue;
+
if (!ops->compare(r, frh, tb))
continue;
return 1;
@@ -774,17 +843,18 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = {
[FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2),
[FRA_FLOWLABEL] = { .type = NLA_BE32 },
[FRA_FLOWLABEL_MASK] = { .type = NLA_BE32 },
+ [FRA_SPORT_MASK] = { .type = NLA_U16 },
+ [FRA_DPORT_MASK] = { .type = NLA_U16 },
};
-int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
+int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack, bool rtnl_held)
{
- struct net *net = sock_net(skb->sk);
+ struct fib_rule *rule = NULL, *r, *last = NULL;
struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ int err = -EINVAL, unresolved = 0;
struct fib_rules_ops *ops = NULL;
- struct fib_rule *rule = NULL, *r, *last = NULL;
struct nlattr *tb[FRA_MAX + 1];
- int err = -EINVAL, unresolved = 0;
bool user_priority = false;
if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
@@ -806,10 +876,17 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
- err = fib_nl2rule(skb, nlh, extack, ops, tb, &rule, &user_priority);
+ err = fib_nl2rule(net, nlh, extack, ops, tb, &rule, &user_priority);
if (err)
goto errout;
+ if (!rtnl_held)
+ rtnl_net_lock(net);
+
+ err = fib_nl2rule_rtnl(rule, ops, tb, extack);
+ if (err)
+ goto errout_free;
+
if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
rule_exists(ops, frh, tb, rule)) {
err = -EEXIST;
@@ -871,29 +948,42 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rule->tun_id)
ip_tunnel_need_metadata();
+ fib_rule_get(rule);
+
+ if (!rtnl_held)
+ rtnl_net_unlock(net);
+
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
+ fib_rule_put(rule);
flush_route_cache(ops);
rules_ops_put(ops);
return 0;
errout_free:
+ if (!rtnl_held)
+ rtnl_net_unlock(net);
kfree(rule);
errout:
rules_ops_put(ops);
return err;
}
-EXPORT_SYMBOL_GPL(fib_nl_newrule);
+EXPORT_SYMBOL_GPL(fib_newrule);
-int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
+static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
- struct net *net = sock_net(skb->sk);
+ return fib_newrule(sock_net(skb->sk), skb, nlh, extack, false);
+}
+
+int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack, bool rtnl_held)
+{
+ struct fib_rule *rule = NULL, *nlrule = NULL;
struct fib_rule_hdr *frh = nlmsg_data(nlh);
struct fib_rules_ops *ops = NULL;
- struct fib_rule *rule = NULL, *r, *nlrule = NULL;
struct nlattr *tb[FRA_MAX+1];
- int err = -EINVAL;
bool user_priority = false;
+ int err = -EINVAL;
if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
NL_SET_ERR_MSG(extack, "Invalid msg length");
@@ -914,25 +1004,32 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
- err = fib_nl2rule(skb, nlh, extack, ops, tb, &nlrule, &user_priority);
+ err = fib_nl2rule(net, nlh, extack, ops, tb, &nlrule, &user_priority);
if (err)
goto errout;
+ if (!rtnl_held)
+ rtnl_net_lock(net);
+
+ err = fib_nl2rule_rtnl(nlrule, ops, tb, extack);
+ if (err)
+ goto errout_free;
+
rule = rule_find(ops, frh, tb, nlrule, user_priority);
if (!rule) {
err = -ENOENT;
- goto errout;
+ goto errout_free;
}
if (rule->flags & FIB_RULE_PERMANENT) {
err = -EPERM;
- goto errout;
+ goto errout_free;
}
if (ops->delete) {
err = ops->delete(rule);
if (err)
- goto errout;
+ goto errout_free;
}
if (rule->tun_id)
@@ -954,7 +1051,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
* current if it is goto rule, have actually been added.
*/
if (ops->nr_goto_rules > 0) {
- struct fib_rule *n;
+ struct fib_rule *n, *r;
n = list_next_entry(rule, list);
if (&n->list == &ops->rules_list || n->pref != rule->pref)
@@ -968,22 +1065,33 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops,
- NULL);
- notify_rule_change(RTM_DELRULE, rule, ops, nlh,
- NETLINK_CB(skb).portid);
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, NULL);
+
+ if (!rtnl_held)
+ rtnl_net_unlock(net);
+
+ notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
fib_rule_put(rule);
flush_route_cache(ops);
rules_ops_put(ops);
kfree(nlrule);
return 0;
-errout:
+errout_free:
+ if (!rtnl_held)
+ rtnl_net_unlock(net);
kfree(nlrule);
+errout:
rules_ops_put(ops);
return err;
}
-EXPORT_SYMBOL_GPL(fib_nl_delrule);
+EXPORT_SYMBOL_GPL(fib_delrule);
+
+static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ return fib_delrule(sock_net(skb->sk), skb, nlh, extack, false);
+}
static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
struct fib_rule *rule)
@@ -1002,7 +1110,9 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+ nla_total_size(1) /* FRA_PROTOCOL */
+ nla_total_size(1) /* FRA_IP_PROTO */
+ nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */
- + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */
+ + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_DPORT_RANGE */
+ + nla_total_size(2) /* FRA_SPORT_MASK */
+ + nla_total_size(2); /* FRA_DPORT_MASK */
if (ops->nlmsg_payload)
payload += ops->nlmsg_payload(rule);
@@ -1070,8 +1180,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
nla_put_uid_range(skb, &rule->uid_range)) ||
(fib_rule_port_range_set(&rule->sport_range) &&
nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) ||
+ (rule->sport_mask && nla_put_u16(skb, FRA_SPORT_MASK,
+ rule->sport_mask)) ||
(fib_rule_port_range_set(&rule->dport_range) &&
nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) ||
+ (rule->dport_mask && nla_put_u16(skb, FRA_DPORT_MASK,
+ rule->dport_mask)) ||
(rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto)))
goto nla_put_failure;
@@ -1295,8 +1409,10 @@ static struct pernet_operations fib_rules_net_ops = {
};
static const struct rtnl_msg_handler fib_rules_rtnl_msg_handlers[] __initconst = {
- {.msgtype = RTM_NEWRULE, .doit = fib_nl_newrule},
- {.msgtype = RTM_DELRULE, .doit = fib_nl_delrule},
+ {.msgtype = RTM_NEWRULE, .doit = fib_nl_newrule,
+ .flags = RTNL_FLAG_DOIT_PERNET},
+ {.msgtype = RTM_DELRULE, .doit = fib_nl_delrule,
+ .flags = RTNL_FLAG_DOIT_PERNET},
{.msgtype = RTM_GETRULE, .dumpit = fib_nl_dumprule,
.flags = RTNL_FLAG_DUMP_UNLOCKED},
};
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index bd0251bd74a1..d8dd686b5287 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -832,12 +832,10 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
return -ENOENT;
}
-static void neigh_parms_destroy(struct neigh_parms *parms);
-
static inline void neigh_parms_put(struct neigh_parms *parms)
{
if (refcount_dec_and_test(&parms->refcnt))
- neigh_parms_destroy(parms);
+ kfree(parms);
}
/*
@@ -1713,11 +1711,6 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
}
EXPORT_SYMBOL(neigh_parms_release);
-static void neigh_parms_destroy(struct neigh_parms *parms)
-{
- kfree(parms);
-}
-
static struct lock_class_key neigh_table_proxy_queue_class;
static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 07cb99b114bd..3fe2c521e574 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -42,6 +42,87 @@ static inline int dev_isalive(const struct net_device *dev)
return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED;
}
+/* There is a possible ABBA deadlock between rtnl_lock and kernfs_node->active,
+ * when unregistering a net device and accessing associated sysfs files. The
+ * potential deadlock is as follow:
+ *
+ * CPU 0 CPU 1
+ *
+ * rtnl_lock vfs_read
+ * unregister_netdevice_many kernfs_seq_start
+ * device_del / kobject_put kernfs_get_active (kn->active++)
+ * kernfs_drain sysfs_kf_seq_show
+ * wait_event( rtnl_lock
+ * kn->active == KN_DEACTIVATED_BIAS) -> waits on CPU 0 to release
+ * -> waits on CPU 1 to decrease kn->active the rtnl lock.
+ *
+ * The historical fix was to use rtnl_trylock with restart_syscall to bail out
+ * of sysfs operations when the lock couldn't be taken. This fixed the above
+ * issue as it allowed CPU 1 to bail out of the ABBA situation.
+ *
+ * But it came with performances issues, as syscalls are being restarted in
+ * loops when there was contention on the rtnl lock, with huge slow downs in
+ * specific scenarios (e.g. lots of virtual interfaces created and userspace
+ * daemons querying their attributes).
+ *
+ * The idea below is to bail out of the active kernfs_node protection
+ * (kn->active) while trying to take the rtnl lock.
+ *
+ * This replaces rtnl_lock() and still has to be used with rtnl_unlock(). The
+ * net device is guaranteed to be alive if this returns successfully.
+ */
+static int sysfs_rtnl_lock(struct kobject *kobj, struct attribute *attr,
+ struct net_device *ndev)
+{
+ struct kernfs_node *kn;
+ int ret = 0;
+
+ /* First, we hold a reference to the net device as the unregistration
+ * path might run in parallel. This will ensure the net device and the
+ * associated sysfs objects won't be freed while we try to take the rtnl
+ * lock.
+ */
+ dev_hold(ndev);
+ /* sysfs_break_active_protection was introduced to allow self-removal of
+ * devices and their associated sysfs files by bailing out of the
+ * sysfs/kernfs protection. We do this here to allow the unregistration
+ * path to complete in parallel. The following takes a reference on the
+ * kobject and the kernfs_node being accessed.
+ *
+ * This works because we hold a reference onto the net device and the
+ * unregistration path will wait for us eventually in netdev_run_todo
+ * (outside an rtnl lock section).
+ */
+ kn = sysfs_break_active_protection(kobj, attr);
+ /* We can now try to take the rtnl lock. This can't deadlock us as the
+ * unregistration path is able to drain sysfs files (kernfs_node) thanks
+ * to the above dance.
+ */
+ if (rtnl_lock_interruptible()) {
+ ret = -ERESTARTSYS;
+ goto unbreak;
+ }
+ /* Check dismantle on the device hasn't started, otherwise deny the
+ * operation.
+ */
+ if (!dev_isalive(ndev)) {
+ rtnl_unlock();
+ ret = -ENODEV;
+ goto unbreak;
+ }
+ /* We are now sure the device dismantle hasn't started nor that it can
+ * start before we exit the locking section as we hold the rtnl lock.
+ * There's no need to keep unbreaking the sysfs protection nor to hold
+ * a net device reference from that point; that was only needed to take
+ * the rtnl lock.
+ */
+unbreak:
+ sysfs_unbreak_active_protection(kn);
+ dev_put(ndev);
+
+ return ret;
+}
+
/* use same locking rules as GIF* ioctl's */
static ssize_t netdev_show(const struct device *dev,
struct device_attribute *attr, char *buf,
@@ -95,14 +176,14 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
if (ret)
goto err;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ goto err;
+
+ ret = (*set)(netdev, new);
+ if (ret == 0)
+ ret = len;
- if (dev_isalive(netdev)) {
- ret = (*set)(netdev, new);
- if (ret == 0)
- ret = len;
- }
rtnl_unlock();
err:
return ret;
@@ -220,7 +301,7 @@ static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
struct net_device *netdev = to_net_dev(dev);
/* The check is also done in change_carrier; this helps returning early
- * without hitting the trylock/restart in netdev_store.
+ * without hitting the locking section in netdev_store.
*/
if (!netdev->netdev_ops->ndo_change_carrier)
return -EOPNOTSUPP;
@@ -234,8 +315,9 @@ static ssize_t carrier_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
int ret = -EINVAL;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
if (netif_running(netdev)) {
/* Synchronize carrier state with link watch,
@@ -245,8 +327,8 @@ static ssize_t carrier_show(struct device *dev,
ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev));
}
- rtnl_unlock();
+ rtnl_unlock();
return ret;
}
static DEVICE_ATTR_RW(carrier);
@@ -258,13 +340,14 @@ static ssize_t speed_show(struct device *dev,
int ret = -EINVAL;
/* The check is also done in __ethtool_get_link_ksettings; this helps
- * returning early without hitting the trylock/restart below.
+ * returning early without hitting the locking section below.
*/
if (!netdev->ethtool_ops->get_link_ksettings)
return ret;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
if (netif_running(netdev)) {
struct ethtool_link_ksettings cmd;
@@ -284,13 +367,14 @@ static ssize_t duplex_show(struct device *dev,
int ret = -EINVAL;
/* The check is also done in __ethtool_get_link_ksettings; this helps
- * returning early without hitting the trylock/restart below.
+ * returning early without hitting the locking section below.
*/
if (!netdev->ethtool_ops->get_link_ksettings)
return ret;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
if (netif_running(netdev)) {
struct ethtool_link_ksettings cmd;
@@ -490,16 +574,15 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
if (len > 0 && buf[len - 1] == '\n')
--count;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
- if (dev_isalive(netdev)) {
- ret = dev_set_alias(netdev, buf, count);
- if (ret < 0)
- goto err;
- ret = len;
- netdev_state_change(netdev);
- }
+ ret = dev_set_alias(netdev, buf, count);
+ if (ret < 0)
+ goto err;
+ ret = len;
+ netdev_state_change(netdev);
err:
rtnl_unlock();
@@ -551,24 +634,23 @@ static ssize_t phys_port_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
+ struct netdev_phys_item_id ppid;
ssize_t ret = -EINVAL;
/* The check is also done in dev_get_phys_port_id; this helps returning
- * early without hitting the trylock/restart below.
+ * early without hitting the locking section below.
*/
if (!netdev->netdev_ops->ndo_get_phys_port_id)
return -EOPNOTSUPP;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
- if (dev_isalive(netdev)) {
- struct netdev_phys_item_id ppid;
+ ret = dev_get_phys_port_id(netdev, &ppid);
+ if (!ret)
+ ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
- ret = dev_get_phys_port_id(netdev, &ppid);
- if (!ret)
- ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
- }
rtnl_unlock();
return ret;
@@ -580,24 +662,23 @@ static ssize_t phys_port_name_show(struct device *dev,
{
struct net_device *netdev = to_net_dev(dev);
ssize_t ret = -EINVAL;
+ char name[IFNAMSIZ];
/* The checks are also done in dev_get_phys_port_name; this helps
- * returning early without hitting the trylock/restart below.
+ * returning early without hitting the locking section below.
*/
if (!netdev->netdev_ops->ndo_get_phys_port_name &&
!netdev->devlink_port)
return -EOPNOTSUPP;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
- if (dev_isalive(netdev)) {
- char name[IFNAMSIZ];
+ ret = dev_get_phys_port_name(netdev, name, sizeof(name));
+ if (!ret)
+ ret = sysfs_emit(buf, "%s\n", name);
- ret = dev_get_phys_port_name(netdev, name, sizeof(name));
- if (!ret)
- ret = sysfs_emit(buf, "%s\n", name);
- }
rtnl_unlock();
return ret;
@@ -608,26 +689,25 @@ static ssize_t phys_switch_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
+ struct netdev_phys_item_id ppid = { };
ssize_t ret = -EINVAL;
/* The checks are also done in dev_get_phys_port_name; this helps
- * returning early without hitting the trylock/restart below. This works
+ * returning early without hitting the locking section below. This works
* because recurse is false when calling dev_get_port_parent_id.
*/
if (!netdev->netdev_ops->ndo_get_port_parent_id &&
!netdev->devlink_port)
return -EOPNOTSUPP;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
- if (dev_isalive(netdev)) {
- struct netdev_phys_item_id ppid = { };
+ ret = dev_get_port_parent_id(netdev, &ppid, false);
+ if (!ret)
+ ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
- ret = dev_get_port_parent_id(netdev, &ppid, false);
- if (!ret)
- ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
- }
rtnl_unlock();
return ret;
@@ -1108,7 +1188,6 @@ static void rx_queue_get_ownership(const struct kobject *kobj,
static const struct kobj_type rx_queue_ktype = {
.sysfs_ops = &rx_queue_sysfs_ops,
.release = rx_queue_release,
- .default_groups = rx_queue_default_groups,
.namespace = rx_queue_namespace,
.get_ownership = rx_queue_get_ownership,
};
@@ -1131,6 +1210,22 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
struct kobject *kobj = &queue->kobj;
int error = 0;
+ /* Rx queues are cleared in rx_queue_release to allow later
+ * re-registration. This is triggered when their kobj refcount is
+ * dropped.
+ *
+ * If a queue is removed while both a read (or write) operation and a
+ * the re-addition of the same queue are pending (waiting on rntl_lock)
+ * it might happen that the re-addition will execute before the read,
+ * making the initial removal to never happen (queue's kobj refcount
+ * won't drop enough because of the pending read). In such rare case,
+ * return to allow the removal operation to complete.
+ */
+ if (unlikely(kobj->state_initialized)) {
+ netdev_warn_once(dev, "Cannot re-add rx queues before their removal completed");
+ return -EAGAIN;
+ }
+
/* Kobject_put later will trigger rx_queue_release call which
* decreases dev refcount: Take that reference here
*/
@@ -1142,20 +1237,27 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
if (error)
goto err;
+ queue->groups = rx_queue_default_groups;
+ error = sysfs_create_groups(kobj, queue->groups);
+ if (error)
+ goto err;
+
if (dev->sysfs_rx_queue_group) {
error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
if (error)
- goto err;
+ goto err_default_groups;
}
error = rx_queue_default_mask(dev, queue);
if (error)
- goto err;
+ goto err_default_groups;
kobject_uevent(kobj, KOBJ_ADD);
return error;
+err_default_groups:
+ sysfs_remove_groups(kobj, queue->groups);
err:
kobject_put(kobj);
return error;
@@ -1200,12 +1302,14 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
}
while (--i >= new_num) {
- struct kobject *kobj = &dev->_rx[i].kobj;
+ struct netdev_rx_queue *queue = &dev->_rx[i];
+ struct kobject *kobj = &queue->kobj;
if (!refcount_read(&dev_net(dev)->ns.count))
kobj->uevent_suppress = 1;
if (dev->sysfs_rx_queue_group)
sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
+ sysfs_remove_groups(kobj, queue->groups);
kobject_put(kobj);
}
@@ -1244,9 +1348,11 @@ static int net_rx_queue_change_owner(struct net_device *dev, int num,
*/
struct netdev_queue_attribute {
struct attribute attr;
- ssize_t (*show)(struct netdev_queue *queue, char *buf);
- ssize_t (*store)(struct netdev_queue *queue,
- const char *buf, size_t len);
+ ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf);
+ ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len);
};
#define to_netdev_queue_attr(_attr) \
container_of(_attr, struct netdev_queue_attribute, attr)
@@ -1263,7 +1369,7 @@ static ssize_t netdev_queue_attr_show(struct kobject *kobj,
if (!attribute->show)
return -EIO;
- return attribute->show(queue, buf);
+ return attribute->show(kobj, attr, queue, buf);
}
static ssize_t netdev_queue_attr_store(struct kobject *kobj,
@@ -1277,7 +1383,7 @@ static ssize_t netdev_queue_attr_store(struct kobject *kobj,
if (!attribute->store)
return -EIO;
- return attribute->store(queue, buf, count);
+ return attribute->store(kobj, attr, queue, buf, count);
}
static const struct sysfs_ops netdev_queue_sysfs_ops = {
@@ -1285,7 +1391,8 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
.store = netdev_queue_attr_store,
};
-static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf)
+static ssize_t tx_timeout_show(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout);
@@ -1303,18 +1410,18 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
return i;
}
-static ssize_t traffic_class_show(struct netdev_queue *queue,
- char *buf)
+static ssize_t traffic_class_show(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct net_device *dev = queue->dev;
- int num_tc, tc;
- int index;
+ int num_tc, tc, index, ret;
if (!netif_is_multiqueue(dev))
return -ENOENT;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(kobj, attr, queue->dev);
+ if (ret)
+ return ret;
index = get_netdev_queue_index(queue);
@@ -1341,24 +1448,25 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
}
#ifdef CONFIG_XPS
-static ssize_t tx_maxrate_show(struct netdev_queue *queue,
- char *buf)
+static ssize_t tx_maxrate_show(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
return sysfs_emit(buf, "%lu\n", queue->tx_maxrate);
}
-static ssize_t tx_maxrate_store(struct netdev_queue *queue,
- const char *buf, size_t len)
+static ssize_t tx_maxrate_store(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len)
{
- struct net_device *dev = queue->dev;
int err, index = get_netdev_queue_index(queue);
+ struct net_device *dev = queue->dev;
u32 rate = 0;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
/* The check is also done later; this helps returning early without
- * hitting the trylock/restart below.
+ * hitting the locking section below.
*/
if (!dev->netdev_ops->ndo_set_tx_maxrate)
return -EOPNOTSUPP;
@@ -1367,18 +1475,21 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue,
if (err < 0)
return err;
- if (!rtnl_trylock())
- return restart_syscall();
+ err = sysfs_rtnl_lock(kobj, attr, dev);
+ if (err)
+ return err;
err = -EOPNOTSUPP;
if (dev->netdev_ops->ndo_set_tx_maxrate)
err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
- rtnl_unlock();
if (!err) {
queue->tx_maxrate = rate;
+ rtnl_unlock();
return len;
}
+
+ rtnl_unlock();
return err;
}
@@ -1422,16 +1533,17 @@ static ssize_t bql_set(const char *buf, const size_t count,
return count;
}
-static ssize_t bql_show_hold_time(struct netdev_queue *queue,
- char *buf)
+static ssize_t bql_show_hold_time(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
}
-static ssize_t bql_set_hold_time(struct netdev_queue *queue,
- const char *buf, size_t len)
+static ssize_t bql_set_hold_time(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len)
{
struct dql *dql = &queue->dql;
unsigned int value;
@@ -1450,15 +1562,17 @@ static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
= __ATTR(hold_time, 0644,
bql_show_hold_time, bql_set_hold_time);
-static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf)
+static ssize_t bql_show_stall_thrs(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs));
}
-static ssize_t bql_set_stall_thrs(struct netdev_queue *queue,
- const char *buf, size_t len)
+static ssize_t bql_set_stall_thrs(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len)
{
struct dql *dql = &queue->dql;
unsigned int value;
@@ -1484,13 +1598,15 @@ static ssize_t bql_set_stall_thrs(struct netdev_queue *queue,
static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init =
__ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs);
-static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf)
+static ssize_t bql_show_stall_max(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
return sysfs_emit(buf, "%u\n", READ_ONCE(queue->dql.stall_max));
}
-static ssize_t bql_set_stall_max(struct netdev_queue *queue,
- const char *buf, size_t len)
+static ssize_t bql_set_stall_max(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len)
{
WRITE_ONCE(queue->dql.stall_max, 0);
return len;
@@ -1499,7 +1615,8 @@ static ssize_t bql_set_stall_max(struct netdev_queue *queue,
static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init =
__ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max);
-static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf)
+static ssize_t bql_show_stall_cnt(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
@@ -1509,8 +1626,8 @@ static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf)
static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init =
__ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL);
-static ssize_t bql_show_inflight(struct netdev_queue *queue,
- char *buf)
+static ssize_t bql_show_inflight(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
@@ -1521,13 +1638,16 @@ static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
__ATTR(inflight, 0444, bql_show_inflight, NULL);
#define BQL_ATTR(NAME, FIELD) \
-static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \
- char *buf) \
+static ssize_t bql_show_ ## NAME(struct kobject *kobj, \
+ struct attribute *attr, \
+ struct netdev_queue *queue, char *buf) \
{ \
return bql_show(buf, queue->dql.FIELD); \
} \
\
-static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \
+static ssize_t bql_set_ ## NAME(struct kobject *kobj, \
+ struct attribute *attr, \
+ struct netdev_queue *queue, \
const char *buf, size_t len) \
{ \
return bql_set(buf, len, &queue->dql.FIELD); \
@@ -1613,19 +1733,21 @@ out_no_maps:
return len < PAGE_SIZE ? len : -EINVAL;
}
-static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf)
+static ssize_t xps_cpus_show(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct net_device *dev = queue->dev;
unsigned int index;
- int len, tc;
+ int len, tc, ret;
if (!netif_is_multiqueue(dev))
return -ENOENT;
index = get_netdev_queue_index(queue);
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(kobj, attr, queue->dev);
+ if (ret)
+ return ret;
/* If queue belongs to subordinate dev use its map */
dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
@@ -1636,18 +1758,21 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf)
return -EINVAL;
}
- /* Make sure the subordinate device can't be freed */
- get_device(&dev->dev);
+ /* Increase the net device refcnt to make sure it won't be freed while
+ * xps_queue_show is running.
+ */
+ dev_hold(dev);
rtnl_unlock();
len = xps_queue_show(dev, index, tc, buf, XPS_CPUS);
- put_device(&dev->dev);
+ dev_put(dev);
return len;
}
-static ssize_t xps_cpus_store(struct netdev_queue *queue,
- const char *buf, size_t len)
+static ssize_t xps_cpus_store(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len)
{
struct net_device *dev = queue->dev;
unsigned int index;
@@ -1671,9 +1796,10 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
return err;
}
- if (!rtnl_trylock()) {
+ err = sysfs_rtnl_lock(kobj, attr, dev);
+ if (err) {
free_cpumask_var(mask);
- return restart_syscall();
+ return err;
}
err = netif_set_xps_queue(dev, mask, index);
@@ -1687,26 +1813,34 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
= __ATTR_RW(xps_cpus);
-static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
+static ssize_t xps_rxqs_show(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct net_device *dev = queue->dev;
unsigned int index;
- int tc;
+ int tc, ret;
index = get_netdev_queue_index(queue);
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(kobj, attr, dev);
+ if (ret)
+ return ret;
tc = netdev_txq_to_tc(dev, index);
+
+ /* Increase the net device refcnt to make sure it won't be freed while
+ * xps_queue_show is running.
+ */
+ dev_hold(dev);
rtnl_unlock();
- if (tc < 0)
- return -EINVAL;
- return xps_queue_show(dev, index, tc, buf, XPS_RXQS);
+ ret = tc >= 0 ? xps_queue_show(dev, index, tc, buf, XPS_RXQS) : -EINVAL;
+ dev_put(dev);
+ return ret;
}
-static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
+static ssize_t xps_rxqs_store(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
size_t len)
{
struct net_device *dev = queue->dev;
@@ -1730,9 +1864,10 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
return err;
}
- if (!rtnl_trylock()) {
+ err = sysfs_rtnl_lock(kobj, attr, dev);
+ if (err) {
bitmap_free(mask);
- return restart_syscall();
+ return err;
}
cpus_read_lock();
@@ -1792,7 +1927,6 @@ static void netdev_queue_get_ownership(const struct kobject *kobj,
static const struct kobj_type netdev_queue_ktype = {
.sysfs_ops = &netdev_queue_sysfs_ops,
.release = netdev_queue_release,
- .default_groups = netdev_queue_default_groups,
.namespace = netdev_queue_namespace,
.get_ownership = netdev_queue_get_ownership,
};
@@ -1811,6 +1945,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
struct kobject *kobj = &queue->kobj;
int error = 0;
+ /* Tx queues are cleared in netdev_queue_release to allow later
+ * re-registration. This is triggered when their kobj refcount is
+ * dropped.
+ *
+ * If a queue is removed while both a read (or write) operation and a
+ * the re-addition of the same queue are pending (waiting on rntl_lock)
+ * it might happen that the re-addition will execute before the read,
+ * making the initial removal to never happen (queue's kobj refcount
+ * won't drop enough because of the pending read). In such rare case,
+ * return to allow the removal operation to complete.
+ */
+ if (unlikely(kobj->state_initialized)) {
+ netdev_warn_once(dev, "Cannot re-add tx queues before their removal completed");
+ return -EAGAIN;
+ }
+
/* Kobject_put later will trigger netdev_queue_release call
* which decreases dev refcount: Take that reference here
*/
@@ -1822,15 +1972,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
if (error)
goto err;
+ queue->groups = netdev_queue_default_groups;
+ error = sysfs_create_groups(kobj, queue->groups);
+ if (error)
+ goto err;
+
if (netdev_uses_bql(dev)) {
error = sysfs_create_group(kobj, &dql_group);
if (error)
- goto err;
+ goto err_default_groups;
}
kobject_uevent(kobj, KOBJ_ADD);
return 0;
+err_default_groups:
+ sysfs_remove_groups(kobj, queue->groups);
err:
kobject_put(kobj);
return error;
@@ -1885,6 +2042,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
if (netdev_uses_bql(dev))
sysfs_remove_group(&queue->kobj, &dql_group);
+ sysfs_remove_groups(&queue->kobj, queue->groups);
kobject_put(&queue->kobj);
}
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 715f85c6b62e..c92fba65b20d 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -10,6 +10,7 @@
#include <net/sock.h>
#include <net/xdp.h>
#include <net/xdp_sock.h>
+#include <net/page_pool/memory_provider.h>
#include "dev.h"
#include "devmem.h"
@@ -266,7 +267,7 @@ netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp,
prev_id = UINT_MAX;
list_for_each_entry(napi, &netdev->napi_list, dev_list) {
- if (napi->napi_id < MIN_NAPI_ID)
+ if (!napi_id_valid(napi->napi_id))
continue;
/* Dump continuation below depends on the list being sorted */
@@ -364,11 +365,18 @@ int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
return err;
}
+static int nla_put_napi_id(struct sk_buff *skb, const struct napi_struct *napi)
+{
+ if (napi && napi_id_valid(napi->napi_id))
+ return nla_put_u32(skb, NETDEV_A_QUEUE_NAPI_ID, napi->napi_id);
+ return 0;
+}
+
static int
netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
u32 q_idx, u32 q_type, const struct genl_info *info)
{
- struct net_devmem_dmabuf_binding *binding;
+ struct pp_memory_provider_params *params;
struct netdev_rx_queue *rxq;
struct netdev_queue *txq;
void *hdr;
@@ -385,21 +393,30 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
switch (q_type) {
case NETDEV_QUEUE_TYPE_RX:
rxq = __netif_get_rx_queue(netdev, q_idx);
- if (rxq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID,
- rxq->napi->napi_id))
+ if (nla_put_napi_id(rsp, rxq->napi))
goto nla_put_failure;
- binding = rxq->mp_params.mp_priv;
- if (binding &&
- nla_put_u32(rsp, NETDEV_A_QUEUE_DMABUF, binding->id))
+ params = &rxq->mp_params;
+ if (params->mp_ops &&
+ params->mp_ops->nl_fill(params->mp_priv, rsp, rxq))
goto nla_put_failure;
+#ifdef CONFIG_XDP_SOCKETS
+ if (rxq->pool)
+ if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
+ goto nla_put_failure;
+#endif
break;
case NETDEV_QUEUE_TYPE_TX:
txq = netdev_get_tx_queue(netdev, q_idx);
- if (txq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID,
- txq->napi->napi_id))
+ if (nla_put_napi_id(rsp, txq->napi))
goto nla_put_failure;
+#ifdef CONFIG_XDP_SOCKETS
+ if (txq->pool)
+ if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
+ goto nla_put_failure;
+#endif
+ break;
}
genlmsg_end(rsp, hdr);
@@ -576,6 +593,7 @@ netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx)
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_COMPLETE, rx->csum_complete) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) ||
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index db82786fa0c4..ddd54e1e7289 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -3,34 +3,34 @@
#include <linux/netdevice.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
+#include <net/page_pool/memory_provider.h>
#include "page_pool_priv.h"
int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
{
struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, rxq_idx);
+ const struct netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops;
void *new_mem, *old_mem;
int err;
- if (!dev->queue_mgmt_ops || !dev->queue_mgmt_ops->ndo_queue_stop ||
- !dev->queue_mgmt_ops->ndo_queue_mem_free ||
- !dev->queue_mgmt_ops->ndo_queue_mem_alloc ||
- !dev->queue_mgmt_ops->ndo_queue_start)
+ if (!qops || !qops->ndo_queue_stop || !qops->ndo_queue_mem_free ||
+ !qops->ndo_queue_mem_alloc || !qops->ndo_queue_start)
return -EOPNOTSUPP;
ASSERT_RTNL();
- new_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL);
+ new_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL);
if (!new_mem)
return -ENOMEM;
- old_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL);
+ old_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL);
if (!old_mem) {
err = -ENOMEM;
goto err_free_new_mem;
}
- err = dev->queue_mgmt_ops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx);
+ err = qops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx);
if (err)
goto err_free_old_mem;
@@ -38,15 +38,19 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
if (err)
goto err_free_new_queue_mem;
- err = dev->queue_mgmt_ops->ndo_queue_stop(dev, old_mem, rxq_idx);
- if (err)
- goto err_free_new_queue_mem;
+ if (netif_running(dev)) {
+ err = qops->ndo_queue_stop(dev, old_mem, rxq_idx);
+ if (err)
+ goto err_free_new_queue_mem;
- err = dev->queue_mgmt_ops->ndo_queue_start(dev, new_mem, rxq_idx);
- if (err)
- goto err_start_queue;
+ err = qops->ndo_queue_start(dev, new_mem, rxq_idx);
+ if (err)
+ goto err_start_queue;
+ } else {
+ swap(new_mem, old_mem);
+ }
- dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem);
+ qops->ndo_queue_mem_free(dev, old_mem);
kvfree(old_mem);
kvfree(new_mem);
@@ -61,15 +65,15 @@ err_start_queue:
* WARN if we fail to recover the old rx queue, and at least free
* old_mem so we don't also leak that.
*/
- if (dev->queue_mgmt_ops->ndo_queue_start(dev, old_mem, rxq_idx)) {
+ if (qops->ndo_queue_start(dev, old_mem, rxq_idx)) {
WARN(1,
"Failed to restart old queue in error path. RX queue %d may be unhealthy.",
rxq_idx);
- dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem);
+ qops->ndo_queue_mem_free(dev, old_mem);
}
err_free_new_queue_mem:
- dev->queue_mgmt_ops->ndo_queue_mem_free(dev, new_mem);
+ qops->ndo_queue_mem_free(dev, new_mem);
err_free_old_mem:
kvfree(old_mem);
@@ -80,3 +84,71 @@ err_free_new_mem:
return err;
}
EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL");
+
+static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *p)
+{
+ struct netdev_rx_queue *rxq;
+ int ret;
+
+ if (ifq_idx >= dev->real_num_rx_queues)
+ return -EINVAL;
+ ifq_idx = array_index_nospec(ifq_idx, dev->real_num_rx_queues);
+
+ rxq = __netif_get_rx_queue(dev, ifq_idx);
+ if (rxq->mp_params.mp_ops)
+ return -EEXIST;
+
+ rxq->mp_params = *p;
+ ret = netdev_rx_queue_restart(dev, ifq_idx);
+ if (ret) {
+ rxq->mp_params.mp_ops = NULL;
+ rxq->mp_params.mp_priv = NULL;
+ }
+ return ret;
+}
+
+int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *p)
+{
+ int ret;
+
+ rtnl_lock();
+ ret = __net_mp_open_rxq(dev, ifq_idx, p);
+ rtnl_unlock();
+ return ret;
+}
+
+static void __net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *old_p)
+{
+ struct netdev_rx_queue *rxq;
+
+ if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
+ return;
+
+ rxq = __netif_get_rx_queue(dev, ifq_idx);
+
+ /* Callers holding a netdev ref may get here after we already
+ * went thru shutdown via dev_memory_provider_uninstall().
+ */
+ if (dev->reg_state > NETREG_REGISTERED &&
+ !rxq->mp_params.mp_ops)
+ return;
+
+ if (WARN_ON_ONCE(rxq->mp_params.mp_ops != old_p->mp_ops ||
+ rxq->mp_params.mp_priv != old_p->mp_priv))
+ return;
+
+ rxq->mp_params.mp_ops = NULL;
+ rxq->mp_params.mp_priv = NULL;
+ WARN_ON(netdev_rx_queue_restart(dev, ifq_idx));
+}
+
+void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *old_p)
+{
+ rtnl_lock();
+ __net_mp_close_rxq(dev, ifq_idx, old_p);
+ rtnl_unlock();
+}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index f5e908c9e7ad..acef1fcd8ddc 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -13,6 +13,7 @@
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
+#include <net/page_pool/memory_provider.h>
#include <net/xdp.h>
#include <linux/dma-direction.h>
@@ -25,6 +26,7 @@
#include <trace/events/page_pool.h>
+#include "dev.h"
#include "mp_dmabuf_devmem.h"
#include "netmem_priv.h"
#include "page_pool_priv.h"
@@ -285,13 +287,19 @@ static int page_pool_init(struct page_pool *pool,
rxq = __netif_get_rx_queue(pool->slow.netdev,
pool->slow.queue_idx);
pool->mp_priv = rxq->mp_params.mp_priv;
+ pool->mp_ops = rxq->mp_params.mp_ops;
}
- if (pool->mp_priv) {
+ if (pool->mp_ops) {
if (!pool->dma_map || !pool->dma_sync)
return -EOPNOTSUPP;
- err = mp_dmabuf_devmem_init(pool);
+ if (WARN_ON(!is_kernel_rodata((unsigned long)pool->mp_ops))) {
+ err = -EFAULT;
+ goto free_ptr_ring;
+ }
+
+ err = pool->mp_ops->init(pool);
if (err) {
pr_warn("%s() mem-provider init failed %d\n", __func__,
err);
@@ -587,8 +595,8 @@ netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp)
return netmem;
/* Slow-path: cache empty, do real allocation */
- if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv)
- netmem = mp_dmabuf_devmem_alloc_netmems(pool, gfp);
+ if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
+ netmem = pool->mp_ops->alloc_netmems(pool, gfp);
else
netmem = __page_pool_alloc_pages_slow(pool, gfp);
return netmem;
@@ -679,8 +687,8 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
bool put;
put = true;
- if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv)
- put = mp_dmabuf_devmem_release_page(pool, netmem);
+ if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
+ put = pool->mp_ops->release_netmem(pool, netmem);
else
__page_pool_release_page_dma(pool, netmem);
@@ -1048,8 +1056,8 @@ static void __page_pool_destroy(struct page_pool *pool)
page_pool_unlist(pool);
page_pool_uninit(pool);
- if (pool->mp_priv) {
- mp_dmabuf_devmem_destroy(pool);
+ if (pool->mp_ops) {
+ pool->mp_ops->destroy(pool);
static_branch_dec(&page_pool_mem_providers);
}
@@ -1104,7 +1112,13 @@ static void page_pool_release_retry(struct work_struct *wq)
int inflight;
inflight = page_pool_release(pool);
- if (!inflight)
+ /* In rare cases, a driver bug may cause inflight to go negative.
+ * Don't reschedule release if inflight is 0 or negative.
+ * - If 0, the page_pool has been destroyed
+ * - if negative, we will never recover
+ * in both cases no reschedule is necessary.
+ */
+ if (inflight <= 0)
return;
/* Periodic warning for page pools the user can't see */
@@ -1140,11 +1154,7 @@ void page_pool_disable_direct_recycling(struct page_pool *pool)
if (!pool->p.napi)
return;
- /* To avoid races with recycling and additional barriers make sure
- * pool and NAPI are unlinked when NAPI is disabled.
- */
- WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state));
- WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1);
+ napi_assert_will_not_race(pool->p.napi);
mutex_lock(&page_pools_lock);
WRITE_ONCE(pool->p.napi, NULL);
@@ -1190,3 +1200,31 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
}
}
EXPORT_SYMBOL(page_pool_update_nid);
+
+bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr)
+{
+ return page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), addr);
+}
+
+/* Associate a niov with a page pool. Should follow with a matching
+ * net_mp_niov_clear_page_pool()
+ */
+void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov)
+{
+ netmem_ref netmem = net_iov_to_netmem(niov);
+
+ page_pool_set_pp_info(pool, netmem);
+
+ pool->pages_state_hold_cnt++;
+ trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt);
+}
+
+/* Disassociate a niov from a page pool. Should only be used in the
+ * ->release_netmem() path.
+ */
+void net_mp_niov_clear_page_pool(struct net_iov *niov)
+{
+ netmem_ref netmem = net_iov_to_netmem(niov);
+
+ page_pool_clear_pp_info(netmem);
+}
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
index 6677e0c2e256..c82a95beceff 100644
--- a/net/core/page_pool_user.c
+++ b/net/core/page_pool_user.c
@@ -8,9 +8,9 @@
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/page_pool/types.h>
+#include <net/page_pool/memory_provider.h>
#include <net/sock.h>
-#include "devmem.h"
#include "page_pool_priv.h"
#include "netdev-genl-gen.h"
@@ -216,7 +216,6 @@ static int
page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
const struct genl_info *info)
{
- struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
size_t inflight, refsz;
unsigned int napi_id;
void *hdr;
@@ -234,7 +233,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
goto err_cancel;
napi_id = pool->p.napi ? READ_ONCE(pool->p.napi->napi_id) : 0;
- if (napi_id >= MIN_NAPI_ID &&
+ if (napi_id_valid(napi_id) &&
nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, napi_id))
goto err_cancel;
@@ -249,7 +248,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
pool->user.detach_time))
goto err_cancel;
- if (binding && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id))
+ if (pool->mp_ops && pool->mp_ops->nl_fill(pool->mp_priv, rsp, NULL))
goto err_cancel;
genlmsg_end(rsp, hdr);
@@ -356,7 +355,7 @@ void page_pool_unlist(struct page_pool *pool)
int page_pool_check_memory_provider(struct net_device *dev,
struct netdev_rx_queue *rxq)
{
- struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv;
+ void *binding = rxq->mp_params.mp_priv;
struct page_pool *pool;
struct hlist_node *n;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d1e559fce918..abe1a461ea67 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -80,6 +80,11 @@ void rtnl_lock(void)
}
EXPORT_SYMBOL(rtnl_lock);
+int rtnl_lock_interruptible(void)
+{
+ return mutex_lock_interruptible(&rtnl_mutex);
+}
+
int rtnl_lock_killable(void)
{
return mutex_lock_killable(&rtnl_mutex);
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index b0ff6153be62..568779d5a0ef 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -71,7 +71,7 @@ u32 secure_tcpv6_ts_off(const struct net *net,
return siphash(&combined, offsetofend(typeof(combined), daddr),
&ts_secret);
}
-EXPORT_SYMBOL(secure_tcpv6_ts_off);
+EXPORT_IPV6_MOD(secure_tcpv6_ts_off);
u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
__be16 sport, __be16 dport)
diff --git a/net/core/sock.c b/net/core/sock.c
index eae2ae70a2e0..0d385bf27b38 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -938,6 +938,7 @@ int sock_set_timestamping(struct sock *sk, int optname,
WRITE_ONCE(sk->sk_tsflags, val);
sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
+ sock_valbool_flag(sk, SOCK_TIMESTAMPING_ANY, !!(val & TSFLAGS_ANY));
if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
sock_enable_timestamp(sk,
@@ -2041,7 +2042,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
v.val = READ_ONCE(sk->sk_napi_id);
/* aggregate non-NAPI IDs down to 0 */
- if (v.val < MIN_NAPI_ID)
+ if (!napi_id_valid(v.val))
v.val = 0;
break;
@@ -3881,7 +3882,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem)
mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
- mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk);
+ mem[SK_MEMINFO_FWD_ALLOC] = READ_ONCE(sk->sk_forward_alloc);
mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index be515ba821e2..bfa529a54aca 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -426,9 +426,6 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
newinet = inet_sk(newsk);
ireq = inet_rsk(req);
- sk_daddr_set(newsk, ireq->ir_rmt_addr);
- sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
- newinet->inet_saddr = ireq->ir_loc_addr;
RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d6649246188d..39ae9d89d7d4 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -365,6 +365,9 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
ireq = inet_rsk(req);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
+ ireq->ir_rmt_addr = LOOPBACK4_IPV6;
+ ireq->ir_loc_addr = LOOPBACK4_IPV6;
+
ireq->ireq_family = AF_INET6;
ireq->ir_mark = inet_request_mark(sk, skb);
@@ -504,10 +507,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
- newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
newnp->saddr = ireq->ir_v6_loc_addr;
- newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
- newsk->sk_bound_dev_if = ireq->ir_iif;
/* Now IPv6 options...
@@ -546,9 +546,6 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
dccp_sync_mss(newsk, dst_mtu(dst));
- newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
- newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
-
if (__inet_inherit_port(sk, newsk) < 0) {
inet_csk_prepare_forced_close(newsk);
dccp_done(newsk);
diff --git a/net/dsa/user.c b/net/dsa/user.c
index 291ab1b4acc4..2296a4ead020 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -1243,16 +1243,25 @@ static int dsa_user_set_eee(struct net_device *dev, struct ethtool_keee *e)
if (!ds->ops->support_eee || !ds->ops->support_eee(ds, dp->index))
return -EOPNOTSUPP;
- /* Port's PHY and MAC both need to be EEE capable */
- if (!dev->phydev)
- return -ENODEV;
+ /* If the port is using phylink managed EEE, then an unimplemented
+ * set_mac_eee() is permissible.
+ */
+ if (!phylink_mac_implements_lpi(ds->phylink_mac_ops)) {
+ /* Port's PHY and MAC both need to be EEE capable */
+ if (!dev->phydev)
+ return -ENODEV;
- if (!ds->ops->set_mac_eee)
- return -EOPNOTSUPP;
+ if (!ds->ops->set_mac_eee)
+ return -EOPNOTSUPP;
- ret = ds->ops->set_mac_eee(ds, dp->index, e);
- if (ret)
- return ret;
+ ret = ds->ops->set_mac_eee(ds, dp->index, e);
+ if (ret)
+ return ret;
+ } else if (ds->ops->set_mac_eee) {
+ ret = ds->ops->set_mac_eee(ds, dp->index, e);
+ if (ret)
+ return ret;
+ }
return phylink_ethtool_set_eee(dp->pl, e);
}
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index d88e9080643b..7149d07e90c6 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -213,6 +213,24 @@ const char link_mode_names[][ETH_GSTRING_LEN] = {
__DEFINE_LINK_MODE_NAME(10, T1S, Half),
__DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half),
__DEFINE_LINK_MODE_NAME(10, T1BRR, Full),
+ __DEFINE_LINK_MODE_NAME(200000, CR, Full),
+ __DEFINE_LINK_MODE_NAME(200000, KR, Full),
+ __DEFINE_LINK_MODE_NAME(200000, DR, Full),
+ __DEFINE_LINK_MODE_NAME(200000, DR_2, Full),
+ __DEFINE_LINK_MODE_NAME(200000, SR, Full),
+ __DEFINE_LINK_MODE_NAME(200000, VR, Full),
+ __DEFINE_LINK_MODE_NAME(400000, CR2, Full),
+ __DEFINE_LINK_MODE_NAME(400000, KR2, Full),
+ __DEFINE_LINK_MODE_NAME(400000, DR2, Full),
+ __DEFINE_LINK_MODE_NAME(400000, DR2_2, Full),
+ __DEFINE_LINK_MODE_NAME(400000, SR2, Full),
+ __DEFINE_LINK_MODE_NAME(400000, VR2, Full),
+ __DEFINE_LINK_MODE_NAME(800000, CR4, Full),
+ __DEFINE_LINK_MODE_NAME(800000, KR4, Full),
+ __DEFINE_LINK_MODE_NAME(800000, DR4, Full),
+ __DEFINE_LINK_MODE_NAME(800000, DR4_2, Full),
+ __DEFINE_LINK_MODE_NAME(800000, SR4, Full),
+ __DEFINE_LINK_MODE_NAME(800000, VR4, Full),
};
static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -221,8 +239,11 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
#define __LINK_MODE_LANES_CR4 4
#define __LINK_MODE_LANES_CR8 8
#define __LINK_MODE_LANES_DR 1
+#define __LINK_MODE_LANES_DR_2 1
#define __LINK_MODE_LANES_DR2 2
+#define __LINK_MODE_LANES_DR2_2 2
#define __LINK_MODE_LANES_DR4 4
+#define __LINK_MODE_LANES_DR4_2 4
#define __LINK_MODE_LANES_DR8 8
#define __LINK_MODE_LANES_KR 1
#define __LINK_MODE_LANES_KR2 2
@@ -251,6 +272,9 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
#define __LINK_MODE_LANES_T1L 1
#define __LINK_MODE_LANES_T1S 1
#define __LINK_MODE_LANES_T1S_P2MP 1
+#define __LINK_MODE_LANES_VR 1
+#define __LINK_MODE_LANES_VR2 2
+#define __LINK_MODE_LANES_VR4 4
#define __LINK_MODE_LANES_VR8 8
#define __LINK_MODE_LANES_DR8_2 8
#define __LINK_MODE_LANES_T1BRR 1
@@ -378,6 +402,24 @@ const struct link_mode_info link_mode_params[] = {
__DEFINE_LINK_MODE_PARAMS(10, T1S, Half),
__DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half),
__DEFINE_LINK_MODE_PARAMS(10, T1BRR, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, CR, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, KR, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, DR, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, DR_2, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, SR, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, VR, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, CR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, KR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, DR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, DR2_2, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, SR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, VR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(800000, CR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(800000, KR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(800000, DR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(800000, DR4_2, Full),
+ __DEFINE_LINK_MODE_PARAMS(800000, SR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(800000, VR4, Full),
};
static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS);
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 7609ce2b2c5e..271c7cef9ef3 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -992,11 +992,17 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
if (rc)
return rc;
- /* Nonzero ring with RSS only makes sense if NIC adds them together */
- if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS &&
- !ops->cap_rss_rxnfc_adds &&
- ethtool_get_flow_spec_ring(info.fs.ring_cookie))
- return -EINVAL;
+ if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS) {
+ /* Nonzero ring with RSS only makes sense
+ * if NIC adds them together
+ */
+ if (!ops->cap_rss_rxnfc_adds &&
+ ethtool_get_flow_spec_ring(info.fs.ring_cookie))
+ return -EINVAL;
+
+ if (!xa_load(&dev->ethtool->rss_ctx, info.rss_context))
+ return -EINVAL;
+ }
if (cmd == ETHTOOL_SRXFH && ops->get_rxfh) {
struct ethtool_rxfh_param rxfh = {};
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 21f46ee7b6e9..5df1f1325259 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -153,7 +153,7 @@ void inet_sock_destruct(struct sock *sk)
WARN_ON_ONCE(atomic_read(&sk->sk_rmem_alloc));
WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
WARN_ON_ONCE(sk->sk_wmem_queued);
- WARN_ON_ONCE(sk_forward_alloc_get(sk));
+ WARN_ON_ONCE(sk->sk_forward_alloc);
kfree(rcu_dereference_protected(inet->inet_opt, 1));
dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 814300eee39d..a648fff71ea7 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1064,8 +1064,8 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
IPV4_DEVCONF_ALL(net, PROXY_ARP) = on;
return 0;
}
- if (__in_dev_get_rtnl(dev)) {
- IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on);
+ if (__in_dev_get_rtnl_net(dev)) {
+ IN_DEV_CONF_SET(__in_dev_get_rtnl_net(dev), PROXY_ARP, on);
return 0;
}
return -ENXIO;
@@ -1295,14 +1295,14 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
switch (cmd) {
case SIOCDARP:
- rtnl_lock();
+ rtnl_net_lock(net);
err = arp_req_delete(net, &r);
- rtnl_unlock();
+ rtnl_net_unlock(net);
break;
case SIOCSARP:
- rtnl_lock();
+ rtnl_net_lock(net);
err = arp_req_set(net, &r);
- rtnl_unlock();
+ rtnl_net_unlock(net);
break;
case SIOCGARP:
rcu_read_lock();
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 55b8151759bc..754f60fb6e25 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -46,6 +46,7 @@
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
+#include "igmp_internal.h"
#include <linux/slab.h>
#include <linux/hash.h>
#ifdef CONFIG_SYSCTL
@@ -107,15 +108,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
[IFA_PROTO] = { .type = NLA_U8 },
};
-struct inet_fill_args {
- u32 portid;
- u32 seq;
- int event;
- unsigned int flags;
- int netnsid;
- int ifindex;
-};
-
#define IN4_ADDR_HSIZE_SHIFT 8
#define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
@@ -1847,9 +1839,38 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
return 0;
}
-static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
- struct netlink_callback *cb, int *s_ip_idx,
- struct inet_fill_args *fillargs)
+static int in_dev_dump_ifmcaddr(struct in_device *in_dev, struct sk_buff *skb,
+ struct netlink_callback *cb, int *s_ip_idx,
+ struct inet_fill_args *fillargs)
+{
+ struct ip_mc_list *im;
+ int ip_idx = 0;
+ int err;
+
+ for (im = rcu_dereference(in_dev->mc_list);
+ im;
+ im = rcu_dereference(im->next_rcu)) {
+ if (ip_idx < *s_ip_idx) {
+ ip_idx++;
+ continue;
+ }
+ err = inet_fill_ifmcaddr(skb, in_dev->dev, im, fillargs);
+ if (err < 0)
+ goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ ip_idx++;
+ }
+ err = 0;
+ ip_idx = 0;
+done:
+ *s_ip_idx = ip_idx;
+ return err;
+}
+
+static int in_dev_dump_ifaddr(struct in_device *in_dev, struct sk_buff *skb,
+ struct netlink_callback *cb, int *s_ip_idx,
+ struct inet_fill_args *fillargs)
{
struct in_ifaddr *ifa;
int ip_idx = 0;
@@ -1875,6 +1896,21 @@ done:
return err;
}
+static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
+ struct netlink_callback *cb, int *s_ip_idx,
+ struct inet_fill_args *fillargs)
+{
+ switch (fillargs->event) {
+ case RTM_NEWADDR:
+ return in_dev_dump_ifaddr(in_dev, skb, cb, s_ip_idx, fillargs);
+ case RTM_GETMULTICAST:
+ return in_dev_dump_ifmcaddr(in_dev, skb, cb, s_ip_idx,
+ fillargs);
+ default:
+ return -EINVAL;
+ }
+}
+
/* Combine dev_addr_genid and dev_base_seq to detect changes.
*/
static u32 inet_base_seq(const struct net *net)
@@ -1890,13 +1926,14 @@ static u32 inet_base_seq(const struct net *net)
return res;
}
-static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+static int inet_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
+ int event)
{
const struct nlmsghdr *nlh = cb->nlh;
struct inet_fill_args fillargs = {
.portid = NETLINK_CB(cb->skb).portid,
.seq = nlh->nlmsg_seq,
- .event = RTM_NEWADDR,
+ .event = event,
.flags = NLM_F_MULTI,
.netnsid = -1,
};
@@ -1950,6 +1987,16 @@ done:
return err;
}
+static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return inet_dump_addr(skb, cb, RTM_NEWADDR);
+}
+
+static int inet_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return inet_dump_addr(skb, cb, RTM_GETMULTICAST);
+}
+
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
u32 portid)
{
@@ -2846,6 +2893,8 @@ static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = {
{.protocol = PF_INET, .msgtype = RTM_GETNETCONF,
.doit = inet_netconf_get_devconf, .dumpit = inet_netconf_dump_devconf,
.flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET, .msgtype = RTM_GETMULTICAST,
+ .dumpit = inet_dump_ifmcaddr, .flags = RTNL_FLAG_DUMP_UNLOCKED},
};
void __init devinet_init(void)
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 9517b8667e00..6b3d6a957822 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -201,12 +201,12 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
return 0;
- if (fib_rule_port_range_set(&rule->sport_range) &&
- !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport))
+ if (!fib_rule_port_match(&rule->sport_range, rule->sport_mask,
+ fl4->fl4_sport))
return 0;
- if (fib_rule_port_range_set(&rule->dport_range) &&
- !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport))
+ if (!fib_rule_port_match(&rule->dport_range, rule->dport_mask,
+ fl4->fl4_dport))
return 0;
return 1;
@@ -245,9 +245,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct nlattr **tb,
struct netlink_ext_ack *extack)
{
- struct net *net = sock_net(skb->sk);
+ struct fib4_rule *rule4 = (struct fib4_rule *)rule;
+ struct net *net = rule->fr_net;
int err = -EINVAL;
- struct fib4_rule *rule4 = (struct fib4_rule *) rule;
if (tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) {
NL_SET_ERR_MSG(extack,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 5482edb5aade..799775ba97d4 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -405,7 +405,6 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
struct ipcm_cookie ipc;
struct flowi4 fl4;
struct sock *sk;
- struct inet_sock *inet;
__be32 daddr, saddr;
u32 mark = IP4_REPLY_MARK(net, skb->mark);
int type = icmp_param->data.icmph.type;
@@ -424,12 +423,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
sk = icmp_xmit_lock(net);
if (!sk)
goto out_bh_enable;
- inet = inet_sk(sk);
icmp_param->data.icmph.checksum = 0;
ipcm_init(&ipc);
- inet->tos = ip_hdr(skb)->tos;
+ ipc.tos = ip_hdr(skb)->tos;
ipc.sockc.mark = mark;
daddr = ipc.addr = ip_hdr(skb)->saddr;
saddr = fib_compute_spec_dst(skb);
@@ -737,8 +735,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
icmp_param.data.icmph.checksum = 0;
icmp_param.skb = skb_in;
icmp_param.offset = skb_network_offset(skb_in);
- inet_sk(sk)->tos = tos;
ipcm_init(&ipc);
+ ipc.tos = tos;
ipc.addr = iph->saddr;
ipc.opt = &icmp_param.replyopts.opt;
ipc.sockc.mark = mark;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 3da126cea884..2c394c364cb9 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -81,6 +81,7 @@
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
+#include "igmp_internal.h"
#include <linux/if_arp.h>
#include <linux/rtnetlink.h>
#include <linux/times.h>
@@ -1432,14 +1433,16 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
*mc_hash = im->next_hash;
}
-static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev,
- const struct ip_mc_list *im, int event)
+int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev,
+ const struct ip_mc_list *im,
+ struct inet_fill_args *args)
{
struct ifa_cacheinfo ci;
struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
- nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0);
+ nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
+ sizeof(struct ifaddrmsg), args->flags);
if (!nlh)
return -EMSGSIZE;
@@ -1468,6 +1471,9 @@ static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev,
static void inet_ifmcaddr_notify(struct net_device *dev,
const struct ip_mc_list *im, int event)
{
+ struct inet_fill_args fillargs = {
+ .event = event,
+ };
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOMEM;
@@ -1479,7 +1485,7 @@ static void inet_ifmcaddr_notify(struct net_device *dev,
if (!skb)
goto error;
- err = inet_fill_ifmcaddr(skb, dev, im, event);
+ err = inet_fill_ifmcaddr(skb, dev, im, &fillargs);
if (err < 0) {
WARN_ON_ONCE(err == -EMSGSIZE);
nlmsg_free(skb);
diff --git a/net/ipv4/igmp_internal.h b/net/ipv4/igmp_internal.h
new file mode 100644
index 000000000000..0a1bcc8ec8e1
--- /dev/null
+++ b/net/ipv4/igmp_internal.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _LINUX_IGMP_INTERNAL_H
+#define _LINUX_IGMP_INTERNAL_H
+
+struct inet_fill_args {
+ u32 portid;
+ u32 seq;
+ int event;
+ unsigned int flags;
+ int netnsid;
+ int ifindex;
+};
+
+int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev,
+ const struct ip_mc_list *im,
+ struct inet_fill_args *args);
+#endif
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index e4decfb270fa..bf9ce0c19657 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -799,18 +799,6 @@ void inet_csk_clear_xmit_timers_sync(struct sock *sk)
sk_stop_timer_sync(sk, &sk->sk_timer);
}
-void inet_csk_delete_keepalive_timer(struct sock *sk)
-{
- sk_stop_timer(sk, &sk->sk_timer);
-}
-EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);
-
-void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
-{
- sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
-}
-EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
-
struct dst_entry *inet_csk_route_req(const struct sock *sk,
struct flowi4 *fl4,
const struct request_sock *req)
@@ -1249,39 +1237,59 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
const gfp_t priority)
{
struct sock *newsk = sk_clone_lock(sk, priority);
+ struct inet_connection_sock *newicsk;
+ struct inet_request_sock *ireq;
+ struct inet_sock *newinet;
- if (newsk) {
- struct inet_connection_sock *newicsk = inet_csk(newsk);
+ if (!newsk)
+ return NULL;
- inet_sk_set_state(newsk, TCP_SYN_RECV);
- newicsk->icsk_bind_hash = NULL;
- newicsk->icsk_bind2_hash = NULL;
+ newicsk = inet_csk(newsk);
+ newinet = inet_sk(newsk);
+ ireq = inet_rsk(req);
- inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
- inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
- inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
+ newicsk->icsk_bind_hash = NULL;
+ newicsk->icsk_bind2_hash = NULL;
- /* listeners have SOCK_RCU_FREE, not the children */
- sock_reset_flag(newsk, SOCK_RCU_FREE);
+ newinet->inet_dport = ireq->ir_rmt_port;
+ newinet->inet_num = ireq->ir_num;
+ newinet->inet_sport = htons(ireq->ir_num);
- inet_sk(newsk)->mc_list = NULL;
+ newsk->sk_bound_dev_if = ireq->ir_iif;
- newsk->sk_mark = inet_rsk(req)->ir_mark;
- atomic64_set(&newsk->sk_cookie,
- atomic64_read(&inet_rsk(req)->ir_cookie));
+ newsk->sk_daddr = ireq->ir_rmt_addr;
+ newsk->sk_rcv_saddr = ireq->ir_loc_addr;
+ newinet->inet_saddr = ireq->ir_loc_addr;
- newicsk->icsk_retransmits = 0;
- newicsk->icsk_backoff = 0;
- newicsk->icsk_probes_out = 0;
- newicsk->icsk_probes_tstamp = 0;
+#if IS_ENABLED(CONFIG_IPV6)
+ newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
+ newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
+#endif
- /* Deinitialize accept_queue to trap illegal accesses. */
- memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
+ /* listeners have SOCK_RCU_FREE, not the children */
+ sock_reset_flag(newsk, SOCK_RCU_FREE);
- inet_clone_ulp(req, newsk, priority);
+ inet_sk(newsk)->mc_list = NULL;
+
+ newsk->sk_mark = inet_rsk(req)->ir_mark;
+ atomic64_set(&newsk->sk_cookie,
+ atomic64_read(&inet_rsk(req)->ir_cookie));
+
+ newicsk->icsk_retransmits = 0;
+ newicsk->icsk_backoff = 0;
+ newicsk->icsk_probes_out = 0;
+ newicsk->icsk_probes_tstamp = 0;
+
+ /* Deinitialize accept_queue to trap illegal accesses. */
+ memset(&newicsk->icsk_accept_queue, 0,
+ sizeof(newicsk->icsk_accept_queue));
+
+ inet_sk_set_state(newsk, TCP_SYN_RECV);
+
+ inet_clone_ulp(req, newsk, priority);
+
+ security_inet_csk_clone(newsk, req);
- security_inet_csk_clone(newsk, req);
- }
return newsk;
}
EXPORT_SYMBOL_GPL(inet_csk_clone_lock);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 321acc8abf17..efe2a085cf68 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -282,7 +282,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
struct inet_diag_meminfo minfo = {
.idiag_rmem = sk_rmem_alloc_get(sk),
.idiag_wmem = READ_ONCE(sk->sk_wmem_queued),
- .idiag_fmem = sk_forward_alloc_get(sk),
+ .idiag_fmem = READ_ONCE(sk->sk_forward_alloc),
.idiag_tmem = sk_wmem_alloc_get(sk),
};
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index b8b23a77ceb4..7b1e0a2d6906 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -60,7 +60,7 @@ void inet_peer_base_init(struct inet_peer_base *bp)
seqlock_init(&bp->lock);
bp->total = 0;
}
-EXPORT_SYMBOL_GPL(inet_peer_base_init);
+EXPORT_IPV6_MOD_GPL(inet_peer_base_init);
#define PEER_MAX_GC 32
@@ -218,7 +218,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
return p;
}
-EXPORT_SYMBOL_GPL(inet_getpeer);
+EXPORT_IPV6_MOD_GPL(inet_getpeer);
void inet_putpeer(struct inet_peer *p)
{
@@ -269,7 +269,7 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
WRITE_ONCE(peer->rate_tokens, token);
return rc;
}
-EXPORT_SYMBOL(inet_peer_xrlim_allow);
+EXPORT_IPV6_MOD(inet_peer_xrlim_allow);
void inetpeer_invalidate_tree(struct inet_peer_base *base)
{
@@ -286,4 +286,4 @@ void inetpeer_invalidate_tree(struct inet_peer_base *base)
base->total = 0;
}
-EXPORT_SYMBOL(inetpeer_invalidate_tree);
+EXPORT_IPV6_MOD(inetpeer_invalidate_tree);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ed1b6b44faf8..c9f11a046c26 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -141,7 +141,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
const struct iphdr *iph;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
- unsigned int data_len = 0;
struct ip_tunnel *t;
if (tpi->proto == htons(ETH_P_TEB))
@@ -182,7 +181,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
case ICMP_TIME_EXCEEDED:
if (code != ICMP_EXC_TTL)
return 0;
- data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
break;
case ICMP_REDIRECT:
@@ -190,10 +188,16 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
}
#if IS_ENABLED(CONFIG_IPV6)
- if (tpi->proto == htons(ETH_P_IPV6) &&
- !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
- type, data_len))
- return 0;
+ if (tpi->proto == htons(ETH_P_IPV6)) {
+ unsigned int data_len = 0;
+
+ if (type == ICMP_TIME_EXCEEDED)
+ data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
+
+ if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
+ type, data_len))
+ return 0;
+ }
#endif
if (t->parms.iph.daddr == 0 ||
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 619ddc087957..85d09f2ecadc 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -705,7 +705,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct ip_options_data opt_copy;
int free = 0;
__be32 saddr, daddr, faddr;
- u8 tos, scope;
+ u8 scope;
int err;
pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
@@ -768,7 +768,6 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
faddr = ipc.opt->opt.faddr;
}
- tos = get_rttos(&ipc, inet);
scope = ip_sendmsg_scope(inet, &ipc, msg);
if (ipv4_is_multicast(daddr)) {
@@ -779,7 +778,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
} else if (!ipc.oif)
ipc.oif = READ_ONCE(inet->uc_index);
- flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope,
+ flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark,
+ ipc.tos & INET_DSCP_MASK, scope,
sk->sk_protocol, inet_sk_flowi_flags(sk), faddr,
saddr, 0, 0, sk->sk_uid);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 4304a68d1db0..6aace4d55733 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -486,7 +486,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct ipcm_cookie ipc;
struct rtable *rt = NULL;
struct flowi4 fl4;
- u8 tos, scope;
+ u8 scope;
int free = 0;
__be32 daddr;
__be32 saddr;
@@ -581,7 +581,6 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
daddr = ipc.opt->opt.faddr;
}
}
- tos = get_rttos(&ipc, inet);
scope = ip_sendmsg_scope(inet, &ipc, msg);
uc_index = READ_ONCE(inet->uc_index);
@@ -606,7 +605,8 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
}
- flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope,
+ flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark,
+ ipc.tos & INET_DSCP_MASK, scope,
hdrincl ? ipc.protocol : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
(hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 1948d15f1f28..26816b876dd8 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -222,7 +222,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
return NULL;
}
-EXPORT_SYMBOL(tcp_get_cookie_sock);
+EXPORT_IPV6_MOD(tcp_get_cookie_sock);
/*
* when syncookies are in effect and tcp timestamps are enabled we stored
@@ -259,7 +259,7 @@ bool cookie_timestamp_decode(const struct net *net,
return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0;
}
-EXPORT_SYMBOL(cookie_timestamp_decode);
+EXPORT_IPV6_MOD(cookie_timestamp_decode);
static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
@@ -310,7 +310,7 @@ struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb)
return req;
}
-EXPORT_SYMBOL_GPL(cookie_bpf_check);
+EXPORT_IPV6_MOD_GPL(cookie_bpf_check);
#endif
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
@@ -351,7 +351,7 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
return req;
}
-EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc);
+EXPORT_IPV6_MOD_GPL(cookie_tcp_reqsk_alloc);
static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
struct sk_buff *skb)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 42cb5dc9cb24..3a43010d726f 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -28,6 +28,7 @@ static int tcp_adv_win_scale_max = 31;
static int tcp_app_win_max = 31;
static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
static int tcp_min_snd_mss_max = 65535;
+static int tcp_rto_max_max = TCP_RTO_MAX_SEC * MSEC_PER_SEC;
static int ip_privileged_port_min;
static int ip_privileged_port_max = 65535;
static int ip_ttl_min = 1;
@@ -1583,6 +1584,15 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ONE,
},
+ {
+ .procname = "tcp_rto_max_ms",
+ .data = &init_net.ipv4.sysctl_tcp_rto_max_ms,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE_THOUSAND,
+ .extra2 = &tcp_rto_max_max,
+ },
};
static __net_init int ipv4_sysctl_init_net(struct net *net)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0d704bda6c41..6a8f19a10911 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -300,10 +300,10 @@ DEFINE_PER_CPU(u32, tcp_tw_isn);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
long sysctl_tcp_mem[3] __read_mostly;
-EXPORT_SYMBOL(sysctl_tcp_mem);
+EXPORT_IPV6_MOD(sysctl_tcp_mem);
atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; /* Current allocated memory. */
-EXPORT_SYMBOL(tcp_memory_allocated);
+EXPORT_IPV6_MOD(tcp_memory_allocated);
DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_memory_per_cpu_fw_alloc);
@@ -316,7 +316,7 @@ EXPORT_SYMBOL(tcp_have_smc);
* Current number of TCP sockets.
*/
struct percpu_counter tcp_sockets_allocated ____cacheline_aligned_in_smp;
-EXPORT_SYMBOL(tcp_sockets_allocated);
+EXPORT_IPV6_MOD(tcp_sockets_allocated);
/*
* TCP splice context
@@ -349,7 +349,7 @@ void tcp_enter_memory_pressure(struct sock *sk)
if (!cmpxchg(&tcp_memory_pressure, 0, val))
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
}
-EXPORT_SYMBOL_GPL(tcp_enter_memory_pressure);
+EXPORT_IPV6_MOD_GPL(tcp_enter_memory_pressure);
void tcp_leave_memory_pressure(struct sock *sk)
{
@@ -362,7 +362,7 @@ void tcp_leave_memory_pressure(struct sock *sk)
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURESCHRONO,
jiffies_to_msecs(jiffies - val));
}
-EXPORT_SYMBOL_GPL(tcp_leave_memory_pressure);
+EXPORT_IPV6_MOD_GPL(tcp_leave_memory_pressure);
/* Convert seconds to retransmits based on initial and max timeout */
static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
@@ -423,7 +423,7 @@ void tcp_init_sock(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- int rto_min_us;
+ int rto_min_us, rto_max_ms;
tp->out_of_order_queue = RB_ROOT;
sk->tcp_rtx_queue = RB_ROOT;
@@ -432,6 +432,10 @@ void tcp_init_sock(struct sock *sk)
INIT_LIST_HEAD(&tp->tsorted_sent_queue);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
+
+ rto_max_ms = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_max_ms);
+ icsk->icsk_rto_max = msecs_to_jiffies(rto_max_ms);
+
rto_min_us = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_min_us);
icsk->icsk_rto_min = usecs_to_jiffies(rto_min_us);
icsk->icsk_delack_max = TCP_DELACK_MAX;
@@ -475,7 +479,7 @@ void tcp_init_sock(struct sock *sk)
sk_sockets_allocated_inc(sk);
xa_init_flags(&sk->sk_user_frags, XA_FLAGS_ALLOC1);
}
-EXPORT_SYMBOL(tcp_init_sock);
+EXPORT_IPV6_MOD(tcp_init_sock);
static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
{
@@ -660,7 +664,7 @@ int tcp_ioctl(struct sock *sk, int cmd, int *karg)
*karg = answ;
return 0;
}
-EXPORT_SYMBOL(tcp_ioctl);
+EXPORT_IPV6_MOD(tcp_ioctl);
void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
{
@@ -876,7 +880,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
return ret;
}
-EXPORT_SYMBOL(tcp_splice_read);
+EXPORT_IPV6_MOD(tcp_splice_read);
struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp,
bool force_schedule)
@@ -1123,7 +1127,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
/* 'common' sending to sendq */
}
- sockcm_init(&sockc, sk);
+ sockc = (struct sockcm_cookie) { .tsflags = READ_ONCE(sk->sk_tsflags)};
if (msg->msg_controllen) {
err = sock_cmsg_send(sk, msg, &sockc);
if (unlikely(err)) {
@@ -1376,7 +1380,7 @@ void tcp_splice_eof(struct socket *sock)
tcp_push(sk, 0, mss_now, tp->nonagle, size_goal);
release_sock(sk);
}
-EXPORT_SYMBOL_GPL(tcp_splice_eof);
+EXPORT_IPV6_MOD_GPL(tcp_splice_eof);
/*
* Handle reading urgent data. BSD has very simple semantics for
@@ -1667,7 +1671,7 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
}
return copied;
}
-EXPORT_SYMBOL(tcp_read_skb);
+EXPORT_IPV6_MOD(tcp_read_skb);
void tcp_read_done(struct sock *sk, size_t len)
{
@@ -1712,7 +1716,7 @@ int tcp_peek_len(struct socket *sock)
{
return tcp_inq(sock->sk);
}
-EXPORT_SYMBOL(tcp_peek_len);
+EXPORT_IPV6_MOD(tcp_peek_len);
/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
int tcp_set_rcvlowat(struct sock *sk, int val)
@@ -1739,7 +1743,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
}
return 0;
}
-EXPORT_SYMBOL(tcp_set_rcvlowat);
+EXPORT_IPV6_MOD(tcp_set_rcvlowat);
void tcp_update_recv_tstamps(struct sk_buff *skb,
struct scm_timestamping_internal *tss)
@@ -1772,7 +1776,7 @@ int tcp_mmap(struct file *file, struct socket *sock,
vma->vm_ops = &tcp_vm_ops;
return 0;
}
-EXPORT_SYMBOL(tcp_mmap);
+EXPORT_IPV6_MOD(tcp_mmap);
static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb,
u32 *offset_frag)
@@ -2476,6 +2480,11 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb,
}
niov = skb_frag_net_iov(frag);
+ if (!net_is_devmem_iov(niov)) {
+ err = -ENODEV;
+ goto out;
+ }
+
end = start + skb_frag_size(frag);
copy = end - offset;
@@ -2494,7 +2503,7 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb,
/* Will perform the exchange later */
dmabuf_cmsg.frag_token = tcp_xa_pool.tokens[tcp_xa_pool.idx];
- dmabuf_cmsg.dmabuf_id = net_iov_binding_id(niov);
+ dmabuf_cmsg.dmabuf_id = net_devmem_iov_binding_id(niov);
offset += copy;
remaining_len -= copy;
@@ -2864,7 +2873,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
}
return ret;
}
-EXPORT_SYMBOL(tcp_recvmsg);
+EXPORT_IPV6_MOD(tcp_recvmsg);
void tcp_set_state(struct sock *sk, int state)
{
@@ -2994,7 +3003,7 @@ void tcp_shutdown(struct sock *sk, int how)
tcp_send_fin(sk);
}
}
-EXPORT_SYMBOL(tcp_shutdown);
+EXPORT_IPV6_MOD(tcp_shutdown);
int tcp_orphan_count_sum(void)
{
@@ -3174,7 +3183,7 @@ adjudge_to_death:
const int tmo = tcp_fin_time(sk);
if (tmo > TCP_TIMEWAIT_LEN) {
- inet_csk_reset_keepalive_timer(sk,
+ tcp_reset_keepalive_timer(sk,
tmo - TCP_TIMEWAIT_LEN);
} else {
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
@@ -3493,7 +3502,7 @@ static int tcp_repair_options_est(struct sock *sk, sockptr_t optbuf,
}
DEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled);
-EXPORT_SYMBOL(tcp_tx_delay_enabled);
+EXPORT_IPV6_MOD(tcp_tx_delay_enabled);
static void tcp_enable_tx_delay(void)
{
@@ -3627,7 +3636,7 @@ int tcp_sock_set_keepidle_locked(struct sock *sk, int val)
elapsed = tp->keepalive_time - elapsed;
else
elapsed = 0;
- inet_csk_reset_keepalive_timer(sk, elapsed);
+ tcp_reset_keepalive_timer(sk, elapsed);
}
return 0;
@@ -3802,6 +3811,11 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
TCP_RTO_MAX / HZ));
return 0;
+ case TCP_RTO_MAX_MS:
+ if (val < MSEC_PER_SEC || val > TCP_RTO_MAX_SEC * MSEC_PER_SEC)
+ return -EINVAL;
+ WRITE_ONCE(inet_csk(sk)->icsk_rto_max, msecs_to_jiffies(val));
+ return 0;
}
sockopt_lock_sock(sk);
@@ -4031,7 +4045,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
optval, optlen);
return do_tcp_setsockopt(sk, level, optname, optval, optlen);
}
-EXPORT_SYMBOL(tcp_setsockopt);
+EXPORT_IPV6_MOD(tcp_setsockopt);
static void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
struct tcp_info *info)
@@ -4638,6 +4652,9 @@ zerocopy_rcv_out:
case TCP_IS_MPTCP:
val = 0;
break;
+ case TCP_RTO_MAX_MS:
+ val = jiffies_to_msecs(tcp_rto_max(sk));
+ break;
default:
return -ENOPROTOOPT;
}
@@ -4659,7 +4676,7 @@ bool tcp_bpf_bypass_getsockopt(int level, int optname)
return false;
}
-EXPORT_SYMBOL(tcp_bpf_bypass_getsockopt);
+EXPORT_IPV6_MOD(tcp_bpf_bypass_getsockopt);
int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
int __user *optlen)
@@ -4673,11 +4690,11 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
return do_tcp_getsockopt(sk, level, optname, USER_SOCKPTR(optval),
USER_SOCKPTR(optlen));
}
-EXPORT_SYMBOL(tcp_getsockopt);
+EXPORT_IPV6_MOD(tcp_getsockopt);
#ifdef CONFIG_TCP_MD5SIG
int tcp_md5_sigpool_id = -1;
-EXPORT_SYMBOL_GPL(tcp_md5_sigpool_id);
+EXPORT_IPV6_MOD_GPL(tcp_md5_sigpool_id);
int tcp_md5_alloc_sigpool(void)
{
@@ -4723,7 +4740,7 @@ int tcp_md5_hash_key(struct tcp_sigpool *hp,
*/
return data_race(crypto_ahash_update(hp->req));
}
-EXPORT_SYMBOL(tcp_md5_hash_key);
+EXPORT_IPV6_MOD(tcp_md5_hash_key);
/* Called with rcu_read_lock() */
static enum skb_drop_reason
@@ -4843,7 +4860,7 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req,
return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family,
l3index, md5_location);
}
-EXPORT_SYMBOL_GPL(tcp_inbound_hash);
+EXPORT_IPV6_MOD_GPL(tcp_inbound_hash);
void tcp_done(struct sock *sk)
{
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 32b28fc21b63..1a6b1bc54245 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -274,8 +274,8 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
* because it's been added to the accept queue directly.
*/
req->timeout = tcp_timeout_init(child);
- inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
- req->timeout, TCP_RTO_MAX);
+ tcp_reset_xmit_timer(child, ICSK_TIME_RETRANS,
+ req->timeout, false);
refcount_set(&req->rsk_refcnt, 2);
@@ -468,7 +468,7 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
}
return false;
}
-EXPORT_SYMBOL(tcp_fastopen_defer_connect);
+EXPORT_IPV6_MOD(tcp_fastopen_defer_connect);
/*
* The following code block is to deal with middle box issues with TFO:
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0cbf81bf3d45..5fddcd0bbe91 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -636,7 +636,7 @@ void tcp_initialize_rcv_mss(struct sock *sk)
inet_csk(sk)->icsk_ack.rcv_mss = hint;
}
-EXPORT_SYMBOL(tcp_initialize_rcv_mss);
+EXPORT_IPV6_MOD(tcp_initialize_rcv_mss);
/* Receiver "autotuning" code.
*
@@ -2258,8 +2258,7 @@ static bool tcp_check_sack_reneging(struct sock *sk, int *ack_flag)
unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
msecs_to_jiffies(10));
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- delay, TCP_RTO_MAX);
+ tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, false);
*ack_flag &= ~FLAG_SET_XMIT_TIMER;
return true;
}
@@ -2716,6 +2715,8 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost,
if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
return;
+ trace_tcp_cwnd_reduction_tp(sk, newly_acked_sacked, newly_lost, flag);
+
tp->prr_delivered += newly_acked_sacked;
if (delta < 0) {
u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
@@ -2898,7 +2899,7 @@ void tcp_simple_retransmit(struct sock *sk)
*/
tcp_non_congestion_loss_retransmit(sk);
}
-EXPORT_SYMBOL(tcp_simple_retransmit);
+EXPORT_IPV6_MOD(tcp_simple_retransmit);
void tcp_enter_recovery(struct sock *sk, bool ece_ack)
{
@@ -3288,8 +3289,7 @@ void tcp_rearm_rto(struct sock *sk)
*/
rto = usecs_to_jiffies(max_t(int, delta_us, 1));
}
- tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
- TCP_RTO_MAX);
+ tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, true);
}
}
@@ -3566,10 +3566,10 @@ static void tcp_ack_probe(struct sock *sk)
* This function is not for random using!
*/
} else {
- unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
+ unsigned long when = tcp_probe0_when(sk, tcp_rto_max(sk));
when = tcp_clamp_probe0_to_user_timeout(sk, when);
- tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX);
+ tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, true);
}
}
@@ -4180,7 +4180,6 @@ u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
}
return mss;
}
-EXPORT_SYMBOL_GPL(tcp_parse_mss_option);
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when
@@ -4530,7 +4529,7 @@ void tcp_done_with_error(struct sock *sk, int err)
if (!sock_flag(sk, SOCK_DEAD))
sk_error_report(sk);
}
-EXPORT_SYMBOL(tcp_done_with_error);
+EXPORT_IPV6_MOD(tcp_done_with_error);
/* When we get a reset we do this. */
void tcp_reset(struct sock *sk, struct sk_buff *skb)
@@ -6300,7 +6299,7 @@ csum_error:
discard:
tcp_drop_reason(sk, skb, reason);
}
-EXPORT_SYMBOL(tcp_rcv_established);
+EXPORT_IPV6_MOD(tcp_rcv_established);
void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
{
@@ -6353,7 +6352,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
tp->lsndtime = tcp_jiffies32;
if (sock_flag(sk, SOCK_KEEPOPEN))
- inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
+ tcp_reset_keepalive_timer(sk, keepalive_time_when(tp));
if (!tp->rx_opt.snd_wscale)
__tcp_fast_path_on(tp, tp->snd_wnd);
@@ -6476,9 +6475,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
/* Previous FIN/ACK or RST/ACK might be ignored. */
if (icsk->icsk_retransmits == 0)
- inet_csk_reset_xmit_timer(sk,
- ICSK_TIME_RETRANS,
- TCP_TIMEOUT_MIN, TCP_RTO_MAX);
+ tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ TCP_TIMEOUT_MIN, false);
SKB_DR_SET(reason, TCP_INVALID_ACK_SEQUENCE);
goto reset_and_undo;
}
@@ -6593,8 +6591,8 @@ consume:
*/
inet_csk_schedule_ack(sk);
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
- TCP_DELACK_MAX, TCP_RTO_MAX);
+ tcp_reset_xmit_timer(sk, ICSK_TIME_DACK,
+ TCP_DELACK_MAX, false);
goto consume;
}
tcp_send_ack(sk);
@@ -6928,7 +6926,7 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tmo = tcp_fin_time(sk);
if (tmo > TCP_TIMEWAIT_LEN) {
- inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
+ tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
} else if (th->fin || sock_owned_by_user(sk)) {
/* Bad case. We could lose such FIN otherwise.
* It is not a big problem, but it looks confusing
@@ -6936,7 +6934,7 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
* if it spins in bh_lock_sock(), but it is really
* marginal case.
*/
- inet_csk_reset_keepalive_timer(sk, tmo);
+ tcp_reset_keepalive_timer(sk, tmo);
} else {
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
goto consume;
@@ -7014,7 +7012,7 @@ consume:
__kfree_skb(skb);
return 0;
}
-EXPORT_SYMBOL(tcp_rcv_state_process);
+EXPORT_IPV6_MOD(tcp_rcv_state_process);
static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
{
@@ -7196,7 +7194,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
return mss;
}
-EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss);
+EXPORT_IPV6_MOD_GPL(tcp_get_syncookie_mss);
int tcp_conn_request(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
@@ -7377,4 +7375,4 @@ drop:
tcp_listendrop(sk);
return 0;
}
-EXPORT_SYMBOL(tcp_conn_request);
+EXPORT_IPV6_MOD(tcp_conn_request);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2632844d2c35..7900855237d9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -92,7 +92,6 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
#endif
struct inet_hashinfo tcp_hashinfo;
-EXPORT_SYMBOL(tcp_hashinfo);
static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = {
.bh_lock = INIT_LOCAL_LOCK(bh_lock),
@@ -199,7 +198,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
return 0;
}
-EXPORT_SYMBOL_GPL(tcp_twsk_unique);
+EXPORT_IPV6_MOD_GPL(tcp_twsk_unique);
static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
@@ -359,7 +358,7 @@ failure:
inet->inet_dport = 0;
return err;
}
-EXPORT_SYMBOL(tcp_v4_connect);
+EXPORT_IPV6_MOD(tcp_v4_connect);
/*
* This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
@@ -400,7 +399,7 @@ void tcp_v4_mtu_reduced(struct sock *sk)
tcp_simple_retransmit(sk);
} /* else let the usual retransmit timer handle it */
}
-EXPORT_SYMBOL(tcp_v4_mtu_reduced);
+EXPORT_IPV6_MOD(tcp_v4_mtu_reduced);
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
@@ -434,7 +433,7 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort)
}
reqsk_put(req);
}
-EXPORT_SYMBOL(tcp_req_err);
+EXPORT_IPV6_MOD(tcp_req_err);
/* TCP-LD (RFC 6069) logic */
void tcp_ld_RTO_revert(struct sock *sk, u32 seq)
@@ -458,15 +457,14 @@ void tcp_ld_RTO_revert(struct sock *sk, u32 seq)
icsk->icsk_backoff--;
icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) : TCP_TIMEOUT_INIT;
- icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
+ icsk->icsk_rto = inet_csk_rto_backoff(icsk, tcp_rto_max(sk));
tcp_mstamp_refresh(tp);
delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
remaining = icsk->icsk_rto - usecs_to_jiffies(delta_us);
if (remaining > 0) {
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- remaining, TCP_RTO_MAX);
+ tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, remaining, false);
} else {
/* RTO revert clocked out retransmission.
* Will retransmit now.
@@ -474,7 +472,7 @@ void tcp_ld_RTO_revert(struct sock *sk, u32 seq)
tcp_retransmit_timer(sk);
}
}
-EXPORT_SYMBOL(tcp_ld_RTO_revert);
+EXPORT_IPV6_MOD(tcp_ld_RTO_revert);
/*
* This routine is called by the ICMP module when it gets some
@@ -676,7 +674,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
-EXPORT_SYMBOL(tcp_v4_send_check);
+EXPORT_IPV6_MOD(tcp_v4_send_check);
#define REPLY_OPTIONS_LEN (MAX_TCP_OPTION_SPACE / sizeof(__be32))
@@ -1231,7 +1229,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
*/
DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_md5_needed, HZ);
-EXPORT_SYMBOL(tcp_md5_needed);
+EXPORT_IPV6_MOD(tcp_md5_needed);
static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key *new)
{
@@ -1290,7 +1288,7 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
}
return best_match;
}
-EXPORT_SYMBOL(__tcp_md5_do_lookup);
+EXPORT_IPV6_MOD(__tcp_md5_do_lookup);
static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
const union tcp_md5_addr *addr,
@@ -1337,7 +1335,7 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
return tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
}
-EXPORT_SYMBOL(tcp_v4_md5_lookup);
+EXPORT_IPV6_MOD(tcp_v4_md5_lookup);
static int tcp_md5sig_info_add(struct sock *sk, gfp_t gfp)
{
@@ -1433,7 +1431,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
return __tcp_md5_do_add(sk, addr, family, prefixlen, l3index, flags,
newkey, newkeylen, GFP_KERNEL);
}
-EXPORT_SYMBOL(tcp_md5_do_add);
+EXPORT_IPV6_MOD(tcp_md5_do_add);
int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr,
int family, u8 prefixlen, int l3index,
@@ -1465,7 +1463,7 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr,
key->flags, key->key, key->keylen,
sk_gfp_mask(sk, GFP_ATOMIC));
}
-EXPORT_SYMBOL(tcp_md5_key_copy);
+EXPORT_IPV6_MOD(tcp_md5_key_copy);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
u8 prefixlen, int l3index, u8 flags)
@@ -1480,7 +1478,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
kfree_rcu(key, rcu);
return 0;
}
-EXPORT_SYMBOL(tcp_md5_do_del);
+EXPORT_IPV6_MOD(tcp_md5_do_del);
void tcp_clear_md5_list(struct sock *sk)
{
@@ -1659,7 +1657,7 @@ clear_hash_nostart:
memset(md5_hash, 0, 16);
return 1;
}
-EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
+EXPORT_IPV6_MOD(tcp_v4_md5_hash_skb);
#endif
@@ -1732,7 +1730,7 @@ drop:
tcp_listendrop(sk);
return 0;
}
-EXPORT_SYMBOL(tcp_v4_conn_request);
+EXPORT_IPV6_MOD(tcp_v4_conn_request);
/*
@@ -1770,10 +1768,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
newtp = tcp_sk(newsk);
newinet = inet_sk(newsk);
ireq = inet_rsk(req);
- sk_daddr_set(newsk, ireq->ir_rmt_addr);
- sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
- newsk->sk_bound_dev_if = ireq->ir_iif;
- newinet->inet_saddr = ireq->ir_loc_addr;
inet_opt = rcu_dereference(ireq->ireq_opt);
RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
newinet->mc_index = inet_iif(skb);
@@ -1856,7 +1850,7 @@ put_and_exit:
tcp_done(newsk);
goto exit;
}
-EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
+EXPORT_IPV6_MOD(tcp_v4_syn_recv_sock);
static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
@@ -2135,7 +2129,7 @@ no_coalesce:
}
return false;
}
-EXPORT_SYMBOL(tcp_add_backlog);
+EXPORT_IPV6_MOD(tcp_add_backlog);
int tcp_filter(struct sock *sk, struct sk_buff *skb)
{
@@ -2143,7 +2137,7 @@ int tcp_filter(struct sock *sk, struct sk_buff *skb)
return sk_filter_trim_cap(sk, skb, th->doff * 4);
}
-EXPORT_SYMBOL(tcp_filter);
+EXPORT_IPV6_MOD(tcp_filter);
static void tcp_v4_restore_cb(struct sk_buff *skb)
{
@@ -2452,7 +2446,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
sk->sk_rx_dst_ifindex = skb->skb_iif;
}
}
-EXPORT_SYMBOL(inet_sk_rx_dst_set);
+EXPORT_IPV6_MOD(inet_sk_rx_dst_set);
const struct inet_connection_sock_af_ops ipv4_specific = {
.queue_xmit = ip_queue_xmit,
@@ -2468,7 +2462,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
.sockaddr_len = sizeof(struct sockaddr_in),
.mtu_reduced = tcp_v4_mtu_reduced,
};
-EXPORT_SYMBOL(ipv4_specific);
+EXPORT_IPV6_MOD(ipv4_specific);
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
@@ -2578,7 +2572,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
sk_sockets_allocated_dec(sk);
}
-EXPORT_SYMBOL(tcp_v4_destroy_sock);
+EXPORT_IPV6_MOD(tcp_v4_destroy_sock);
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */
@@ -2814,7 +2808,7 @@ out:
st->last_pos = *pos;
return rc;
}
-EXPORT_SYMBOL(tcp_seq_start);
+EXPORT_IPV6_MOD(tcp_seq_start);
void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
@@ -2845,7 +2839,7 @@ out:
st->last_pos = *pos;
return rc;
}
-EXPORT_SYMBOL(tcp_seq_next);
+EXPORT_IPV6_MOD(tcp_seq_next);
void tcp_seq_stop(struct seq_file *seq, void *v)
{
@@ -2863,7 +2857,7 @@ void tcp_seq_stop(struct seq_file *seq, void *v)
break;
}
}
-EXPORT_SYMBOL(tcp_seq_stop);
+EXPORT_IPV6_MOD(tcp_seq_stop);
static void get_openreq4(const struct request_sock *req,
struct seq_file *f, int i)
@@ -3533,6 +3527,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_pingpong_thresh = 1;
net->ipv4.sysctl_tcp_rto_min_us = jiffies_to_usecs(TCP_RTO_MIN);
+ net->ipv4.sysctl_tcp_rto_max_ms = TCP_RTO_MAX_SEC * MSEC_PER_SEC;
return 0;
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b089b08e9617..1eccc518b957 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -264,7 +264,7 @@ kill:
inet_twsk_put(tw);
return TCP_TW_SUCCESS;
}
-EXPORT_SYMBOL(tcp_timewait_state_process);
+EXPORT_IPV6_MOD(tcp_timewait_state_process);
static void tcp_time_wait_init(struct sock *sk, struct tcp_timewait_sock *tcptw)
{
@@ -398,7 +398,7 @@ void tcp_twsk_destructor(struct sock *sk)
#endif
tcp_ao_destroy_sock(sk, true);
}
-EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
+EXPORT_IPV6_MOD_GPL(tcp_twsk_destructor);
void tcp_twsk_purge(struct list_head *net_exit_list)
{
@@ -457,7 +457,6 @@ void tcp_openreq_init_rwin(struct request_sock *req,
rcv_wnd);
ireq->rcv_wscale = rcv_wscale;
}
-EXPORT_SYMBOL(tcp_openreq_init_rwin);
static void tcp_ecn_openreq_child(struct tcp_sock *tp,
const struct request_sock *req)
@@ -492,7 +491,7 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
tcp_set_ca_state(sk, TCP_CA_Open);
}
-EXPORT_SYMBOL_GPL(tcp_ca_openreq_child);
+EXPORT_IPV6_MOD_GPL(tcp_ca_openreq_child);
static void smc_check_reset_syn_req(const struct tcp_sock *oldtp,
struct request_sock *req,
@@ -566,8 +565,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1);
if (sock_flag(newsk, SOCK_KEEPOPEN))
- inet_csk_reset_keepalive_timer(newsk,
- keepalive_time_when(newtp));
+ tcp_reset_keepalive_timer(newsk, keepalive_time_when(newtp));
newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
newtp->rx_opt.sack_ok = ireq->sack_ok;
@@ -910,7 +908,7 @@ embryonic_reset:
}
return NULL;
}
-EXPORT_SYMBOL(tcp_check_req);
+EXPORT_IPV6_MOD(tcp_check_req);
/*
* Queue segment on the new socket if the new socket is active,
@@ -952,4 +950,4 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child,
sock_put(child);
return reason;
}
-EXPORT_SYMBOL(tcp_child_process);
+EXPORT_IPV6_MOD(tcp_child_process);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bc95d2a5924f..b4b40f135432 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -250,7 +250,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
WRITE_ONCE(*__window_clamp,
min_t(__u32, U16_MAX << (*rcv_wscale), window_clamp));
}
-EXPORT_SYMBOL(tcp_select_initial_window);
+EXPORT_IPV6_MOD(tcp_select_initial_window);
/* Chose a new window to advertise, update state in tcp_sock for the
* socket, and return result with RFC1323 scaling applied. The return
@@ -1171,7 +1171,7 @@ void tcp_release_cb(struct sock *sk)
if ((flags & TCPF_ACK_DEFERRED) && inet_csk_ack_scheduled(sk))
tcp_send_ack(sk);
}
-EXPORT_SYMBOL(tcp_release_cb);
+EXPORT_IPV6_MOD(tcp_release_cb);
void __init tcp_tasklet_init(void)
{
@@ -1783,7 +1783,7 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu)
return __tcp_mtu_to_mss(sk, pmtu) -
(tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
}
-EXPORT_SYMBOL(tcp_mtu_to_mss);
+EXPORT_IPV6_MOD(tcp_mtu_to_mss);
/* Inverse of above */
int tcp_mss_to_mtu(struct sock *sk, int mss)
@@ -1813,7 +1813,6 @@ void tcp_mtup_init(struct sock *sk)
if (icsk->icsk_mtup.enabled)
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
}
-EXPORT_SYMBOL(tcp_mtup_init);
/* This function synchronize snd mss to current pmtu/exthdr set.
@@ -1857,7 +1856,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
return mss_now;
}
-EXPORT_SYMBOL(tcp_sync_mss);
+EXPORT_IPV6_MOD(tcp_sync_mss);
/* Compute the current effective MSS, taking SACKs and IP options,
* and even PMTU discovery events into account.
@@ -2911,7 +2910,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
if (rto_delta_us > 0)
timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
- tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, TCP_RTO_MAX);
+ tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, true);
return true;
}
@@ -3545,8 +3544,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
}
if (rearm_timer)
tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- inet_csk(sk)->icsk_rto,
- TCP_RTO_MAX);
+ inet_csk(sk)->icsk_rto, true);
}
/* We allow to exceed memory limits for FIN packets to expedite
@@ -3853,7 +3851,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
return skb;
}
-EXPORT_SYMBOL(tcp_make_synack);
+EXPORT_IPV6_MOD(tcp_make_synack);
static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
{
@@ -4163,8 +4161,8 @@ int tcp_connect(struct sock *sk)
TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
/* Timer for repeating the SYN until an answer. */
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
+ tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ inet_csk(sk)->icsk_rto, false);
return 0;
}
EXPORT_SYMBOL(tcp_connect);
@@ -4253,11 +4251,11 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
unsigned long delay;
delay = TCP_DELACK_MAX << icsk->icsk_ack.retry;
- if (delay < TCP_RTO_MAX)
+ if (delay < tcp_rto_max(sk))
icsk->icsk_ack.retry++;
inet_csk_schedule_ack(sk);
icsk->icsk_ack.ato = TCP_ATO_MIN;
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, delay, TCP_RTO_MAX);
+ tcp_reset_xmit_timer(sk, ICSK_TIME_DACK, delay, false);
return;
}
@@ -4393,7 +4391,7 @@ void tcp_send_probe0(struct sock *sk)
if (err <= 0) {
if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2))
icsk->icsk_backoff++;
- timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
+ timeout = tcp_probe0_when(sk, tcp_rto_max(sk));
} else {
/* If packet was not sent due to local congestion,
* Let senders fight for local resources conservatively.
@@ -4402,7 +4400,7 @@ void tcp_send_probe0(struct sock *sk)
}
timeout = tcp_clamp_probe0_to_user_timeout(sk, timeout);
- tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX);
+ tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, true);
}
int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
@@ -4430,4 +4428,4 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
}
return res;
}
-EXPORT_SYMBOL(tcp_rtx_synack);
+EXPORT_IPV6_MOD(tcp_rtx_synack);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b412ed88ccd9..728bce01ccd3 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -109,7 +109,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
/* If peer does not open window for long time, or did not transmit
* anything for long time, penalize it. */
- if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
+ if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*tcp_rto_max(sk) || !do_reset)
shift++;
/* If some dubious ICMP arrived, penalize even more. */
@@ -189,12 +189,12 @@ static unsigned int tcp_model_timeout(struct sock *sk,
{
unsigned int linear_backoff_thresh, timeout;
- linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base);
+ linear_backoff_thresh = ilog2(tcp_rto_max(sk) / rto_base);
if (boundary <= linear_backoff_thresh)
timeout = ((2 << boundary) - 1) * rto_base;
else
timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
- (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
+ (boundary - linear_backoff_thresh) * tcp_rto_max(sk);
return jiffies_to_msecs(timeout);
}
/**
@@ -268,7 +268,7 @@ static int tcp_write_timeout(struct sock *sk)
retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
- const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
+ const bool alive = icsk->icsk_rto < tcp_rto_max(sk);
retry_until = tcp_orphan_retries(sk, alive);
do_reset = alive ||
@@ -416,7 +416,8 @@ static void tcp_probe_timer(struct sock *sk)
}
max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
- const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
+ unsigned int rto_max = tcp_rto_max(sk);
+ const bool alive = inet_csk_rto_backoff(icsk, rto_max) < rto_max;
max_probes = tcp_orphan_retries(sk, alive);
if (!alive && icsk->icsk_backoff >= max_probes)
@@ -481,8 +482,8 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
tcp_update_rto_stats(sk);
if (!tp->retrans_stamp)
tp->retrans_stamp = tcp_time_stamp_ts(tp);
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- req->timeout << req->num_timeout, TCP_RTO_MAX);
+ tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ req->timeout << req->num_timeout, false);
}
static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
@@ -492,7 +493,7 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
const struct tcp_sock *tp = tcp_sk(sk);
- int timeout = TCP_RTO_MAX * 2;
+ int timeout = tcp_rto_max(sk) * 2;
s32 rcv_delta;
if (user_timeout) {
@@ -626,9 +627,9 @@ void tcp_retransmit_timer(struct sock *sk)
/* Retransmission failed because of local congestion,
* Let senders fight for local resources conservatively.
*/
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- TCP_RESOURCE_PROBE_INTERVAL,
- TCP_RTO_MAX);
+ tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ TCP_RESOURCE_PROBE_INTERVAL,
+ false);
goto out;
}
@@ -665,7 +666,7 @@ out_reset_timer:
icsk->icsk_backoff = 0;
icsk->icsk_rto = clamp(__tcp_set_rto(tp),
tcp_rto_min(sk),
- TCP_RTO_MAX);
+ tcp_rto_max(sk));
} else if (sk->sk_state != TCP_SYN_SENT ||
tp->total_rto >
READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) {
@@ -673,10 +674,10 @@ out_reset_timer:
* activated.
*/
icsk->icsk_backoff++;
- icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
+ icsk->icsk_rto = min(icsk->icsk_rto << 1, tcp_rto_max(sk));
}
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
+ tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ tcp_clamp_rto_to_user_timeout(sk), false);
if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0))
__sk_dst_reset(sk);
@@ -749,7 +750,17 @@ void tcp_syn_ack_timeout(const struct request_sock *req)
__NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS);
}
-EXPORT_SYMBOL(tcp_syn_ack_timeout);
+EXPORT_IPV6_MOD(tcp_syn_ack_timeout);
+
+void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len)
+{
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
+}
+
+static void tcp_delete_keepalive_timer(struct sock *sk)
+{
+ sk_stop_timer(sk, &sk->sk_timer);
+}
void tcp_set_keepalive(struct sock *sk, int val)
{
@@ -757,14 +768,13 @@ void tcp_set_keepalive(struct sock *sk, int val)
return;
if (val && !sock_flag(sk, SOCK_KEEPOPEN))
- inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
+ tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
else if (!val)
- inet_csk_delete_keepalive_timer(sk);
+ tcp_delete_keepalive_timer(sk);
}
-EXPORT_SYMBOL_GPL(tcp_set_keepalive);
-
+EXPORT_IPV6_MOD_GPL(tcp_set_keepalive);
-static void tcp_keepalive_timer (struct timer_list *t)
+static void tcp_keepalive_timer(struct timer_list *t)
{
struct sock *sk = from_timer(sk, t, sk_timer);
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -775,7 +785,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
/* Try again later. */
- inet_csk_reset_keepalive_timer (sk, HZ/20);
+ tcp_reset_keepalive_timer(sk, HZ/20);
goto out;
}
@@ -841,7 +851,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
}
resched:
- inet_csk_reset_keepalive_timer (sk, elapsed);
+ tcp_reset_keepalive_timer(sk, elapsed);
goto out;
death:
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index a9bb9ce5438e..17c7736d8349 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -121,13 +121,12 @@
#endif
struct udp_table udp_table __read_mostly;
-EXPORT_SYMBOL(udp_table);
long sysctl_udp_mem[3] __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_mem);
+EXPORT_IPV6_MOD(sysctl_udp_mem);
atomic_long_t udp_memory_allocated ____cacheline_aligned_in_smp;
-EXPORT_SYMBOL(udp_memory_allocated);
+EXPORT_IPV6_MOD(udp_memory_allocated);
DEFINE_PER_CPU(int, udp_memory_per_cpu_fw_alloc);
EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc);
@@ -352,7 +351,7 @@ fail_unlock:
fail:
return error;
}
-EXPORT_SYMBOL(udp_lib_get_port);
+EXPORT_IPV6_MOD(udp_lib_get_port);
int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
@@ -418,7 +417,7 @@ u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
return __inet_ehashfn(laddr, lport, faddr, fport,
udp_ehash_secret + net_hash_mix(net));
}
-EXPORT_SYMBOL(udp_ehashfn);
+EXPORT_IPV6_MOD(udp_ehashfn);
/**
* udp4_lib_lookup1() - Simplified lookup using primary hash (destination port)
@@ -653,7 +652,7 @@ void udp_lib_hash4(struct sock *sk, u16 hash)
spin_unlock_bh(&hslot->lock);
}
-EXPORT_SYMBOL(udp_lib_hash4);
+EXPORT_IPV6_MOD(udp_lib_hash4);
/* call with sock lock */
void udp4_hash4(struct sock *sk)
@@ -669,7 +668,7 @@ void udp4_hash4(struct sock *sk)
udp_lib_hash4(sk, hash);
}
-EXPORT_SYMBOL(udp4_hash4);
+EXPORT_IPV6_MOD(udp4_hash4);
#endif /* CONFIG_BASE_SMALL */
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
@@ -809,11 +808,11 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk,
}
DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
-EXPORT_SYMBOL(udp_encap_needed_key);
+EXPORT_IPV6_MOD(udp_encap_needed_key);
#if IS_ENABLED(CONFIG_IPV6)
DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
-EXPORT_SYMBOL(udpv6_encap_needed_key);
+EXPORT_IPV6_MOD(udpv6_encap_needed_key);
#endif
void udp_encap_enable(void)
@@ -1041,7 +1040,7 @@ void udp_flush_pending_frames(struct sock *sk)
ip_flush_pending_frames(sk);
}
}
-EXPORT_SYMBOL(udp_flush_pending_frames);
+EXPORT_IPV6_MOD(udp_flush_pending_frames);
/**
* udp4_hwcsum - handle outgoing HW checksumming
@@ -1229,7 +1228,7 @@ out:
WRITE_ONCE(up->pending, 0);
return err;
}
-EXPORT_SYMBOL(udp_push_pending_frames);
+EXPORT_IPV6_MOD(udp_push_pending_frames);
static int __udp_cmsg_send(struct cmsghdr *cmsg, u16 *gso_size)
{
@@ -1266,7 +1265,7 @@ int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size)
return need_ip;
}
-EXPORT_SYMBOL_GPL(udp_cmsg_send);
+EXPORT_IPV6_MOD_GPL(udp_cmsg_send);
int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
@@ -1281,7 +1280,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int free = 0;
int connected = 0;
__be32 daddr, faddr, saddr;
- u8 tos, scope;
+ u8 scope;
__be16 dport;
int err, is_udplite = IS_UDPLITE(sk);
int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE;
@@ -1405,7 +1404,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
faddr = ipc.opt->opt.faddr;
connected = 0;
}
- tos = get_rttos(&ipc, inet);
scope = ip_sendmsg_scope(inet, &ipc, msg);
if (scope == RT_SCOPE_LINK)
connected = 0;
@@ -1442,7 +1440,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl4 = &fl4_stack;
- flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos, scope,
+ flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark,
+ ipc.tos & INET_DSCP_MASK, scope,
sk->sk_protocol, flow_flags, faddr, saddr,
dport, inet->inet_sport, sk->sk_uid);
@@ -1561,7 +1560,7 @@ void udp_splice_eof(struct socket *sock)
udp_push_pending_frames(sk);
release_sock(sk);
}
-EXPORT_SYMBOL_GPL(udp_splice_eof);
+EXPORT_IPV6_MOD_GPL(udp_splice_eof);
#define UDP_SKB_IS_STATELESS 0x80000000
@@ -1678,7 +1677,7 @@ void udp_skb_destructor(struct sock *sk, struct sk_buff *skb)
prefetch(&skb->data);
udp_rmem_release(sk, udp_skb_truesize(skb), 1, false);
}
-EXPORT_SYMBOL(udp_skb_destructor);
+EXPORT_IPV6_MOD(udp_skb_destructor);
/* as above, but the caller held the rx queue lock, too */
static void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb)
@@ -1785,7 +1784,7 @@ drop:
busylock_release(busy);
return err;
}
-EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
+EXPORT_IPV6_MOD_GPL(__udp_enqueue_schedule_skb);
void udp_destruct_common(struct sock *sk)
{
@@ -1801,7 +1800,7 @@ void udp_destruct_common(struct sock *sk)
}
udp_rmem_release(sk, total, 0, true);
}
-EXPORT_SYMBOL_GPL(udp_destruct_common);
+EXPORT_IPV6_MOD_GPL(udp_destruct_common);
static void udp_destruct_sock(struct sock *sk)
{
@@ -1832,7 +1831,7 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
skb_release_head_state(skb);
__consume_stateless_skb(skb);
}
-EXPORT_SYMBOL_GPL(skb_consume_udp);
+EXPORT_IPV6_MOD_GPL(skb_consume_udp);
static struct sk_buff *__first_packet_length(struct sock *sk,
struct sk_buff_head *rcvq,
@@ -1914,7 +1913,7 @@ int udp_ioctl(struct sock *sk, int cmd, int *karg)
return 0;
}
-EXPORT_SYMBOL(udp_ioctl);
+EXPORT_IPV6_MOD(udp_ioctl);
struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
int *off, int *err)
@@ -2010,7 +2009,7 @@ try_again:
WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
return recv_actor(sk, skb);
}
-EXPORT_SYMBOL(udp_read_skb);
+EXPORT_IPV6_MOD(udp_read_skb);
/*
* This should be easy, if there is something there we
@@ -2137,7 +2136,7 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len);
}
-EXPORT_SYMBOL(udp_pre_connect);
+EXPORT_IPV6_MOD(udp_pre_connect);
static int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
@@ -2186,7 +2185,7 @@ int udp_disconnect(struct sock *sk, int flags)
release_sock(sk);
return 0;
}
-EXPORT_SYMBOL(udp_disconnect);
+EXPORT_IPV6_MOD(udp_disconnect);
void udp_lib_unhash(struct sock *sk)
{
@@ -2216,7 +2215,7 @@ void udp_lib_unhash(struct sock *sk)
spin_unlock_bh(&hslot->lock);
}
}
-EXPORT_SYMBOL(udp_lib_unhash);
+EXPORT_IPV6_MOD(udp_lib_unhash);
/*
* inet_rcv_saddr was changed, we must rehash secondary hash
@@ -2280,7 +2279,7 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4)
}
}
}
-EXPORT_SYMBOL(udp_lib_rehash);
+EXPORT_IPV6_MOD(udp_lib_rehash);
void udp_v4_rehash(struct sock *sk)
{
@@ -2485,7 +2484,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
}
return false;
}
-EXPORT_SYMBOL(udp_sk_rx_dst_set);
+EXPORT_IPV6_MOD(udp_sk_rx_dst_set);
/*
* Multicasts and broadcasts go to each listener.
@@ -3041,7 +3040,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
return err;
}
-EXPORT_SYMBOL(udp_lib_setsockopt);
+EXPORT_IPV6_MOD(udp_lib_setsockopt);
int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
unsigned int optlen)
@@ -3112,7 +3111,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
return 0;
}
-EXPORT_SYMBOL(udp_lib_getsockopt);
+EXPORT_IPV6_MOD(udp_lib_getsockopt);
int udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
@@ -3154,7 +3153,7 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
return mask;
}
-EXPORT_SYMBOL(udp_poll);
+EXPORT_IPV6_MOD(udp_poll);
int udp_abort(struct sock *sk, int err)
{
@@ -3177,7 +3176,7 @@ out:
return 0;
}
-EXPORT_SYMBOL_GPL(udp_abort);
+EXPORT_IPV6_MOD_GPL(udp_abort);
struct proto udp_prot = {
.name = "UDP",
@@ -3311,7 +3310,7 @@ void *udp_seq_start(struct seq_file *seq, loff_t *pos)
return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
}
-EXPORT_SYMBOL(udp_seq_start);
+EXPORT_IPV6_MOD(udp_seq_start);
void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
@@ -3325,7 +3324,7 @@ void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++*pos;
return sk;
}
-EXPORT_SYMBOL(udp_seq_next);
+EXPORT_IPV6_MOD(udp_seq_next);
void udp_seq_stop(struct seq_file *seq, void *v)
{
@@ -3337,7 +3336,7 @@ void udp_seq_stop(struct seq_file *seq, void *v)
if (state->bucket <= udptable->mask)
spin_unlock_bh(&udptable->hash[state->bucket].lock);
}
-EXPORT_SYMBOL(udp_seq_stop);
+EXPORT_IPV6_MOD(udp_seq_stop);
/* ------------------------------------------------------------------------ */
static void udp4_format_sock(struct sock *sp, struct seq_file *f,
@@ -3616,7 +3615,7 @@ const struct seq_operations udp_seq_ops = {
.stop = udp_seq_stop,
.show = udp4_seq_show,
};
-EXPORT_SYMBOL(udp_seq_ops);
+EXPORT_IPV6_MOD(udp_seq_ops);
static struct udp_seq_afinfo udp4_seq_afinfo = {
.family = AF_INET,
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 67d39114d9a6..0144d01417d9 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -340,12 +340,12 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
return 0;
- if (fib_rule_port_range_set(&rule->sport_range) &&
- !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport))
+ if (!fib_rule_port_match(&rule->sport_range, rule->sport_mask,
+ fl6->fl6_sport))
return 0;
- if (fib_rule_port_range_set(&rule->dport_range) &&
- !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport))
+ if (!fib_rule_port_match(&rule->dport_range, rule->dport_mask,
+ fl6->fl6_dport))
return 0;
return 1;
@@ -399,9 +399,9 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct nlattr **tb,
struct netlink_ext_ack *extack)
{
+ struct fib6_rule *rule6 = (struct fib6_rule *)rule;
+ struct net *net = rule->fr_net;
int err = -EINVAL;
- struct net *net = sock_net(skb->sk);
- struct fib6_rule *rule6 = (struct fib6_rule *) rule;
if (!inet_validate_dscp(frh->tos)) {
NL_SET_ERR_MSG(extack,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 8699d1a188dc..ecb5c4b8518f 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1680,7 +1680,7 @@ static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
{
struct net_device *dev = skb->dev;
- struct net *net = dev_net(dev);
+ struct net *net = dev_net_rcu(dev);
struct sock *sk = net->ipv6.ndisc_sk;
int optlen = 0;
struct inet_peer *peer;
@@ -1695,8 +1695,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
bool ret;
- if (netif_is_l3_master(skb->dev)) {
- dev = dev_get_by_index_rcu(dev_net(skb->dev), IPCB(skb)->iif);
+ if (netif_is_l3_master(dev)) {
+ dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
if (!dev)
return;
}
@@ -1734,10 +1734,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
goto release;
}
- rcu_read_lock();
peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr);
ret = inet_peer_xrlim_allow(peer, 1*HZ);
- rcu_read_unlock();
if (!ret)
goto release;
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 46b8adf6e7f8..84d90dd8b3f0 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -119,9 +119,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EINVAL;
ipcm6_init_sk(&ipc6, sk);
- ipc6.sockc.priority = READ_ONCE(sk->sk_priority);
- ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
- ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_oif = oif;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a45aba090aa4..fda640ebd53f 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -769,19 +769,16 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
hdrincl = inet_test_bit(HDRINCL, sk);
+ ipcm6_init_sk(&ipc6, sk);
+
/*
* Get and verify the address.
*/
memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
+ fl6.flowi6_mark = ipc6.sockc.mark;
fl6.flowi6_uid = sk->sk_uid;
- ipcm6_init(&ipc6);
- ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
- ipc6.sockc.mark = fl6.flowi6_mark;
- ipc6.sockc.priority = READ_ONCE(sk->sk_priority);
-
if (sin6) {
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
@@ -891,9 +888,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (hdrincl)
fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH;
- if (ipc6.tclass < 0)
- ipc6.tclass = np->tclass;
-
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
@@ -904,9 +898,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (ipc6.hlimit < 0)
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- if (ipc6.dontfrag < 0)
- ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
-
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2debdf085a3b..a80608260298 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -798,6 +798,8 @@ static void tcp_v6_init_req(struct request_sock *req,
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
+ ireq->ir_rmt_addr = LOOPBACK4_IPV6;
+ ireq->ir_loc_addr = LOOPBACK4_IPV6;
/* So that link locals have meaning */
if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
@@ -1451,10 +1453,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
ip6_dst_store(newsk, dst, NULL, NULL);
- newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
newnp->saddr = ireq->ir_v6_loc_addr;
- newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
- newsk->sk_bound_dev_if = ireq->ir_iif;
/* Now IPv6 options...
@@ -1507,9 +1506,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
tcp_initialize_rcv_mss(newsk);
- newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
- newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
-
#ifdef CONFIG_TCP_MD5SIG
l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c6ea438b5c75..3a0d6c5a8286 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1494,11 +1494,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int is_udplite = IS_UDPLITE(sk);
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
- ipcm6_init(&ipc6);
+ ipcm6_init_sk(&ipc6, sk);
ipc6.gso_size = READ_ONCE(up->gso_size);
- ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
- ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
- ipc6.sockc.priority = READ_ONCE(sk->sk_priority);
/* destination address check */
if (sin6) {
@@ -1704,9 +1701,6 @@ do_udp_sendmsg:
security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
- if (ipc6.tclass < 0)
- ipc6.tclass = np->tclass;
-
fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel);
dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected);
@@ -1752,8 +1746,6 @@ back_from_confirm:
WRITE_ONCE(up->pending, AF_INET6);
do_append_data:
- if (ipc6.dontfrag < 0)
- ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
up->len += ulen;
err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
&ipc6, fl6, dst_rt6_info(dst),
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index f4c1da070826..b98d13584c81 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -547,7 +547,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_uid = sk->sk_uid;
- ipcm6_init(&ipc6);
+ ipcm6_init_sk(&ipc6, sk);
if (lsa) {
if (addr_len < SIN6_LEN_RFC2133)
@@ -634,9 +634,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
- if (ipc6.tclass < 0)
- ipc6.tclass = np->tclass;
-
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
@@ -648,9 +645,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (ipc6.hlimit < 0)
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- if (ipc6.dontfrag < 0)
- ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
-
if (msg->msg_flags & MSG_CONFIRM)
goto do_confirm;
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
index a29ff901df75..b9e451197902 100644
--- a/net/mptcp/fastopen.c
+++ b/net/mptcp/fastopen.c
@@ -40,17 +40,17 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
tp->copied_seq += skb->len;
subflow->ssn_offset += skb->len;
- /* initialize a dummy sequence number, we will update it at MPC
- * completion, if needed
- */
+ /* Only the sequence delta is relevant */
MPTCP_SKB_CB(skb)->map_seq = -skb->len;
MPTCP_SKB_CB(skb)->end_seq = 0;
MPTCP_SKB_CB(skb)->offset = 0;
MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
+ MPTCP_SKB_CB(skb)->cant_coalesce = 1;
mptcp_data_lock(sk);
+ DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));
- mptcp_set_owner_r(skb, sk);
+ skb_set_owner_r(skb, sk);
__skb_queue_tail(&sk->sk_receive_queue, skb);
mptcp_sk(sk)->bytes_received += skb->len;
@@ -58,22 +58,3 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
mptcp_data_unlock(sk);
}
-
-void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt)
-{
- struct sock *sk = (struct sock *)msk;
- struct sk_buff *skb;
-
- skb = skb_peek_tail(&sk->sk_receive_queue);
- if (skb) {
- WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
- pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx\n", sk,
- MPTCP_SKB_CB(skb)->map_seq, MPTCP_SKB_CB(skb)->map_seq + msk->ack_seq,
- MPTCP_SKB_CB(skb)->end_seq, MPTCP_SKB_CB(skb)->end_seq + msk->ack_seq);
- MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq;
- MPTCP_SKB_CB(skb)->end_seq += msk->ack_seq;
- }
-
- pr_debug("msk=%p ack_seq=%llx\n", msk, msk->ack_seq);
-}
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 16c336c51940..b1f36dc1a091 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -10,6 +10,7 @@
#include "protocol.h"
#include "mib.h"
+#include "mptcp_pm_gen.h"
/* path manager command handlers */
@@ -433,14 +434,62 @@ bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc)
return mptcp_pm_nl_is_backup(msk, &skc_local);
}
-int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info)
+static int mptcp_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr,
+ struct genl_info *info)
{
if (info->attrs[MPTCP_PM_ATTR_TOKEN])
- return mptcp_userspace_pm_get_addr(skb, info);
- return mptcp_pm_nl_get_addr(skb, info);
+ return mptcp_userspace_pm_get_addr(id, addr, info);
+ return mptcp_pm_nl_get_addr(id, addr, info);
}
-int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb)
+int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct mptcp_pm_addr_entry addr;
+ struct nlattr *attr;
+ struct sk_buff *msg;
+ void *reply;
+ int ret;
+
+ if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR))
+ return -EINVAL;
+
+ attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
+ ret = mptcp_pm_parse_entry(attr, info, false, &addr);
+ if (ret < 0)
+ return ret;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0,
+ info->genlhdr->cmd);
+ if (!reply) {
+ GENL_SET_ERR_MSG(info, "not enough space in Netlink message");
+ ret = -EMSGSIZE;
+ goto fail;
+ }
+
+ ret = mptcp_pm_get_addr(addr.addr.id, &addr, info);
+ if (ret) {
+ NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found");
+ goto fail;
+ }
+
+ ret = mptcp_nl_fill_addr(msg, &addr);
+ if (ret)
+ goto fail;
+
+ genlmsg_end(msg, reply);
+ ret = genlmsg_reply(msg, info);
+ return ret;
+
+fail:
+ nlmsg_free(msg);
+ return ret;
+}
+
+static int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb)
{
const struct genl_info *info = genl_info_dump(cb);
@@ -449,11 +498,34 @@ int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb)
return mptcp_pm_nl_dump_addr(msg, cb);
}
-int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ return mptcp_pm_dump_addr(msg, cb);
+}
+
+static int mptcp_pm_set_flags(struct genl_info *info)
{
+ struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, };
+ struct nlattr *attr_loc;
+ int ret = -EINVAL;
+
+ if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR))
+ return ret;
+
+ attr_loc = info->attrs[MPTCP_PM_ATTR_ADDR];
+ ret = mptcp_pm_parse_entry(attr_loc, info, false, &loc);
+ if (ret < 0)
+ return ret;
+
if (info->attrs[MPTCP_PM_ATTR_TOKEN])
- return mptcp_userspace_pm_set_flags(skb, info);
- return mptcp_pm_nl_set_flags(skb, info);
+ return mptcp_userspace_pm_set_flags(&loc, info);
+ return mptcp_pm_nl_set_flags(&loc, info);
+}
+
+int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ return mptcp_pm_set_flags(info);
}
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 572d160edca3..99705a9c2238 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1393,28 +1393,35 @@ static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr,
int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
struct mptcp_pm_addr_entry addr, *entry;
+ struct nlattr *attr;
int ret;
+ if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR))
+ return -EINVAL;
+
+ attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
ret = mptcp_pm_parse_entry(attr, info, true, &addr);
if (ret < 0)
return ret;
if (addr.addr.port && !address_use_port(&addr)) {
- GENL_SET_ERR_MSG(info, "flags must have signal and not subflow when using port");
+ NL_SET_ERR_MSG_ATTR(info->extack, attr,
+ "flags must have signal and not subflow when using port");
return -EINVAL;
}
if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL &&
addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
- GENL_SET_ERR_MSG(info, "flags mustn't have both signal and fullmesh");
+ NL_SET_ERR_MSG_ATTR(info->extack, attr,
+ "flags mustn't have both signal and fullmesh");
return -EINVAL;
}
if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) {
- GENL_SET_ERR_MSG(info, "can't create IMPLICIT endpoint");
+ NL_SET_ERR_MSG_ATTR(info->extack, attr,
+ "can't create IMPLICIT endpoint");
return -EINVAL;
}
@@ -1587,12 +1594,16 @@ next:
int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
struct mptcp_pm_addr_entry addr, *entry;
unsigned int addr_max;
+ struct nlattr *attr;
int ret;
+ if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR))
+ return -EINVAL;
+
+ attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
ret = mptcp_pm_parse_entry(attr, info, false, &addr);
if (ret < 0)
return ret;
@@ -1608,7 +1619,7 @@ int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info)
spin_lock_bh(&pernet->lock);
entry = __lookup_addr_by_id(pernet, addr.addr.id);
if (!entry) {
- GENL_SET_ERR_MSG(info, "address not found");
+ NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found");
spin_unlock_bh(&pernet->lock);
return -EINVAL;
}
@@ -1762,61 +1773,24 @@ nla_put_failure:
return -EMSGSIZE;
}
-int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr,
+ struct genl_info *info)
{
- struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
- struct mptcp_pm_addr_entry addr, *entry;
- struct sk_buff *msg;
- void *reply;
- int ret;
-
- ret = mptcp_pm_parse_entry(attr, info, false, &addr);
- if (ret < 0)
- return ret;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0,
- info->genlhdr->cmd);
- if (!reply) {
- GENL_SET_ERR_MSG(info, "not enough space in Netlink message");
- ret = -EMSGSIZE;
- goto fail;
- }
+ struct mptcp_pm_addr_entry *entry;
+ int ret = -EINVAL;
rcu_read_lock();
- entry = __lookup_addr_by_id(pernet, addr.addr.id);
- if (!entry) {
- GENL_SET_ERR_MSG(info, "address not found");
- ret = -EINVAL;
- goto unlock_fail;
+ entry = __lookup_addr_by_id(pernet, id);
+ if (entry) {
+ *addr = *entry;
+ ret = 0;
}
-
- ret = mptcp_nl_fill_addr(msg, entry);
- if (ret)
- goto unlock_fail;
-
- genlmsg_end(msg, reply);
- ret = genlmsg_reply(msg, info);
- rcu_read_unlock();
- return ret;
-
-unlock_fail:
rcu_read_unlock();
-fail:
- nlmsg_free(msg);
return ret;
}
-int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info)
-{
- return mptcp_pm_get_addr(skb, info);
-}
-
int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
struct netlink_callback *cb)
{
@@ -1860,12 +1834,6 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
return msg->len;
}
-int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
-{
- return mptcp_pm_dump_addr(msg, cb);
-}
-
static int parse_limit(struct genl_info *info, int id, unsigned int *limit)
{
struct nlattr *attr = info->attrs[id];
@@ -1875,7 +1843,9 @@ static int parse_limit(struct genl_info *info, int id, unsigned int *limit)
*limit = nla_get_u32(attr);
if (*limit > MPTCP_PM_ADDR_MAX) {
- GENL_SET_ERR_MSG(info, "limit greater than maximum");
+ NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr,
+ "limit greater than maximum (%u)",
+ MPTCP_PM_ADDR_MAX);
return -EINVAL;
}
return 0;
@@ -1981,66 +1951,57 @@ next:
return ret;
}
-int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local,
+ struct genl_info *info)
{
- struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, };
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
MPTCP_PM_ADDR_FLAG_FULLMESH;
- struct net *net = sock_net(skb->sk);
+ struct net *net = genl_info_net(info);
struct mptcp_pm_addr_entry *entry;
struct pm_nl_pernet *pernet;
u8 lookup_by_id = 0;
u8 bkup = 0;
- int ret;
pernet = pm_nl_get_pernet(net);
- ret = mptcp_pm_parse_entry(attr, info, false, &addr);
- if (ret < 0)
- return ret;
-
- if (addr.addr.family == AF_UNSPEC) {
+ if (local->addr.family == AF_UNSPEC) {
lookup_by_id = 1;
- if (!addr.addr.id) {
- GENL_SET_ERR_MSG(info, "missing required inputs");
+ if (!local->addr.id) {
+ NL_SET_ERR_MSG_ATTR(info->extack, attr,
+ "missing address ID");
return -EOPNOTSUPP;
}
}
- if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
+ if (local->flags & MPTCP_PM_ADDR_FLAG_BACKUP)
bkup = 1;
spin_lock_bh(&pernet->lock);
- entry = lookup_by_id ? __lookup_addr_by_id(pernet, addr.addr.id) :
- __lookup_addr(pernet, &addr.addr);
+ entry = lookup_by_id ? __lookup_addr_by_id(pernet, local->addr.id) :
+ __lookup_addr(pernet, &local->addr);
if (!entry) {
spin_unlock_bh(&pernet->lock);
- GENL_SET_ERR_MSG(info, "address not found");
+ NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found");
return -EINVAL;
}
- if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
+ if ((local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
(entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL |
MPTCP_PM_ADDR_FLAG_IMPLICIT))) {
spin_unlock_bh(&pernet->lock);
- GENL_SET_ERR_MSG(info, "invalid addr flags");
+ NL_SET_ERR_MSG_ATTR(info->extack, attr, "invalid addr flags");
return -EINVAL;
}
- changed = (addr.flags ^ entry->flags) & mask;
- entry->flags = (entry->flags & ~mask) | (addr.flags & mask);
- addr = *entry;
+ changed = (local->flags ^ entry->flags) & mask;
+ entry->flags = (entry->flags & ~mask) | (local->flags & mask);
+ *local = *entry;
spin_unlock_bh(&pernet->lock);
- mptcp_nl_set_flags(net, &addr.addr, bkup, changed);
+ mptcp_nl_set_flags(net, &local->addr, bkup, changed);
return 0;
}
-int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info)
-{
- return mptcp_pm_set_flags(skb, info);
-}
-
static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp)
{
genlmsg_multicast_netns(&mptcp_genl_family, net,
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index a3d477059b11..277cf092a870 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -175,14 +175,13 @@ bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk,
static struct mptcp_sock *mptcp_userspace_pm_get_sock(const struct genl_info *info)
{
- struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct mptcp_sock *msk;
+ struct nlattr *token;
- if (!token) {
- GENL_SET_ERR_MSG(info, "missing required token");
+ if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_TOKEN))
return NULL;
- }
+ token = info->attrs[MPTCP_PM_ATTR_TOKEN];
msk = mptcp_token_get_sock(genl_info_net(info), nla_get_u32(token));
if (!msk) {
NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
@@ -190,7 +189,8 @@ static struct mptcp_sock *mptcp_userspace_pm_get_sock(const struct genl_info *in
}
if (!mptcp_pm_is_userspace(msk)) {
- GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ NL_SET_ERR_MSG_ATTR(info->extack, token,
+ "userspace PM not selected");
sock_put((struct sock *)msk);
return NULL;
}
@@ -200,16 +200,14 @@ static struct mptcp_sock *mptcp_userspace_pm_get_sock(const struct genl_info *in
int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct nlattr *addr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct mptcp_pm_addr_entry addr_val;
struct mptcp_sock *msk;
+ struct nlattr *addr;
int err = -EINVAL;
struct sock *sk;
- if (!addr) {
- GENL_SET_ERR_MSG(info, "missing required address");
+ if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR))
return err;
- }
msk = mptcp_userspace_pm_get_sock(info);
if (!msk)
@@ -217,21 +215,27 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
sk = (struct sock *)msk;
+ addr = info->attrs[MPTCP_PM_ATTR_ADDR];
err = mptcp_pm_parse_entry(addr, info, true, &addr_val);
- if (err < 0) {
- GENL_SET_ERR_MSG(info, "error parsing local address");
+ if (err < 0)
+ goto announce_err;
+
+ if (addr_val.addr.id == 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, addr, "invalid addr id");
+ err = -EINVAL;
goto announce_err;
}
- if (addr_val.addr.id == 0 || !(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
- GENL_SET_ERR_MSG(info, "invalid addr id or flags");
+ if (!(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ NL_SET_ERR_MSG_ATTR(info->extack, addr, "invalid addr flags");
err = -EINVAL;
goto announce_err;
}
err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false);
if (err < 0) {
- GENL_SET_ERR_MSG(info, "did not match address and id");
+ NL_SET_ERR_MSG_ATTR(info->extack, addr,
+ "did not match address and id");
goto announce_err;
}
@@ -253,8 +257,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
return err;
}
-static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk,
- struct genl_info *info)
+static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk)
{
struct mptcp_rm_list list = { .nr = 0 };
struct mptcp_subflow_context *subflow;
@@ -269,10 +272,8 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk,
break;
}
}
- if (!has_id_0) {
- GENL_SET_ERR_MSG(info, "address with id 0 not found");
+ if (!has_id_0)
goto remove_err;
- }
list.ids[list.nr++] = 0;
@@ -309,18 +310,17 @@ void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk,
int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID];
struct mptcp_pm_addr_entry *match;
struct mptcp_sock *msk;
+ struct nlattr *id;
int err = -EINVAL;
struct sock *sk;
u8 id_val;
- if (!id) {
- GENL_SET_ERR_MSG(info, "missing required ID");
+ if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_LOC_ID))
return err;
- }
+ id = info->attrs[MPTCP_PM_ATTR_LOC_ID];
id_val = nla_get_u8(id);
msk = mptcp_userspace_pm_get_sock(info);
@@ -330,7 +330,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
sk = (struct sock *)msk;
if (id_val == 0) {
- err = mptcp_userspace_pm_remove_id_zero_address(msk, info);
+ err = mptcp_userspace_pm_remove_id_zero_address(msk);
goto out;
}
@@ -339,7 +339,6 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
spin_lock_bh(&msk->pm.lock);
match = mptcp_userspace_pm_lookup_addr_by_id(msk, id_val);
if (!match) {
- GENL_SET_ERR_MSG(info, "address with specified id not found");
spin_unlock_bh(&msk->pm.lock);
release_sock(sk);
goto out;
@@ -356,25 +355,28 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
err = 0;
out:
+ if (err)
+ NL_SET_ERR_MSG_ATTR_FMT(info->extack, id,
+ "address with id %u not found",
+ id_val);
+
sock_put(sk);
return err;
}
int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
- struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct mptcp_pm_addr_entry entry = { 0 };
struct mptcp_addr_info addr_r;
+ struct nlattr *raddr, *laddr;
struct mptcp_pm_local local;
struct mptcp_sock *msk;
int err = -EINVAL;
struct sock *sk;
- if (!laddr || !raddr) {
- GENL_SET_ERR_MSG(info, "missing required address(es)");
+ if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR) ||
+ GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR_REMOTE))
return err;
- }
msk = mptcp_userspace_pm_get_sock(info);
if (!msk)
@@ -382,24 +384,22 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
sk = (struct sock *)msk;
+ laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
err = mptcp_pm_parse_entry(laddr, info, true, &entry);
- if (err < 0) {
- NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
+ if (err < 0)
goto create_err;
- }
if (entry.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
- GENL_SET_ERR_MSG(info, "invalid addr flags");
+ NL_SET_ERR_MSG_ATTR(info->extack, laddr, "invalid addr flags");
err = -EINVAL;
goto create_err;
}
entry.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
+ raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
err = mptcp_pm_parse_addr(raddr, info, &addr_r);
- if (err < 0) {
- NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr");
+ if (err < 0)
goto create_err;
- }
if (!mptcp_pm_addr_families_match(sk, &entry.addr, &addr_r)) {
GENL_SET_ERR_MSG(info, "families mismatch");
@@ -409,7 +409,8 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
err = mptcp_userspace_pm_append_new_local_addr(msk, &entry, false);
if (err < 0) {
- GENL_SET_ERR_MSG(info, "did not match address and id");
+ NL_SET_ERR_MSG_ATTR(info->extack, laddr,
+ "did not match address and id");
goto create_err;
}
@@ -421,6 +422,9 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
err = __mptcp_subflow_connect(sk, &local, &addr_r);
release_sock(sk);
+ if (err)
+ GENL_SET_ERR_MSG_FMT(info, "connect error: %d", err);
+
spin_lock_bh(&msk->pm.lock);
if (err)
mptcp_userspace_pm_delete_local_addr(msk, &entry);
@@ -483,18 +487,16 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk,
int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
- struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct mptcp_pm_addr_entry addr_l;
struct mptcp_addr_info addr_r;
+ struct nlattr *raddr, *laddr;
struct mptcp_sock *msk;
struct sock *sk, *ssk;
int err = -EINVAL;
- if (!laddr || !raddr) {
- GENL_SET_ERR_MSG(info, "missing required address(es)");
+ if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR) ||
+ GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR_REMOTE))
return err;
- }
msk = mptcp_userspace_pm_get_sock(info);
if (!msk)
@@ -502,17 +504,15 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info
sk = (struct sock *)msk;
+ laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
err = mptcp_pm_parse_entry(laddr, info, true, &addr_l);
- if (err < 0) {
- NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
+ if (err < 0)
goto destroy_err;
- }
+ raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
err = mptcp_pm_parse_addr(raddr, info, &addr_r);
- if (err < 0) {
- NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr");
+ if (err < 0)
goto destroy_err;
- }
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
if (addr_l.addr.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) {
@@ -530,8 +530,14 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info
goto destroy_err;
}
- if (!addr_l.addr.port || !addr_r.port) {
- GENL_SET_ERR_MSG(info, "missing local or remote port");
+ if (!addr_l.addr.port) {
+ NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local port");
+ err = -EINVAL;
+ goto destroy_err;
+ }
+
+ if (!addr_r.port) {
+ NL_SET_ERR_MSG_ATTR(info->extack, raddr, "missing remote port");
err = -EINVAL;
goto destroy_err;
}
@@ -539,6 +545,7 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info
lock_sock(sk);
ssk = mptcp_nl_find_ssk(msk, &addr_l.addr, &addr_r);
if (!ssk) {
+ GENL_SET_ERR_MSG(info, "subflow not found");
err = -ESRCH;
goto release_sock;
}
@@ -557,46 +564,51 @@ destroy_err:
return err;
}
-int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
+int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local,
+ struct genl_info *info)
{
- struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, };
- struct mptcp_pm_addr_entry rem = { .addr = { .family = AF_UNSPEC }, };
- struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
- struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct mptcp_addr_info rem = { .family = AF_UNSPEC, };
struct mptcp_pm_addr_entry *entry;
+ struct nlattr *attr, *attr_rem;
struct mptcp_sock *msk;
int ret = -EINVAL;
struct sock *sk;
u8 bkup = 0;
+ if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR_REMOTE))
+ return ret;
+
msk = mptcp_userspace_pm_get_sock(info);
if (!msk)
return ret;
sk = (struct sock *)msk;
- ret = mptcp_pm_parse_entry(attr, info, false, &loc);
- if (ret < 0)
+ attr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ if (local->addr.family == AF_UNSPEC) {
+ NL_SET_ERR_MSG_ATTR(info->extack, attr,
+ "invalid local address family");
+ ret = -EINVAL;
goto set_flags_err;
-
- if (attr_rem) {
- ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem);
- if (ret < 0)
- goto set_flags_err;
}
- if (loc.addr.family == AF_UNSPEC ||
- rem.addr.family == AF_UNSPEC) {
- GENL_SET_ERR_MSG(info, "invalid address families");
+ attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
+ ret = mptcp_pm_parse_addr(attr_rem, info, &rem);
+ if (ret < 0)
+ goto set_flags_err;
+
+ if (rem.family == AF_UNSPEC) {
+ NL_SET_ERR_MSG_ATTR(info->extack, attr_rem,
+ "invalid remote address family");
ret = -EINVAL;
goto set_flags_err;
}
- if (loc.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
+ if (local->flags & MPTCP_PM_ADDR_FLAG_BACKUP)
bkup = 1;
spin_lock_bh(&msk->pm.lock);
- entry = mptcp_userspace_pm_lookup_addr(msk, &loc.addr);
+ entry = mptcp_userspace_pm_lookup_addr(msk, &local->addr);
if (entry) {
if (bkup)
entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
@@ -606,9 +618,13 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
spin_unlock_bh(&msk->pm.lock);
lock_sock(sk);
- ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc.addr, &rem.addr, bkup);
+ ret = mptcp_pm_nl_mp_prio_send_ack(msk, &local->addr, &rem, bkup);
release_sock(sk);
+ /* mptcp_pm_nl_mp_prio_send_ack() only fails in one case */
+ if (ret < 0)
+ GENL_SET_ERR_MSG(info, "subflow not found");
+
set_flags_err:
sock_put(sk);
return ret;
@@ -663,16 +679,13 @@ int mptcp_userspace_pm_dump_addr(struct sk_buff *msg,
return ret;
}
-int mptcp_userspace_pm_get_addr(struct sk_buff *skb,
+int mptcp_userspace_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr,
struct genl_info *info)
{
- struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
- struct mptcp_pm_addr_entry addr, *entry;
+ struct mptcp_pm_addr_entry *entry;
struct mptcp_sock *msk;
- struct sk_buff *msg;
int ret = -EINVAL;
struct sock *sk;
- void *reply;
msk = mptcp_userspace_pm_get_sock(info);
if (!msk)
@@ -680,50 +693,16 @@ int mptcp_userspace_pm_get_addr(struct sk_buff *skb,
sk = (struct sock *)msk;
- ret = mptcp_pm_parse_entry(attr, info, false, &addr);
- if (ret < 0)
- goto out;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg) {
- ret = -ENOMEM;
- goto out;
- }
-
- reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0,
- info->genlhdr->cmd);
- if (!reply) {
- GENL_SET_ERR_MSG(info, "not enough space in Netlink message");
- ret = -EMSGSIZE;
- goto fail;
- }
-
lock_sock(sk);
spin_lock_bh(&msk->pm.lock);
- entry = mptcp_userspace_pm_lookup_addr_by_id(msk, addr.addr.id);
- if (!entry) {
- GENL_SET_ERR_MSG(info, "address not found");
- ret = -EINVAL;
- goto unlock_fail;
+ entry = mptcp_userspace_pm_lookup_addr_by_id(msk, id);
+ if (entry) {
+ *addr = *entry;
+ ret = 0;
}
-
- ret = mptcp_nl_fill_addr(msg, entry);
- if (ret)
- goto unlock_fail;
-
- genlmsg_end(msg, reply);
- ret = genlmsg_reply(msg, info);
spin_unlock_bh(&msk->pm.lock);
release_sock(sk);
- sock_put(sk);
- return ret;
-unlock_fail:
- spin_unlock_bh(&msk->pm.lock);
- release_sock(sk);
-fail:
- nlmsg_free(msg);
-out:
sock_put(sk);
return ret;
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 6bd819047470..6b61b7dee33b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -118,24 +118,14 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb);
}
-static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size)
-{
- WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc,
- mptcp_sk(sk)->rmem_fwd_alloc + size);
-}
-
-static void mptcp_rmem_charge(struct sock *sk, int size)
-{
- mptcp_rmem_fwd_alloc_add(sk, -size);
-}
-
static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
struct sk_buff *from)
{
bool fragstolen;
int delta;
- if (MPTCP_SKB_CB(from)->offset ||
+ if (unlikely(MPTCP_SKB_CB(to)->cant_coalesce) ||
+ MPTCP_SKB_CB(from)->offset ||
((to->len + from->len) > (sk->sk_rcvbuf >> 3)) ||
!skb_try_coalesce(to, from, &fragstolen, &delta))
return false;
@@ -150,7 +140,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
* negative one
*/
atomic_add(delta, &sk->sk_rmem_alloc);
- mptcp_rmem_charge(sk, delta);
+ sk_mem_charge(sk, delta);
kfree_skb_partial(from, fragstolen);
return true;
@@ -165,44 +155,6 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
return mptcp_try_coalesce((struct sock *)msk, to, from);
}
-static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
-{
- amount >>= PAGE_SHIFT;
- mptcp_rmem_charge(sk, amount << PAGE_SHIFT);
- __sk_mem_reduce_allocated(sk, amount);
-}
-
-static void mptcp_rmem_uncharge(struct sock *sk, int size)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
- int reclaimable;
-
- mptcp_rmem_fwd_alloc_add(sk, size);
- reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
-
- /* see sk_mem_uncharge() for the rationale behind the following schema */
- if (unlikely(reclaimable >= PAGE_SIZE))
- __mptcp_rmem_reclaim(sk, reclaimable);
-}
-
-static void mptcp_rfree(struct sk_buff *skb)
-{
- unsigned int len = skb->truesize;
- struct sock *sk = skb->sk;
-
- atomic_sub(len, &sk->sk_rmem_alloc);
- mptcp_rmem_uncharge(sk, len);
-}
-
-void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
-{
- skb_orphan(skb);
- skb->sk = sk;
- skb->destructor = mptcp_rfree;
- atomic_add(skb->truesize, &sk->sk_rmem_alloc);
- mptcp_rmem_charge(sk, skb->truesize);
-}
-
/* "inspired" by tcp_data_queue_ofo(), main differences:
* - use mptcp seqs
* - don't cope with sacks
@@ -315,25 +267,7 @@ merge_right:
end:
skb_condense(skb);
- mptcp_set_owner_r(skb, sk);
-}
-
-static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
- int amt, amount;
-
- if (size <= msk->rmem_fwd_alloc)
- return true;
-
- size -= msk->rmem_fwd_alloc;
- amt = sk_mem_pages(size);
- amount = amt << PAGE_SHIFT;
- if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV))
- return false;
-
- mptcp_rmem_fwd_alloc_add(sk, amount);
- return true;
+ skb_set_owner_r(skb, sk);
}
static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
@@ -351,7 +285,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
skb_orphan(skb);
/* try to fetch required memory from subflow */
- if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) {
+ if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED);
goto drop;
}
@@ -366,6 +300,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
MPTCP_SKB_CB(skb)->end_seq = MPTCP_SKB_CB(skb)->map_seq + copy_len;
MPTCP_SKB_CB(skb)->offset = offset;
MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp;
+ MPTCP_SKB_CB(skb)->cant_coalesce = 0;
if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) {
/* in sequence */
@@ -375,7 +310,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
if (tail && mptcp_try_coalesce(sk, tail, skb))
return true;
- mptcp_set_owner_r(skb, sk);
+ skb_set_owner_r(skb, sk);
__skb_queue_tail(&sk->sk_receive_queue, skb);
return true;
} else if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) {
@@ -561,7 +496,7 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk, int copied)
bool cleanup, rx_empty;
cleanup = (space > 0) && (space >= (old_space << 1)) && copied;
- rx_empty = !__mptcp_rmem(sk) && copied;
+ rx_empty = !sk_rmem_alloc_get(sk) && copied;
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -634,27 +569,13 @@ static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk)
}
static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
- struct sock *ssk,
- unsigned int *bytes)
+ struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = (struct sock *)msk;
- unsigned int moved = 0;
bool more_data_avail;
struct tcp_sock *tp;
- bool done = false;
- int sk_rbuf;
-
- sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
-
- if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
-
- if (unlikely(ssk_rbuf > sk_rbuf)) {
- WRITE_ONCE(sk->sk_rcvbuf, ssk_rbuf);
- sk_rbuf = ssk_rbuf;
- }
- }
+ bool ret = false;
pr_debug("msk=%p ssk=%p\n", msk, ssk);
tp = tcp_sk(ssk);
@@ -664,20 +585,16 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
struct sk_buff *skb;
bool fin;
+ if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf)
+ break;
+
/* try to move as much data as available */
map_remaining = subflow->map_data_len -
mptcp_subflow_get_map_offset(subflow);
skb = skb_peek(&ssk->sk_receive_queue);
- if (!skb) {
- /* With racing move_skbs_to_msk() and __mptcp_move_skbs(),
- * a different CPU can have already processed the pending
- * data, stop here or we can enter an infinite loop
- */
- if (!moved)
- done = true;
+ if (unlikely(!skb))
break;
- }
if (__mptcp_check_fallback(msk)) {
/* Under fallback skbs have no MPTCP extension and TCP could
@@ -690,19 +607,13 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
offset = seq - TCP_SKB_CB(skb)->seq;
fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
- if (fin) {
- done = true;
+ if (fin)
seq++;
- }
if (offset < skb->len) {
size_t len = skb->len - offset;
- if (tp->urg_data)
- done = true;
-
- if (__mptcp_move_skb(msk, ssk, skb, offset, len))
- moved += len;
+ ret = __mptcp_move_skb(msk, ssk, skb, offset, len) || ret;
seq += len;
if (unlikely(map_remaining < len)) {
@@ -716,22 +627,16 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
}
sk_eat_skb(ssk, skb);
- done = true;
}
WRITE_ONCE(tp->copied_seq, seq);
more_data_avail = mptcp_subflow_data_available(ssk);
- if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) {
- done = true;
- break;
- }
} while (more_data_avail);
- if (moved > 0)
+ if (ret)
msk->last_data_recv = tcp_jiffies32;
- *bytes += moved;
- return done;
+ return ret;
}
static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
@@ -825,9 +730,9 @@ void __mptcp_error_report(struct sock *sk)
static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
{
struct sock *sk = (struct sock *)msk;
- unsigned int moved = 0;
+ bool moved;
- __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
+ moved = __mptcp_move_skbs_from_subflow(msk, ssk);
__mptcp_ofo_queue(msk);
if (unlikely(ssk->sk_err)) {
if (!sock_owned_by_user(sk))
@@ -843,14 +748,29 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
*/
if (mptcp_pending_data_fin(sk, NULL))
mptcp_schedule_work(sk);
- return moved > 0;
+ return moved;
+}
+
+static void __mptcp_rcvbuf_update(struct sock *sk, struct sock *ssk)
+{
+ if (unlikely(ssk->sk_rcvbuf > sk->sk_rcvbuf))
+ WRITE_ONCE(sk->sk_rcvbuf, ssk->sk_rcvbuf);
+}
+
+static void __mptcp_data_ready(struct sock *sk, struct sock *ssk)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ __mptcp_rcvbuf_update(sk, ssk);
+
+ /* Wake-up the reader only for in-sequence data */
+ if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk))
+ sk->sk_data_ready(sk);
}
void mptcp_data_ready(struct sock *sk, struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
- struct mptcp_sock *msk = mptcp_sk(sk);
- int sk_rbuf, ssk_rbuf;
/* The peer can send data while we are shutting down this
* subflow at msk destruction time, but we must avoid enqueuing
@@ -859,19 +779,11 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
if (unlikely(subflow->disposable))
return;
- ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
- sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
- if (unlikely(ssk_rbuf > sk_rbuf))
- sk_rbuf = ssk_rbuf;
-
- /* over limit? can't append more skbs to msk, Also, no need to wake-up*/
- if (__mptcp_rmem(sk) > sk_rbuf)
- return;
-
- /* Wake-up the reader only for in-sequence data */
mptcp_data_lock(sk);
- if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk))
- sk->sk_data_ready(sk);
+ if (!sock_owned_by_user(sk))
+ __mptcp_data_ready(sk, ssk);
+ else
+ __set_bit(MPTCP_DEQUEUE, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
}
@@ -950,20 +862,6 @@ bool mptcp_schedule_work(struct sock *sk)
return false;
}
-static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
-{
- struct mptcp_subflow_context *subflow;
-
- msk_owned_by_me(msk);
-
- mptcp_for_each_subflow(msk, subflow) {
- if (READ_ONCE(subflow->data_avail))
- return mptcp_subflow_tcp_sock(subflow);
- }
-
- return NULL;
-}
-
static bool mptcp_skb_can_collapse_to(u64 write_seq,
const struct sk_buff *skb,
const struct mptcp_ext *mpext)
@@ -1944,16 +1842,17 @@ do_error:
static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied);
-static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
+static int __mptcp_recvmsg_mskq(struct sock *sk,
struct msghdr *msg,
size_t len, int flags,
struct scm_timestamping_internal *tss,
int *cmsg_flags)
{
+ struct mptcp_sock *msk = mptcp_sk(sk);
struct sk_buff *skb, *tmp;
int copied = 0;
- skb_queue_walk_safe(&msk->receive_queue, skb, tmp) {
+ skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
u32 offset = MPTCP_SKB_CB(skb)->offset;
u32 data_len = skb->len - offset;
u32 count = min_t(size_t, len - copied, data_len);
@@ -1985,10 +1884,11 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
}
if (!(flags & MSG_PEEK)) {
- /* we will bulk release the skb memory later */
+ /* avoid the indirect call, we know the destructor is sock_wfree */
skb->destructor = NULL;
- WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize);
- __skb_unlink(skb, &msk->receive_queue);
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+ sk_mem_uncharge(sk, skb->truesize);
+ __skb_unlink(skb, &sk->sk_receive_queue);
__kfree_skb(skb);
msk->bytes_consumed += count;
}
@@ -2101,66 +2001,65 @@ new_measure:
msk->rcvq_space.time = mstamp;
}
-static void __mptcp_update_rmem(struct sock *sk)
+static struct mptcp_subflow_context *
+__mptcp_first_ready_from(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
+ struct mptcp_subflow_context *start_subflow = subflow;
- if (!msk->rmem_released)
- return;
-
- atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc);
- mptcp_rmem_uncharge(sk, msk->rmem_released);
- WRITE_ONCE(msk->rmem_released, 0);
+ while (!READ_ONCE(subflow->data_avail)) {
+ subflow = mptcp_next_subflow(msk, subflow);
+ if (subflow == start_subflow)
+ return NULL;
+ }
+ return subflow;
}
-static void __mptcp_splice_receive_queue(struct sock *sk)
+static bool __mptcp_move_skbs(struct sock *sk)
{
+ struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
+ bool ret = false;
- skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue);
-}
+ if (list_empty(&msk->conn_list))
+ return false;
-static bool __mptcp_move_skbs(struct mptcp_sock *msk)
-{
- struct sock *sk = (struct sock *)msk;
- unsigned int moved = 0;
- bool ret, done;
+ /* verify we can move any data from the subflow, eventually updating */
+ if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
+ mptcp_for_each_subflow(msk, subflow)
+ __mptcp_rcvbuf_update(sk, subflow->tcp_sock);
- do {
- struct sock *ssk = mptcp_subflow_recv_lookup(msk);
+ subflow = list_first_entry(&msk->conn_list,
+ struct mptcp_subflow_context, node);
+ for (;;) {
+ struct sock *ssk;
bool slowpath;
- /* we can have data pending in the subflows only if the msk
- * receive buffer was full at subflow_data_ready() time,
- * that is an unlikely slow path.
+ /*
+ * As an optimization avoid traversing the subflows list
+ * and ev. acquiring the subflow socket lock before baling out
*/
- if (likely(!ssk))
+ if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf)
break;
- slowpath = lock_sock_fast(ssk);
- mptcp_data_lock(sk);
- __mptcp_update_rmem(sk);
- done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
- mptcp_data_unlock(sk);
+ subflow = __mptcp_first_ready_from(msk, subflow);
+ if (!subflow)
+ break;
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ slowpath = lock_sock_fast(ssk);
+ ret = __mptcp_move_skbs_from_subflow(msk, ssk) || ret;
if (unlikely(ssk->sk_err))
__mptcp_error_report(sk);
unlock_sock_fast(ssk, slowpath);
- } while (!done);
- /* acquire the data lock only if some input data is pending */
- ret = moved > 0;
- if (!RB_EMPTY_ROOT(&msk->out_of_order_queue) ||
- !skb_queue_empty_lockless(&sk->sk_receive_queue)) {
- mptcp_data_lock(sk);
- __mptcp_update_rmem(sk);
- ret |= __mptcp_ofo_queue(msk);
- __mptcp_splice_receive_queue(sk);
- mptcp_data_unlock(sk);
+ subflow = mptcp_next_subflow(msk, subflow);
}
+
+ __mptcp_ofo_queue(msk);
if (ret)
mptcp_check_data_fin((struct sock *)msk);
- return !skb_queue_empty(&msk->receive_queue);
+ return ret;
}
static unsigned int mptcp_inq_hint(const struct sock *sk)
@@ -2168,7 +2067,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk)
const struct mptcp_sock *msk = mptcp_sk(sk);
const struct sk_buff *skb;
- skb = skb_peek(&msk->receive_queue);
+ skb = skb_peek(&sk->sk_receive_queue);
if (skb) {
u64 hint_val = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq;
@@ -2214,7 +2113,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
while (copied < len) {
int err, bytes_read;
- bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags);
+ bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags);
if (unlikely(bytes_read < 0)) {
if (!copied)
copied = bytes_read;
@@ -2223,7 +2122,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
copied += bytes_read;
- if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk))
+ if (skb_queue_empty(&sk->sk_receive_queue) && __mptcp_move_skbs(sk))
continue;
/* only the MPTCP socket status is relevant here. The exit
@@ -2249,7 +2148,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
/* race breaker: the shutdown could be after the
* previous receive queue check
*/
- if (__mptcp_move_skbs(msk))
+ if (__mptcp_move_skbs(sk))
continue;
break;
}
@@ -2293,9 +2192,8 @@ out_err:
}
}
- pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n",
- msk, skb_queue_empty_lockless(&sk->sk_receive_queue),
- skb_queue_empty(&msk->receive_queue), copied);
+ pr_debug("msk=%p rx queue empty=%d copied=%d\n",
+ msk, skb_queue_empty(&sk->sk_receive_queue), copied);
release_sock(sk);
return copied;
@@ -2822,11 +2720,8 @@ static void __mptcp_init_sock(struct sock *sk)
INIT_LIST_HEAD(&msk->join_list);
INIT_LIST_HEAD(&msk->rtx_queue);
INIT_WORK(&msk->work, mptcp_worker);
- __skb_queue_head_init(&msk->receive_queue);
msk->out_of_order_queue = RB_ROOT;
msk->first_pending = NULL;
- WRITE_ONCE(msk->rmem_fwd_alloc, 0);
- WRITE_ONCE(msk->rmem_released, 0);
msk->timer_ival = TCP_RTO_MIN;
msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
@@ -3052,8 +2947,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk->sk_prot->destroy(sk);
- WARN_ON_ONCE(READ_ONCE(msk->rmem_fwd_alloc));
- WARN_ON_ONCE(msk->rmem_released);
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
@@ -3405,18 +3298,12 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
mptcp_for_each_subflow_safe(msk, subflow, tmp)
__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, flags);
- /* move to sk_receive_queue, sk_stream_kill_queues will purge it */
- mptcp_data_lock(sk);
- skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue);
__skb_queue_purge(&sk->sk_receive_queue);
skb_rbtree_purge(&msk->out_of_order_queue);
- mptcp_data_unlock(sk);
/* move all the rx fwd alloc into the sk_mem_reclaim_final in
* inet_sock_destruct() will dispose it
*/
- sk_forward_alloc_add(sk, msk->rmem_fwd_alloc);
- WRITE_ONCE(msk->rmem_fwd_alloc, 0);
mptcp_token_destroy(msk);
mptcp_pm_free_anno_list(msk);
mptcp_free_local_addr_list(msk);
@@ -3453,7 +3340,8 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
#define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \
BIT(MPTCP_RETRANSMIT) | \
- BIT(MPTCP_FLUSH_JOIN_LIST))
+ BIT(MPTCP_FLUSH_JOIN_LIST) | \
+ BIT(MPTCP_DEQUEUE))
/* processes deferred events and flush wmem */
static void mptcp_release_cb(struct sock *sk)
@@ -3487,6 +3375,11 @@ static void mptcp_release_cb(struct sock *sk)
__mptcp_push_pending(sk, 0);
if (flags & BIT(MPTCP_RETRANSMIT))
__mptcp_retrans(sk);
+ if ((flags & BIT(MPTCP_DEQUEUE)) && __mptcp_move_skbs(sk)) {
+ /* notify ack seq update */
+ mptcp_cleanup_rbuf(msk, 0);
+ sk->sk_data_ready(sk);
+ }
cond_resched();
spin_lock_bh(&sk->sk_lock.slock);
@@ -3506,8 +3399,6 @@ static void mptcp_release_cb(struct sock *sk)
if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags))
__mptcp_sync_sndbuf(sk);
}
-
- __mptcp_update_rmem(sk);
}
/* MP_JOIN client subflow must wait for 4th ack before sending any data:
@@ -3678,12 +3569,6 @@ static void mptcp_shutdown(struct sock *sk, int how)
__mptcp_wr_shutdown(sk);
}
-static int mptcp_forward_alloc_get(const struct sock *sk)
-{
- return READ_ONCE(sk->sk_forward_alloc) +
- READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc);
-}
-
static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
{
const struct sock *sk = (void *)msk;
@@ -3724,7 +3609,8 @@ static int mptcp_ioctl(struct sock *sk, int cmd, int *karg)
return -EINVAL;
lock_sock(sk);
- __mptcp_move_skbs(msk);
+ if (__mptcp_move_skbs(sk))
+ mptcp_cleanup_rbuf(msk, 0);
*karg = mptcp_inq_hint(sk);
release_sock(sk);
break;
@@ -3841,7 +3727,6 @@ static struct proto mptcp_prot = {
.hash = mptcp_hash,
.unhash = mptcp_unhash,
.get_port = mptcp_get_port,
- .forward_alloc_get = mptcp_forward_alloc_get,
.stream_memory_free = mptcp_stream_memory_free,
.sockets_allocated = &mptcp_sockets_allocated,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index f6a207958459..ca65f8bff632 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -124,12 +124,14 @@
#define MPTCP_FLUSH_JOIN_LIST 5
#define MPTCP_SYNC_STATE 6
#define MPTCP_SYNC_SNDBUF 7
+#define MPTCP_DEQUEUE 8
struct mptcp_skb_cb {
u64 map_seq;
u64 end_seq;
u32 offset;
- u8 has_rxtstamp:1;
+ u8 has_rxtstamp;
+ u8 cant_coalesce;
};
#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
@@ -279,7 +281,6 @@ struct mptcp_sock {
u64 rcv_data_fin_seq;
u64 bytes_retrans;
u64 bytes_consumed;
- int rmem_fwd_alloc;
int snd_burst;
int old_wspace;
u64 recovery_snd_nxt; /* in recovery mode accept up to this seq;
@@ -294,7 +295,6 @@ struct mptcp_sock {
u32 last_ack_recv;
unsigned long timer_ival;
u32 token;
- int rmem_released;
unsigned long flags;
unsigned long cb_flags;
bool recovery; /* closing subflow write queue reinjected */
@@ -324,7 +324,6 @@ struct mptcp_sock {
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
- struct sk_buff_head receive_queue;
struct list_head conn_list;
struct list_head rtx_queue;
struct mptcp_data_frag *first_pending;
@@ -355,6 +354,8 @@ struct mptcp_sock {
list_for_each_entry(__subflow, &((__msk)->conn_list), node)
#define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \
list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node)
+#define mptcp_next_subflow(__msk, __subflow) \
+ list_next_entry_circular(__subflow, &((__msk)->conn_list), node)
extern struct genl_family mptcp_genl_family;
@@ -381,14 +382,6 @@ static inline void msk_owned_by_me(const struct mptcp_sock *msk)
#define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk)
#endif
-/* the msk socket don't use the backlog, also account for the bulk
- * free memory
- */
-static inline int __mptcp_rmem(const struct sock *sk)
-{
- return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released);
-}
-
static inline int mptcp_win_from_space(const struct sock *sk, int space)
{
return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space);
@@ -401,7 +394,8 @@ static inline int mptcp_space_from_win(const struct sock *sk, int win)
static inline int __mptcp_space(const struct sock *sk)
{
- return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
+ return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
+ sk_rmem_alloc_get(sk));
}
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
@@ -1038,9 +1032,10 @@ bool mptcp_lookup_subflow_by_saddr(const struct list_head *list,
const struct mptcp_addr_info *saddr);
bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
-int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
-int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info);
-int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local,
+ struct genl_info *info);
+int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local,
+ struct genl_info *info);
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
bool echo);
@@ -1058,8 +1053,6 @@ void mptcp_event_pm_listener(const struct sock *ssk,
enum mptcp_event_type event);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
-void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt);
void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
struct request_sock *req);
int mptcp_nl_fill_addr(struct sk_buff *skb,
@@ -1131,14 +1124,13 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_in
bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc);
bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
-int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb);
int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
struct netlink_callback *cb);
int mptcp_userspace_pm_dump_addr(struct sk_buff *msg,
struct netlink_callback *cb);
-int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info);
-int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info);
-int mptcp_userspace_pm_get_addr(struct sk_buff *skb,
+int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr,
+ struct genl_info *info);
+int mptcp_userspace_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr,
struct genl_info *info);
static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index fd021cf8286e..d2caffa56bdd 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -802,9 +802,6 @@ void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
subflow_set_remote_key(msk, subflow, mp_opt);
WRITE_ONCE(subflow->fully_established, true);
WRITE_ONCE(msk->fully_established, true);
-
- if (subflow->is_mptfo)
- __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
}
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
@@ -1271,7 +1268,12 @@ out:
subflow->map_valid = 0;
}
-/* sched mptcp worker to remove the subflow if no more data is pending */
+static bool subflow_is_done(const struct sock *sk)
+{
+ return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
+}
+
+/* sched mptcp worker for subflow cleanup if no more data is pending */
static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
{
struct sock *sk = (struct sock *)msk;
@@ -1281,8 +1283,18 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss
inet_sk_state_load(sk) != TCP_ESTABLISHED)))
return;
- if (skb_queue_empty(&ssk->sk_receive_queue) &&
- !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+ if (!skb_queue_empty(&ssk->sk_receive_queue))
+ return;
+
+ if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+ mptcp_schedule_work(sk);
+
+ /* when the fallback subflow closes the rx side, trigger a 'dummy'
+ * ingress data fin, so that the msk state will follow along
+ */
+ if (__mptcp_check_fallback(msk) && subflow_is_done(ssk) &&
+ msk->first == ssk &&
+ mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true))
mptcp_schedule_work(sk);
}
@@ -1842,11 +1854,6 @@ static void __subflow_state_change(struct sock *sk)
rcu_read_unlock();
}
-static bool subflow_is_done(const struct sock *sk)
-{
- return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
-}
-
static void subflow_state_change(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -1873,13 +1880,6 @@ static void subflow_state_change(struct sock *sk)
subflow_error_report(sk);
subflow_sched_work_if_closed(mptcp_sk(parent), sk);
-
- /* when the fallback subflow closes the rx side, trigger a 'dummy'
- * ingress data fin, so that the msk state will follow along
- */
- if (__mptcp_check_fallback(msk) && subflow_is_done(sk) && msk->first == sk &&
- mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true))
- mptcp_schedule_work(parent);
}
void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 85311226183a..f8f13058a46e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -771,6 +771,7 @@ static int netlink_release(struct socket *sock)
nlk->cb.done(&nlk->cb);
module_put(nlk->cb.module);
kfree_skb(nlk->cb.skb);
+ WRITE_ONCE(nlk->cb_running, false);
}
module_put(nlk->module);
diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c
index ba91284f4086..e6cf4eb06b46 100644
--- a/net/nfc/hci/llc.c
+++ b/net/nfc/hci/llc.c
@@ -78,17 +78,6 @@ static struct nfc_llc_engine *nfc_llc_name_to_engine(const char *name)
return NULL;
}
-void nfc_llc_unregister(const char *name)
-{
- struct nfc_llc_engine *llc_engine;
-
- llc_engine = nfc_llc_name_to_engine(name);
- if (llc_engine == NULL)
- return;
-
- nfc_llc_del_engine(llc_engine);
-}
-
struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev,
xmit_to_drv_t xmit_to_drv,
rcv_to_hci_t rcv_to_hci, int tx_headroom,
diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h
index d66271d211a5..09914608ec43 100644
--- a/net/nfc/hci/llc.h
+++ b/net/nfc/hci/llc.h
@@ -40,7 +40,6 @@ struct nfc_llc {
void *nfc_llc_get_data(struct nfc_llc *llc);
int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops);
-void nfc_llc_unregister(const char *name);
int nfc_llc_nop_register(void);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c131e5ceea37..3e9ddf72cd03 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2102,8 +2102,8 @@ retry:
skb->protocol = proto;
skb->dev = dev;
- skb->priority = READ_ONCE(sk->sk_priority);
- skb->mark = READ_ONCE(sk->sk_mark);
+ skb->priority = sockc.priority;
+ skb->mark = sockc.mark;
skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid);
skb_setup_tx_timestamp(skb, &sockc);
@@ -2634,8 +2634,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->protocol = proto;
skb->dev = dev;
- skb->priority = READ_ONCE(po->sk.sk_priority);
- skb->mark = READ_ONCE(po->sk.sk_mark);
+ skb->priority = sockc->priority;
+ skb->mark = sockc->mark;
skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid);
skb_setup_tx_timestamp(skb, sockc);
skb_zcopy_set_nouarg(skb, ph.raw);
@@ -3039,7 +3039,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
goto out_unlock;
sockcm_init(&sockc, sk);
- sockc.mark = READ_ONCE(sk->sk_mark);
if (msg->msg_controllen) {
err = sock_cmsg_send(sk, msg, &sockc);
if (unlikely(err))
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 8996c73c9779..3f2e707a11d1 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -460,7 +460,7 @@ META_COLLECTOR(int_sk_fwd_alloc)
*err = -1;
return;
}
- dst->value = sk_forward_alloc_get(sk);
+ dst->value = READ_ONCE(sk->sk_forward_alloc);
}
META_COLLECTOR(int_sk_sndbuf)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 89d2bef96469..0edf25973072 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -875,7 +875,7 @@ static bool xsk_no_wakeup(struct sock *sk)
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Prefer busy-polling, skip the wakeup. */
return READ_ONCE(sk->sk_prefer_busy_poll) && READ_ONCE(sk->sk_ll_usec) &&
- READ_ONCE(sk->sk_napi_id) >= MIN_NAPI_ID;
+ napi_id_valid(READ_ONCE(sk->sk_napi_id));
#else
return false;
#endif
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 1f7975b49657..c263fb7a68dc 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -699,18 +699,56 @@ void xp_free(struct xdp_buff_xsk *xskb)
}
EXPORT_SYMBOL(xp_free);
-void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
+static u64 __xp_raw_get_addr(const struct xsk_buff_pool *pool, u64 addr)
+{
+ return pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
+}
+
+static void *__xp_raw_get_data(const struct xsk_buff_pool *pool, u64 addr)
{
- addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
return pool->addrs + addr;
}
+
+void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
+{
+ return __xp_raw_get_data(pool, __xp_raw_get_addr(pool, addr));
+}
EXPORT_SYMBOL(xp_raw_get_data);
-dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
+static dma_addr_t __xp_raw_get_dma(const struct xsk_buff_pool *pool, u64 addr)
{
- addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
return (pool->dma_pages[addr >> PAGE_SHIFT] &
~XSK_NEXT_PG_CONTIG_MASK) +
(addr & ~PAGE_MASK);
}
+
+dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
+{
+ return __xp_raw_get_dma(pool, __xp_raw_get_addr(pool, addr));
+}
EXPORT_SYMBOL(xp_raw_get_dma);
+
+/**
+ * xp_raw_get_ctx - get &xdp_desc context
+ * @pool: XSk buff pool desc address belongs to
+ * @addr: desc address (from userspace)
+ *
+ * Helper for getting desc's DMA address and metadata pointer, if present.
+ * Saves one call on hotpath, double calculation of the actual address,
+ * and inline checks for metadata presence and sanity.
+ *
+ * Return: new &xdp_desc_ctx struct containing desc's DMA address and metadata
+ * pointer, if it is present and valid (initialized to %NULL otherwise).
+ */
+struct xdp_desc_ctx xp_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr)
+{
+ struct xdp_desc_ctx ret;
+
+ addr = __xp_raw_get_addr(pool, addr);
+
+ ret.dma = __xp_raw_get_dma(pool, addr);
+ ret.meta = __xsk_buff_get_metadata(pool, __xp_raw_get_data(pool, addr));
+
+ return ret;
+}
+EXPORT_SYMBOL(xp_raw_get_ctx);
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index e4be227d3ad6..4e82f3871473 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -87,6 +87,11 @@ enum {
};
enum {
+ __NETDEV_A_IO_URING_PROVIDER_INFO_MAX,
+ NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1)
+};
+
+enum {
NETDEV_A_PAGE_POOL_ID = 1,
NETDEV_A_PAGE_POOL_IFINDEX,
NETDEV_A_PAGE_POOL_NAPI_ID,
@@ -94,6 +99,7 @@ enum {
NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
NETDEV_A_PAGE_POOL_DETACH_TIME,
NETDEV_A_PAGE_POOL_DMABUF,
+ NETDEV_A_PAGE_POOL_IO_URING,
__NETDEV_A_PAGE_POOL_MAX,
NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1)
@@ -131,11 +137,18 @@ enum {
};
enum {
+ __NETDEV_A_XSK_INFO_MAX,
+ NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1)
+};
+
+enum {
NETDEV_A_QUEUE_ID = 1,
NETDEV_A_QUEUE_IFINDEX,
NETDEV_A_QUEUE_TYPE,
NETDEV_A_QUEUE_NAPI_ID,
NETDEV_A_QUEUE_DMABUF,
+ NETDEV_A_QUEUE_IO_URING,
+ NETDEV_A_QUEUE_XSK,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps
index 0712b5e82eb7..f3269ce39e5b 100644
--- a/tools/net/ynl/Makefile.deps
+++ b/tools/net/ynl/Makefile.deps
@@ -17,10 +17,13 @@ get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2)
CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h)
CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h)
CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \
- $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h)
+ $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) \
+ $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_GENERATED_H,ethtool_netlink_generated.h)
CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h)
CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h)
+CFLAGS_net_shaper:=$(call get_hdr_inc,_LINUX_NET_SHAPER_H,net_shaper.h)
CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h)
+CFLAGS_nl80211:=$(call get_hdr_inc,__LINUX_NL802121_H,nl80211.h)
CFLAGS_nlctrl:=$(call get_hdr_inc,__LINUX_GENERIC_NETLINK_H,genetlink.h)
CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h)
CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py
index 08f8bf89cfc2..dcc2c6b298d6 100644
--- a/tools/net/ynl/pyynl/lib/ynl.py
+++ b/tools/net/ynl/pyynl/lib/ynl.py
@@ -536,9 +536,11 @@ class YnlFamily(SpecFamily):
try:
return int(value)
except (ValueError, TypeError) as e:
- if 'enum' not in attr_spec:
- raise e
- return self._encode_enum(attr_spec, value)
+ if 'enum' in attr_spec:
+ return self._encode_enum(attr_spec, value)
+ if attr_spec.display_hint:
+ return self._from_string(value, attr_spec)
+ raise e
def _add_attr(self, space, name, value, search_attrs):
try:
@@ -571,7 +573,10 @@ class YnlFamily(SpecFamily):
if isinstance(value, bytes):
attr_payload = value
elif isinstance(value, str):
- attr_payload = bytes.fromhex(value)
+ if attr.display_hint:
+ attr_payload = self._from_string(value, attr)
+ else:
+ attr_payload = bytes.fromhex(value)
elif isinstance(value, dict) and attr.struct_name:
attr_payload = self._encode_struct(attr.struct_name, value)
else:
@@ -627,6 +632,11 @@ class YnlFamily(SpecFamily):
decoded = self._decode_struct(attr.raw, attr_spec.struct_name)
elif attr_spec.sub_type:
decoded = attr.as_c_array(attr_spec.sub_type)
+ if 'enum' in attr_spec:
+ decoded = [ self._decode_enum(x, attr_spec) for x in decoded ]
+ elif attr_spec.display_hint:
+ decoded = [ self._formatted_string(x, attr_spec.display_hint)
+ for x in decoded ]
else:
decoded = attr.as_bin()
if attr_spec.display_hint:
@@ -644,15 +654,17 @@ class YnlFamily(SpecFamily):
subattrs = self._decode(NlAttrs(item.raw), attr_spec['nested-attributes'])
decoded.append({ item.type: subattrs })
elif attr_spec["sub-type"] == 'binary':
- subattrs = item.as_bin()
+ subattr = item.as_bin()
if attr_spec.display_hint:
- subattrs = self._formatted_string(subattrs, attr_spec.display_hint)
- decoded.append(subattrs)
+ subattr = self._formatted_string(subattr, attr_spec.display_hint)
+ decoded.append(subattr)
elif attr_spec["sub-type"] in NlAttr.type_formats:
- subattrs = item.as_scalar(attr_spec['sub-type'], attr_spec.byte_order)
- if attr_spec.display_hint:
- subattrs = self._formatted_string(subattrs, attr_spec.display_hint)
- decoded.append(subattrs)
+ subattr = item.as_scalar(attr_spec['sub-type'], attr_spec.byte_order)
+ if 'enum' in attr_spec:
+ subattr = self._decode_enum(subattr, attr_spec)
+ elif attr_spec.display_hint:
+ subattr = self._formatted_string(subattr, attr_spec.display_hint)
+ decoded.append(subattr)
else:
raise Exception(f'Unknown {attr_spec["sub-type"]} with name {attr_spec["name"]}')
return decoded
@@ -899,6 +911,18 @@ class YnlFamily(SpecFamily):
formatted = raw
return formatted
+ def _from_string(self, string, attr_spec):
+ if attr_spec.display_hint in ['ipv4', 'ipv6']:
+ ip = ipaddress.ip_address(string)
+ if attr_spec['type'] == 'binary':
+ raw = ip.packed
+ else:
+ raw = int(ip)
+ else:
+ raise Exception(f"Display hint '{attr_spec.display_hint}' not implemented"
+ f" when parsing '{attr_spec['name']}'")
+ return raw
+
def handle_ntf(self, decoded):
msg = dict()
if self.include_raw:
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index c2eabc90dce8..a1427c537030 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -74,6 +74,8 @@ class Type(SpecAttr):
self.c_name = c_lower(self.name)
if self.c_name in _C_KW:
self.c_name += '_'
+ if self.c_name[0].isdigit():
+ self.c_name = '_' + self.c_name
# Added by resolve():
self.enum_name = None
@@ -100,7 +102,7 @@ class Type(SpecAttr):
if isinstance(value, int):
return value
if value in self.family.consts:
- raise Exception("Resolving family constants not implemented, yet")
+ return self.family.consts[value]["value"]
return limit_to_number(value)
def get_limit_str(self, limit, default=None, suffix=''):
@@ -110,6 +112,9 @@ class Type(SpecAttr):
if isinstance(value, int):
return str(value) + suffix
if value in self.family.consts:
+ const = self.family.consts[value]
+ if const.get('header'):
+ return c_upper(value)
return c_upper(f"{self.family['name']}-{value}")
return c_upper(value)
@@ -683,7 +688,10 @@ class TypeArrayNest(Type):
raise Exception(f"Sub-type {self.attr['sub-type']} not supported yet")
def _attr_typol(self):
- return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
+ if self.attr['sub-type'] in scalars:
+ return f'.type = YNL_PT_U{c_upper(self.sub_type[1:])}, '
+ else:
+ return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
def _attr_get(self, ri, var):
local_vars = ['const struct nlattr *attr2;']
@@ -885,7 +893,7 @@ class AttrSet(SpecAttrSet):
elif elem['type'] == 'nest':
t = TypeNest(self.family, self, elem, value)
elif elem['type'] == 'indexed-array' and 'sub-type' in elem:
- if elem["sub-type"] == 'nest':
+ if elem["sub-type"] in ['nest', 'u32']:
t = TypeArrayNest(self.family, self, elem, value)
else:
raise Exception(f'new_attr: unsupported sub-type {elem["sub-type"]}')
@@ -1437,7 +1445,7 @@ class CodeWriter:
self._ifdef_block = config_option
-scalars = {'u8', 'u16', 'u32', 'u64', 's32', 's64', 'uint', 'sint'}
+scalars = {'u8', 'u16', 'u32', 'u64', 's8', 's16', 's32', 's64', 'uint', 'sint'}
direction_to_suffix = {
'reply': '_rsp',
@@ -1669,6 +1677,9 @@ def _multi_parse(ri, struct, init_lines, local_vars):
if aspec["sub-type"] == 'nest':
local_vars.append(f'const struct nlattr *attr_{aspec.c_name};')
array_nests.add(arg)
+ elif aspec['sub-type'] in scalars:
+ local_vars.append(f'const struct nlattr *attr_{aspec.c_name};')
+ array_nests.add(arg)
else:
raise Exception(f'Not supported sub-type {aspec["sub-type"]}')
if 'multi-attr' in aspec:
@@ -1724,11 +1735,17 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};")
ri.cw.p('i = 0;')
- ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
+ if 'nested-attributes' in aspec:
+ ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
ri.cw.block_start(line=f"ynl_attr_for_each_nested(attr, attr_{aspec.c_name})")
- ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];")
- ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, ynl_attr_type(attr)))")
- ri.cw.p('return YNL_PARSE_CB_ERROR;')
+ if 'nested-attributes' in aspec:
+ ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];")
+ ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, ynl_attr_type(attr)))")
+ ri.cw.p('return YNL_PARSE_CB_ERROR;')
+ elif aspec.sub_type in scalars:
+ ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.sub_type}(attr);")
+ else:
+ raise Exception(f"Nest parsing type not supported in {aspec['name']}")
ri.cw.p('i++;')
ri.cw.block_end()
ri.cw.block_end()
@@ -2549,6 +2566,9 @@ def render_uapi(family, cw):
defines = []
for const in family['definitions']:
+ if const.get('header'):
+ continue
+
if const['type'] != 'const':
cw.writes_defines(defines)
defines = []
diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore
new file mode 100644
index 000000000000..ec746f374e85
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+xdp_helper
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index 137470bdee0c..0c95bd944d56 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -1,13 +1,18 @@
# SPDX-License-Identifier: GPL-2.0
+CFLAGS += $(KHDR_INCLUDES)
TEST_INCLUDES := $(wildcard lib/py/*.py) \
$(wildcard lib/sh/*.sh) \
../../net/net_helper.sh \
../../net/lib.sh \
+TEST_GEN_FILES := xdp_helper
+
TEST_PROGS := \
netcons_basic.sh \
+ netcons_fragmented_msg.sh \
netcons_overflow.sh \
+ netcons_sysdata.sh \
ping.py \
queues.py \
stats.py \
diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config
index a2d8af60876d..f27172ddee0a 100644
--- a/tools/testing/selftests/drivers/net/config
+++ b/tools/testing/selftests/drivers/net/config
@@ -4,3 +4,4 @@ CONFIG_CONFIGFS_FS=y
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
CONFIG_NETCONSOLE_EXTENDED_LOG=y
+CONFIG_XDP_SOCKETS=y
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index 21ba64ce1e34..ae783e18be83 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -15,6 +15,7 @@ TEST_PROGS = \
nic_performance.py \
pp_alloc_fail.py \
rss_ctx.py \
+ tso.py \
#
TEST_FILES := \
diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py
index cb40497faee4..701aca1361e0 100755
--- a/tools/testing/selftests/drivers/net/hw/csum.py
+++ b/tools/testing/selftests/drivers/net/hw/csum.py
@@ -9,15 +9,12 @@ from lib.py import ksft_run, ksft_exit, KsftSkipEx
from lib.py import EthtoolFamily, NetDrvEpEnv
from lib.py import bkg, cmd, wait_port_listen
-def test_receive(cfg, ipv4=False, extra_args=None):
+def test_receive(cfg, ipver="6", extra_args=None):
"""Test local nic checksum receive. Remote host sends crafted packets."""
if not cfg.have_rx_csum:
raise KsftSkipEx(f"Test requires rx checksum offload on {cfg.ifname}")
- if ipv4:
- ip_args = f"-4 -S {cfg.remote_v4} -D {cfg.v4}"
- else:
- ip_args = f"-6 -S {cfg.remote_v6} -D {cfg.v6}"
+ ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}"
rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}"
tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}"
@@ -27,17 +24,14 @@ def test_receive(cfg, ipv4=False, extra_args=None):
cmd(tx_cmd, host=cfg.remote)
-def test_transmit(cfg, ipv4=False, extra_args=None):
+def test_transmit(cfg, ipver="6", extra_args=None):
"""Test local nic checksum transmit. Remote host verifies packets."""
if (not cfg.have_tx_csum_generic and
- not (cfg.have_tx_csum_ipv4 and ipv4) and
- not (cfg.have_tx_csum_ipv6 and not ipv4)):
+ not (cfg.have_tx_csum_ipv4 and ipver == "4") and
+ not (cfg.have_tx_csum_ipv6 and ipver == "6")):
raise KsftSkipEx(f"Test requires tx checksum offload on {cfg.ifname}")
- if ipv4:
- ip_args = f"-4 -S {cfg.v4} -D {cfg.remote_v4}"
- else:
- ip_args = f"-6 -S {cfg.v6} -D {cfg.remote_v6}"
+ ip_args = f"-{ipver} -S {cfg.addr_v[ipver]} -D {cfg.remote_addr_v[ipver]}"
# Cannot randomize input when calculating zero checksum
if extra_args != "-U -Z":
@@ -51,26 +45,20 @@ def test_transmit(cfg, ipv4=False, extra_args=None):
cmd(tx_cmd)
-def test_builder(name, cfg, ipv4=False, tx=False, extra_args=""):
+def test_builder(name, cfg, ipver="6", tx=False, extra_args=""):
"""Construct specific tests from the common template.
Most tests follow the same basic pattern, differing only in
Direction of the test and optional flags passed to csum."""
def f(cfg):
- if ipv4:
- cfg.require_v4()
- else:
- cfg.require_v6()
+ cfg.require_ipver(ipver)
if tx:
- test_transmit(cfg, ipv4, extra_args)
+ test_transmit(cfg, ipver, extra_args)
else:
- test_receive(cfg, ipv4, extra_args)
+ test_receive(cfg, ipver, extra_args)
- if ipv4:
- f.__name__ = "ipv4_" + name
- else:
- f.__name__ = "ipv6_" + name
+ f.__name__ = f"ipv{ipver}_" + name
return f
@@ -100,19 +88,19 @@ def main() -> None:
with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
check_nic_features(cfg)
- cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../net/lib/csum")
+ cfg.bin_local = cfg.rpath("../../../net/lib/csum")
cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
cases = []
- for ipv4 in [True, False]:
- cases.append(test_builder("rx_tcp", cfg, ipv4, False, "-t"))
- cases.append(test_builder("rx_tcp_invalid", cfg, ipv4, False, "-t -E"))
+ for ipver in ["4", "6"]:
+ cases.append(test_builder("rx_tcp", cfg, ipver, False, "-t"))
+ cases.append(test_builder("rx_tcp_invalid", cfg, ipver, False, "-t -E"))
- cases.append(test_builder("rx_udp", cfg, ipv4, False, ""))
- cases.append(test_builder("rx_udp_invalid", cfg, ipv4, False, "-E"))
+ cases.append(test_builder("rx_udp", cfg, ipver, False, ""))
+ cases.append(test_builder("rx_udp_invalid", cfg, ipver, False, "-E"))
- cases.append(test_builder("tx_udp_csum_offload", cfg, ipv4, True, "-U"))
- cases.append(test_builder("tx_udp_zero_checksum", cfg, ipv4, True, "-U -Z"))
+ cases.append(test_builder("tx_udp_csum_offload", cfg, ipver, True, "-U"))
+ cases.append(test_builder("tx_udp_zero_checksum", cfg, ipver, True, "-U -Z"))
ksft_run(cases=cases, args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py
index 1223f0f5c10c..3947e9157115 100755
--- a/tools/testing/selftests/drivers/net/hw/devmem.py
+++ b/tools/testing/selftests/drivers/net/hw/devmem.py
@@ -21,15 +21,15 @@ def require_devmem(cfg):
@ksft_disruptive
def check_rx(cfg) -> None:
- cfg.require_v6()
+ cfg.require_ipver("6")
require_devmem(cfg)
port = rand_port()
- listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.v6} -p {port}"
+ listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port}"
with bkg(listen_cmd) as socat:
wait_port_listen(port)
- cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.v6}]:{port}", host=cfg.remote, shell=True)
+ cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.addr_v['6']}]:{port}", host=cfg.remote, shell=True)
ksft_eq(socat.stdout.strip(), "hello\nworld")
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index 19a6969643f4..2bf14ac2b8c6 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -50,7 +50,6 @@
#include <linux/memfd.h>
#include <linux/dma-buf.h>
#include <linux/udmabuf.h>
-#include <libmnl/libmnl.h>
#include <linux/types.h>
#include <linux/netlink.h>
#include <linux/genetlink.h>
diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
index 319aaa004c40..d6e69d7d5e43 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -4,7 +4,8 @@
import datetime
import random
import re
-from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt, ksft_true
+from lib.py import ksft_run, ksft_pr, ksft_exit
+from lib.py import ksft_eq, ksft_ne, ksft_ge, ksft_in, ksft_lt, ksft_true, ksft_raises
from lib.py import NetDrvEpEnv
from lib.py import EthtoolFamily, NetdevFamily
from lib.py import KsftSkipEx, KsftFailEx
@@ -58,6 +59,14 @@ def require_ntuple(cfg):
raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"]))
+def require_context_cnt(cfg, need_cnt):
+ # There's no good API to get the context count, so the tests
+ # which try to add a lot opportunisitically set the count they
+ # discovered. Careful with test ordering!
+ if need_cnt and cfg.context_cnt and cfg.context_cnt < need_cnt:
+ raise KsftSkipEx(f"Test requires at least {need_cnt} contexts, but device only has {cfg.context_cnt}")
+
+
# Get Rx packet counts for all queues, as a simple list of integers
# if @prev is specified the prev counts will be subtracted
def _get_rx_cnts(cfg, prev=None):
@@ -456,6 +465,8 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None):
raise
ksft_pr(f"Failed to create context {i + 1}, trying to test what we got")
ctx_cnt = i
+ if cfg.context_cnt is None:
+ cfg.context_cnt = ctx_cnt
break
_rss_key_check(cfg, context=ctx_id)
@@ -511,8 +522,7 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4):
"""
require_ntuple(cfg)
-
- requested_ctx_cnt = ctx_cnt
+ require_context_cnt(cfg, 4)
# Try to allocate more queues when necessary
qcnt = len(_get_rx_cnts(cfg))
@@ -577,9 +587,6 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4):
remove_ctx(-1)
check_traffic()
- if requested_ctx_cnt != ctx_cnt:
- raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}")
-
def test_rss_context_overlap(cfg, other_ctx=0):
"""
@@ -588,6 +595,8 @@ def test_rss_context_overlap(cfg, other_ctx=0):
"""
require_ntuple(cfg)
+ if other_ctx:
+ require_context_cnt(cfg, 2)
queue_cnt = len(_get_rx_cnts(cfg))
if queue_cnt < 4:
@@ -649,6 +658,29 @@ def test_rss_context_overlap2(cfg):
test_rss_context_overlap(cfg, True)
+def test_flow_add_context_missing(cfg):
+ """
+ Test that we are not allowed to add a rule pointing to an RSS context
+ which was never created.
+ """
+
+ require_ntuple(cfg)
+
+ # Find a context which doesn't exist
+ for ctx_id in range(1, 100):
+ try:
+ get_rss(cfg, context=ctx_id)
+ except CmdExitFailure:
+ break
+
+ with ksft_raises(CmdExitFailure) as cm:
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port 1234 context {ctx_id}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ ethtool(f"-N {cfg.ifname} delete {ntuple_id}")
+ if cm.exception:
+ ksft_in('Invalid argument', cm.exception.cmd.stderr)
+
+
def test_delete_rss_context_busy(cfg):
"""
Test that deletion returns -EBUSY when an rss context is being used
@@ -717,6 +749,7 @@ def test_rss_ntuple_addition(cfg):
def main() -> None:
with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.context_cnt = None
cfg.ethnl = EthtoolFamily()
cfg.netdevnl = NetdevFamily()
@@ -726,6 +759,7 @@ def main() -> None:
test_rss_context_dump, test_rss_context_queue_reconfigure,
test_rss_context_overlap, test_rss_context_overlap2,
test_rss_context_out_of_order, test_rss_context4_create_with_cfg,
+ test_flow_add_context_missing,
test_delete_rss_context_busy, test_rss_ntuple_addition],
args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py
new file mode 100755
index 000000000000..e1ecb92f79d9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/tso.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Run the tools/testing/selftests/net/csum testsuite."""
+
+import fcntl
+import socket
+import struct
+import termios
+import time
+
+from lib.py import ksft_pr, ksft_run, ksft_exit, KsftSkipEx, KsftXfailEx
+from lib.py import ksft_eq, ksft_ge, ksft_lt
+from lib.py import EthtoolFamily, NetdevFamily, NetDrvEpEnv
+from lib.py import bkg, cmd, defer, ethtool, ip, rand_port, wait_port_listen
+
+
+def sock_wait_drain(sock, max_wait=1000):
+ """Wait for all pending write data on the socket to get ACKed."""
+ for _ in range(max_wait):
+ one = b'\0' * 4
+ outq = fcntl.ioctl(sock.fileno(), termios.TIOCOUTQ, one)
+ outq = struct.unpack("I", outq)[0]
+ if outq == 0:
+ break
+ time.sleep(0.01)
+ ksft_eq(outq, 0)
+
+
+def tcp_sock_get_retrans(sock):
+ """Get the number of retransmissions for the TCP socket."""
+ info = sock.getsockopt(socket.SOL_TCP, socket.TCP_INFO, 512)
+ return struct.unpack("I", info[100:104])[0]
+
+
+def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso):
+ cfg.require_cmd("socat", remote=True)
+
+ port = rand_port()
+ listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof"
+
+ with bkg(listen_cmd, host=cfg.remote) as nc:
+ wait_port_listen(port, host=cfg.remote)
+
+ if ipver == "4":
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.connect((remote_v4, port))
+ else:
+ sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+ sock.connect((remote_v6, port))
+
+ # Small send to make sure the connection is working.
+ sock.send("ping".encode())
+ sock_wait_drain(sock)
+
+ # Send 4MB of data, record the LSO packet count.
+ qstat_old = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+ buf = b"0" * 1024 * 1024 * 4
+ sock.send(buf)
+ sock_wait_drain(sock)
+ qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ # No math behind the 10 here, but try to catch cases where
+ # TCP falls back to non-LSO.
+ ksft_lt(tcp_sock_get_retrans(sock), 10)
+ sock.close()
+
+ # Check that at least 90% of the data was sent as LSO packets.
+ # System noise may cause false negatives. Also header overheads
+ # will add up to 5% of extra packes... The check is best effort.
+ total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"]
+ total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"]
+ if should_lso:
+ if cfg.have_stat_super_count:
+ ksft_ge(qstat_new['tx-hw-gso-packets'] -
+ qstat_old['tx-hw-gso-packets'],
+ total_lso_super,
+ comment="Number of LSO super-packets with LSO enabled")
+ if cfg.have_stat_wire_count:
+ ksft_ge(qstat_new['tx-hw-gso-wire-packets'] -
+ qstat_old['tx-hw-gso-wire-packets'],
+ total_lso_wire,
+ comment="Number of LSO wire-packets with LSO enabled")
+ else:
+ if cfg.have_stat_super_count:
+ ksft_lt(qstat_new['tx-hw-gso-packets'] -
+ qstat_old['tx-hw-gso-packets'],
+ 15, comment="Number of LSO super-packets with LSO disabled")
+ if cfg.have_stat_wire_count:
+ ksft_lt(qstat_new['tx-hw-gso-wire-packets'] -
+ qstat_old['tx-hw-gso-wire-packets'],
+ 500, comment="Number of LSO wire-packets with LSO disabled")
+
+
+def build_tunnel(cfg, outer_ipver, tun_info):
+ local_v4 = NetDrvEpEnv.nsim_v4_pfx + "1"
+ local_v6 = NetDrvEpEnv.nsim_v6_pfx + "1"
+ remote_v4 = NetDrvEpEnv.nsim_v4_pfx + "2"
+ remote_v6 = NetDrvEpEnv.nsim_v6_pfx + "2"
+
+ local_addr = cfg.addr_v[outer_ipver]
+ remote_addr = cfg.remote_addr_v[outer_ipver]
+
+ tun_type = tun_info[0]
+ tun_arg = tun_info[2]
+ ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}")
+ defer(ip, f"link del {tun_type}-ksft")
+ ip(f"link set dev {tun_type}-ksft up")
+ ip(f"addr add {local_v4}/24 dev {tun_type}-ksft")
+ ip(f"addr add {local_v6}/64 dev {tun_type}-ksft")
+
+ ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}",
+ host=cfg.remote)
+ defer(ip, f"link del {tun_type}-ksft", host=cfg.remote)
+ ip(f"link set dev {tun_type}-ksft up", host=cfg.remote)
+ ip(f"addr add {remote_v4}/24 dev {tun_type}-ksft", host=cfg.remote)
+ ip(f"addr add {remote_v6}/64 dev {tun_type}-ksft", host=cfg.remote)
+
+ return remote_v4, remote_v6
+
+
+def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None):
+ """Construct specific tests from the common template."""
+ def f(cfg):
+ cfg.require_ipver(outer_ipver)
+
+ if not cfg.have_stat_super_count and \
+ not cfg.have_stat_wire_count:
+ raise KsftSkipEx(f"Device does not support LSO queue stats")
+
+ ipver = outer_ipver
+ if tun:
+ remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun)
+ ipver = inner_ipver
+ else:
+ remote_v4 = cfg.remote_addr_v["4"]
+ remote_v6 = cfg.remote_addr_v["6"]
+
+ tun_partial = tun and tun[1]
+ # Tunnel which can silently fall back to gso-partial
+ has_gso_partial = tun and 'tx-gso-partial' in cfg.features
+
+ # For TSO4 via partial we need mangleid
+ if ipver == "4" and feature in cfg.partial_features:
+ ksft_pr("Testing with mangleid enabled")
+ if 'tx-tcp-mangleid-segmentation' not in cfg.features:
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on")
+ defer(ethtool, f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off")
+
+ # First test without the feature enabled.
+ ethtool(f"-K {cfg.ifname} {feature} off")
+ if has_gso_partial:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False)
+
+ # Now test with the feature enabled.
+ # For compatible tunnels only - just GSO partial, not specific feature.
+ if has_gso_partial:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6,
+ should_lso=tun_partial)
+
+ # Full feature enabled.
+ if feature in cfg.features:
+ ethtool(f"-K {cfg.ifname} {feature} on")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True)
+ else:
+ raise KsftXfailEx(f"Device does not support {feature}")
+
+ f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver
+ return f
+
+
+def query_nic_features(cfg) -> None:
+ """Query and cache the NIC features."""
+ cfg.have_stat_super_count = False
+ cfg.have_stat_wire_count = False
+
+ cfg.features = set()
+ features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+ for f in features["active"]["bits"]["bit"]:
+ cfg.features.add(f["name"])
+
+ # Check which features are supported via GSO partial
+ cfg.partial_features = set()
+ if 'tx-gso-partial' in cfg.features:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+
+ no_partial = set()
+ features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+ for f in features["active"]["bits"]["bit"]:
+ no_partial.add(f["name"])
+ cfg.partial_features = cfg.features - no_partial
+ ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+
+ stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)
+ if stats:
+ if 'tx-hw-gso-packets' in stats[0]:
+ ksft_pr("Detected qstat for LSO super-packets")
+ cfg.have_stat_super_count = True
+ if 'tx-hw-gso-wire-packets' in stats[0]:
+ ksft_pr("Detected qstat for LSO wire-packets")
+ cfg.have_stat_wire_count = True
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
+
+ query_nic_features(cfg)
+
+ test_info = (
+ # name, v4/v6 ethtool_feature tun:(type, partial, args)
+ ("", "4", "tx-tcp-segmentation", None),
+ ("", "6", "tx-tcp6-segmentation", None),
+ ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")),
+ ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")),
+ ("gre", "4", "tx-gre-segmentation", ("ipgre", False, "")),
+ ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")),
+ )
+
+ cases = []
+ for outer_ipver in ["4", "6"]:
+ for info in test_info:
+ # Skip if test which only works for a specific IP version
+ if info[1] and outer_ipver != info[1]:
+ continue
+
+ cases.append(test_builder(info[0], cfg, outer_ipver, info[2],
+ tun=info[3], inner_ipver="4"))
+ if info[3]:
+ cases.append(test_builder(info[0], cfg, outer_ipver, info[2],
+ tun=info[3], inner_ipver="6"))
+
+ ksft_run(cases=cases, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index 987e452d3a45..96b33b5ef9dd 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -10,41 +10,61 @@ from lib.py import NetNS, NetdevSimDev
from .remote import Remote
-def _load_env_file(src_path):
- env = os.environ.copy()
+class NetDrvEnvBase:
+ """
+ Base class for a NIC / host envirnoments
+ """
+ def __init__(self, src_path):
+ self.src_path = src_path
+ self.env = self._load_env_file()
+
+ def rpath(self, path):
+ """
+ Get an absolute path to a file based on a path relative to the directory
+ containing the test which constructed env.
- src_dir = Path(src_path).parent.resolve()
- if not (src_dir / "net.config").exists():
+ For example, if the test.py is in the same directory as
+ a binary (built from helper.c), the test can use env.rpath("helper")
+ to get the absolute path to the binary
+ """
+ src_dir = Path(self.src_path).parent.resolve()
+ return (src_dir / path).as_posix()
+
+ def _load_env_file(self):
+ env = os.environ.copy()
+
+ src_dir = Path(self.src_path).parent.resolve()
+ if not (src_dir / "net.config").exists():
+ return ksft_setup(env)
+
+ with open((src_dir / "net.config").as_posix(), 'r') as fp:
+ for line in fp.readlines():
+ full_file = line
+ # Strip comments
+ pos = line.find("#")
+ if pos >= 0:
+ line = line[:pos]
+ line = line.strip()
+ if not line:
+ continue
+ pair = line.split('=', maxsplit=1)
+ if len(pair) != 2:
+ raise Exception("Can't parse configuration line:", full_file)
+ env[pair[0]] = pair[1]
return ksft_setup(env)
- with open((src_dir / "net.config").as_posix(), 'r') as fp:
- for line in fp.readlines():
- full_file = line
- # Strip comments
- pos = line.find("#")
- if pos >= 0:
- line = line[:pos]
- line = line.strip()
- if not line:
- continue
- pair = line.split('=', maxsplit=1)
- if len(pair) != 2:
- raise Exception("Can't parse configuration line:", full_file)
- env[pair[0]] = pair[1]
- return ksft_setup(env)
-
-
-class NetDrvEnv:
+
+class NetDrvEnv(NetDrvEnvBase):
"""
Class for a single NIC / host env, with no remote end
"""
def __init__(self, src_path, **kwargs):
- self._ns = None
+ super().__init__(src_path)
- self.env = _load_env_file(src_path)
+ self._ns = None
if 'NETIF' in self.env:
- self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0]
+ self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0]
else:
self._ns = NetdevSimDev(**kwargs)
self.dev = self._ns.nsims[0].dev
@@ -68,7 +88,7 @@ class NetDrvEnv:
self._ns = None
-class NetDrvEpEnv:
+class NetDrvEpEnv(NetDrvEnvBase):
"""
Class for an environment with a local device and "remote endpoint"
which can be used to send traffic in.
@@ -82,8 +102,7 @@ class NetDrvEpEnv:
nsim_v6_pfx = "2001:db8::"
def __init__(self, src_path, nsim_test=None):
-
- self.env = _load_env_file(src_path)
+ super().__init__(src_path)
self._stats_settle_time = None
@@ -94,17 +113,20 @@ class NetDrvEpEnv:
self._ns = None
self._ns_peer = None
+ self.addr_v = { "4": None, "6": None }
+ self.remote_addr_v = { "4": None, "6": None }
+
if "NETIF" in self.env:
if nsim_test is True:
raise KsftXfailEx("Test only works on netdevsim")
self._check_env()
- self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0]
+ self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0]
- self.v4 = self.env.get("LOCAL_V4")
- self.v6 = self.env.get("LOCAL_V6")
- self.remote_v4 = self.env.get("REMOTE_V4")
- self.remote_v6 = self.env.get("REMOTE_V6")
+ self.addr_v["4"] = self.env.get("LOCAL_V4")
+ self.addr_v["6"] = self.env.get("LOCAL_V6")
+ self.remote_addr_v["4"] = self.env.get("REMOTE_V4")
+ self.remote_addr_v["6"] = self.env.get("REMOTE_V6")
kind = self.env["REMOTE_TYPE"]
args = self.env["REMOTE_ARGS"]
else:
@@ -115,26 +137,29 @@ class NetDrvEpEnv:
self.dev = self._ns.nsims[0].dev
- self.v4 = self.nsim_v4_pfx + "1"
- self.v6 = self.nsim_v6_pfx + "1"
- self.remote_v4 = self.nsim_v4_pfx + "2"
- self.remote_v6 = self.nsim_v6_pfx + "2"
+ self.addr_v["4"] = self.nsim_v4_pfx + "1"
+ self.addr_v["6"] = self.nsim_v6_pfx + "1"
+ self.remote_addr_v["4"] = self.nsim_v4_pfx + "2"
+ self.remote_addr_v["6"] = self.nsim_v6_pfx + "2"
kind = "netns"
args = self._netns.name
self.remote = Remote(kind, args, src_path)
- self.addr = self.v6 if self.v6 else self.v4
- self.remote_addr = self.remote_v6 if self.remote_v6 else self.remote_v4
+ self.addr_ipver = "6" if self.addr_v["6"] else "4"
+ self.addr = self.addr_v[self.addr_ipver]
+ self.remote_addr = self.remote_addr_v[self.addr_ipver]
- self.addr_ipver = "6" if self.v6 else "4"
# Bracketed addresses, some commands need IPv6 to be inside []
- self.baddr = f"[{self.v6}]" if self.v6 else self.v4
- self.remote_baddr = f"[{self.remote_v6}]" if self.remote_v6 else self.remote_v4
+ self.baddr = f"[{self.addr_v['6']}]" if self.addr_v["6"] else self.addr_v["4"]
+ self.remote_baddr = f"[{self.remote_addr_v['6']}]" if self.remote_addr_v["6"] else self.remote_addr_v["4"]
self.ifname = self.dev['ifname']
self.ifindex = self.dev['ifindex']
+ # resolve remote interface name
+ self.remote_ifname = self.resolve_remote_ifc()
+
self._required_cmd = {}
def create_local(self):
@@ -181,6 +206,18 @@ class NetDrvEpEnv:
raise Exception("Invalid environment, missing configuration:", missing,
"Please see tools/testing/selftests/drivers/net/README.rst")
+ def resolve_remote_ifc(self):
+ v4 = v6 = None
+ if self.remote_addr_v["4"]:
+ v4 = ip("addr show to " + self.remote_addr_v["4"], json=True, host=self.remote)
+ if self.remote_addr_v["6"]:
+ v6 = ip("addr show to " + self.remote_addr_v["6"], json=True, host=self.remote)
+ if v4 and v6 and v4[0]["ifname"] != v6[0]["ifname"]:
+ raise Exception("Can't resolve remote interface name, v4 and v6 don't match")
+ if (v4 and len(v4) > 1) or (v6 and len(v6) > 1):
+ raise Exception("Can't resolve remote interface name, multiple interfaces match")
+ return v6[0]["ifname"] if v6 else v4[0]["ifname"]
+
def __enter__(self):
return self
@@ -204,13 +241,9 @@ class NetDrvEpEnv:
del self.remote
self.remote = None
- def require_v4(self):
- if not self.v4 or not self.remote_v4:
- raise KsftSkipEx("Test requires IPv4 connectivity")
-
- def require_v6(self):
- if not self.v6 or not self.remote_v6:
- raise KsftSkipEx("Test requires IPv6 connectivity")
+ def require_ipver(self, ipver):
+ if not self.addr_v[ipver] or not self.remote_addr_v[ipver]:
+ raise KsftSkipEx(f"Test requires IPv{ipver} connectivity")
def _require_cmd(self, comm, key, host=None):
cached = self._required_cmd.get(comm, {})
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index 3acaba41ac7b..3c96b022954d 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -110,6 +110,13 @@ function create_dynamic_target() {
echo 1 > "${NETCONS_PATH}"/enabled
}
+# Do not append the release to the header of the message
+function disable_release_append() {
+ echo 0 > "${NETCONS_PATH}"/enabled
+ echo 0 > "${NETCONS_PATH}"/release
+ echo 1 > "${NETCONS_PATH}"/enabled
+}
+
function cleanup() {
local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
@@ -223,3 +230,20 @@ function check_for_dependencies() {
exit "${ksft_skip}"
fi
}
+
+function check_for_taskset() {
+ if ! which taskset > /dev/null ; then
+ echo "SKIP: taskset(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+}
+
+# This is necessary if running multiple tests in a row
+function pkill_socat() {
+ PROCESS_NAME="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}"
+ # socat runs under timeout(1), kill it if it is still alive
+ # do not fail if socat doesn't exist anymore
+ set +e
+ pkill -f "${PROCESS_NAME}"
+ set -e
+}
diff --git a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh
new file mode 100755
index 000000000000..4a71e01a230c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test netconsole's message fragmentation functionality.
+#
+# When a message exceeds the maximum packet size, netconsole splits it into
+# multiple fragments for transmission. This test verifies:
+# - Correct fragmentation of large messages
+# - Proper reassembly of fragments at the receiver
+# - Preservation of userdata across fragments
+# - Behavior with and without kernel release version appending
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# The content of kmsg will be save to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# set userdata to a long value. In this case, it is "1-2-3-4...50-"
+USERDATA_VALUE=$(printf -- '%.2s-' {1..60})
+
+# Convert the header string in a regexp, so, we can remove
+# the second header as well.
+# A header looks like "13,468,514729715,-,ncfrag=0/1135;". If
+# release is appended, you might find something like:L
+# "6.13.0-04048-g4f561a87745a,13,468,514729715,-,ncfrag=0/1135;"
+function header_to_regex() {
+ # header is everything before ;
+ local HEADER="${1}"
+ REGEX=$(echo "${HEADER}" | cut -d'=' -f1)
+ echo "${REGEX}=[0-9]*\/[0-9]*;"
+}
+
+# We have two headers in the message. Remove both to get the full message,
+# and extract the full message.
+function extract_msg() {
+ local MSGFILE="${1}"
+ # Extract the header, which is the very first thing that arrives in the
+ # first list.
+ HEADER=$(sed -n '1p' "${MSGFILE}" | cut -d';' -f1)
+ HEADER_REGEX=$(header_to_regex "${HEADER}")
+
+ # Remove the two headers from the received message
+ # This will return the message without any header, similarly to what
+ # was sent.
+ sed "s/""${HEADER_REGEX}""//g" "${MSGFILE}"
+}
+
+# Validate the message, which has two messages glued together.
+# unwrap them to make sure all the characters were transmitted.
+# File will look like the following:
+# 13,468,514729715,-,ncfrag=0/1135;<message>
+# key=<part of key>-13,468,514729715,-,ncfrag=967/1135;<rest of the key>
+function validate_fragmented_result() {
+ # Discard the netconsole headers, and assemble the full message
+ RCVMSG=$(extract_msg "${1}")
+
+ # check for the main message
+ if ! echo "${RCVMSG}" | grep -q "${MSG}"; then
+ echo "Message body doesn't match." >&2
+ echo "msg received=" "${RCVMSG}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ # check userdata
+ if ! echo "${RCVMSG}" | grep -q "${USERDATA_VALUE}"; then
+ echo "message userdata doesn't match" >&2
+ echo "msg received=" "${RCVMSG}" >&2
+ exit "${ksft_fail}"
+ fi
+ # test passed. hooray
+}
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+# Set userdata "key" with the "value" value
+set_user_data
+
+
+# TEST 1: Send message and userdata. They will fragment
+# =======
+MSG=$(printf -- 'MSG%.3s=' {1..150})
+
+# Listen for netconsole port inside the namespace and destination interface
+listen_port_and_save_to "${OUTPUT_FILE}" &
+# Wait for socat to start and listen to the port.
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+# Send the message
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+# Wait until socat saves the file to disk
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+# Check if the message was not corrupted
+validate_fragmented_result "${OUTPUT_FILE}"
+
+# TEST 2: Test with smaller message, and without release appended
+# =======
+MSG=$(printf -- 'FOOBAR%.3s=' {1..100})
+# Let's disable release and test again.
+disable_release_append
+
+listen_port_and_save_to "${OUTPUT_FILE}" &
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+validate_fragmented_result "${OUTPUT_FILE}"
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
new file mode 100755
index 000000000000..2b78fd1f5982
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
@@ -0,0 +1,167 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A test that makes sure that sysdata runtime CPU data is properly set
+# when a message is sent.
+#
+# There are 3 different tests, every time sent using a random CPU.
+# - Test #1
+# * Only enable cpu_nr sysdata feature.
+# - Test #2
+# * Keep cpu_nr sysdata feature enable and enable userdata.
+# - Test #3
+# * keep userdata enabled, and disable sysdata cpu_nr feature.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+# Enable the sysdata cpu_nr feature
+function set_cpu_nr() {
+ if [[ ! -f "${NETCONS_PATH}/userdata/cpu_nr_enabled" ]]
+ then
+ echo "Populate CPU configfs path not available in ${NETCONS_PATH}/userdata/cpu_nr_enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ echo 1 > "${NETCONS_PATH}/userdata/cpu_nr_enabled"
+}
+
+# Disable the sysdata cpu_nr feature
+function unset_cpu_nr() {
+ echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled"
+}
+
+# Test if MSG content and `cpu=${CPU}` exists in OUTPUT_FILE
+function validate_sysdata_cpu_exists() {
+ # OUTPUT_FILE will contain something like:
+ # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
+ # userdatakey=userdatavalue
+ # cpu=X
+
+ if [ ! -f "$OUTPUT_FILE" ]; then
+ echo "FAIL: File was not generated." >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then
+ echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ # Check if cpu=XX exists in the file and matches the one used
+ # in taskset(1)
+ if ! grep -q "cpu=${CPU}\+" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'cpu=${CPU}' not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ rm "${OUTPUT_FILE}"
+ pkill_socat
+}
+
+# Test if MSG content exists in OUTPUT_FILE but no `cpu=` string
+function validate_sysdata_no_cpu() {
+ if [ ! -f "$OUTPUT_FILE" ]; then
+ echo "FAIL: File was not generated." >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then
+ echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if grep -q "cpu=" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'cpu= found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ rm "${OUTPUT_FILE}"
+}
+
+# Start socat, send the message and wait for the file to show up in the file
+# system
+function runtest {
+ # Listen for netconsole port inside the namespace and destination
+ # interface
+ listen_port_and_save_to "${OUTPUT_FILE}" &
+ # Wait for socat to start and listen to the port.
+ wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+ # Send the message
+ taskset -c "${CPU}" echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+}
+
+# ========== #
+# Start here #
+# ========== #
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# This test also depends on taskset(1). Check for it before starting the test
+check_for_taskset
+
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+
+#====================================================
+# TEST #1
+# Send message from a random CPU
+#====================================================
+# Random CPU in the system
+CPU=$((RANDOM % $(nproc)))
+OUTPUT_FILE="/tmp/${TARGET}_1"
+MSG="Test #1 from CPU${CPU}"
+# Enable the auto population of cpu_nr
+set_cpu_nr
+runtest
+# Make sure the message was received in the dst part
+# and exit
+validate_sysdata_cpu_exists
+
+#====================================================
+# TEST #2
+# This test now adds userdata together with sysdata
+# ===================================================
+# Get a new random CPU
+CPU=$((RANDOM % $(nproc)))
+OUTPUT_FILE="/tmp/${TARGET}_2"
+MSG="Test #2 from CPU${CPU}"
+set_user_data
+runtest
+validate_sysdata_cpu_exists
+
+# ===================================================
+# TEST #3
+# Unset cpu_nr, so, no CPU should be appended.
+# userdata is still set
+# ===================================================
+CPU=$((RANDOM % $(nproc)))
+OUTPUT_FILE="/tmp/${TARGET}_3"
+MSG="Test #3 from CPU${CPU}"
+# Enable the auto population of cpu_nr
+unset_cpu_nr
+runtest
+# At this time, cpu= shouldn't be present in the msg
+validate_sysdata_no_cpu
+
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
index eb83e7b48797..17dc11e9b6dd 100755
--- a/tools/testing/selftests/drivers/net/ping.py
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -8,17 +8,17 @@ from lib.py import bkg, cmd, wait_port_listen, rand_port
def test_v4(cfg) -> None:
- cfg.require_v4()
+ cfg.require_ipver("4")
- cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}")
- cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote)
+ cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"])
+ cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote)
def test_v6(cfg) -> None:
- cfg.require_v6()
+ cfg.require_ipver("6")
- cmd(f"ping -c 1 -W0.5 {cfg.remote_v6}")
- cmd(f"ping -c 1 -W0.5 {cfg.v6}", host=cfg.remote)
+ cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["6"])
+ cmd("ping -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote)
def test_tcp(cfg) -> None:
diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
index 38303da957ee..5fdfebc6415f 100755
--- a/tools/testing/selftests/drivers/net/queues.py
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -2,13 +2,16 @@
# SPDX-License-Identifier: GPL-2.0
from lib.py import ksft_disruptive, ksft_exit, ksft_run
-from lib.py import ksft_eq, ksft_raises, KsftSkipEx
+from lib.py import ksft_eq, ksft_raises, KsftSkipEx, KsftFailEx
from lib.py import EthtoolFamily, NetdevFamily, NlError
from lib.py import NetDrvEnv
from lib.py import cmd, defer, ip
import errno
import glob
-
+import os
+import socket
+import struct
+import subprocess
def sys_get_queues(ifname, qtype='rx') -> int:
folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*')
@@ -21,6 +24,39 @@ def nl_get_queues(cfg, nl, qtype='rx'):
return len([q for q in queues if q['type'] == qtype])
return None
+def check_xdp(cfg, nl, xdp_queue_id=0) -> None:
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ xdp = subprocess.Popen([f"{test_dir}/xdp_helper", f"{cfg.ifindex}", f"{xdp_queue_id}"],
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=1,
+ text=True)
+ defer(xdp.kill)
+
+ stdout, stderr = xdp.communicate(timeout=10)
+ rx = tx = False
+
+ if xdp.returncode == 255:
+ raise KsftSkipEx('AF_XDP unsupported')
+ elif xdp.returncode > 0:
+ raise KsftFailEx('unable to create AF_XDP socket')
+
+ queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True)
+ if not queues:
+ raise KsftSkipEx("Netlink reports no queues")
+
+ for q in queues:
+ if q['id'] == 0:
+ if q['type'] == 'rx':
+ rx = True
+ if q['type'] == 'tx':
+ tx = True
+
+ ksft_eq(q['xsk'], {})
+ else:
+ if 'xsk' in q:
+ _fail("Check failed: xsk attribute set.")
+
+ ksft_eq(rx, True)
+ ksft_eq(tx, True)
def get_queues(cfg, nl) -> None:
snl = NetdevFamily(recv_size=4096)
@@ -81,7 +117,7 @@ def check_down(cfg, nl) -> None:
def main() -> None:
with NetDrvEnv(__file__, queue_count=100) as cfg:
- ksft_run([get_queues, addremove_queues, check_down], args=(cfg, NetdevFamily()))
+ ksft_run([get_queues, addremove_queues, check_down, check_xdp], args=(cfg, NetdevFamily()))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/xdp_helper.c b/tools/testing/selftests/drivers/net/xdp_helper.c
new file mode 100644
index 000000000000..cf06a88b830b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/xdp_helper.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <linux/if_xdp.h>
+#include <linux/if_link.h>
+#include <net/if.h>
+#include <inttypes.h>
+
+#define UMEM_SZ (1U << 16)
+#define NUM_DESC (UMEM_SZ / 2048)
+
+/* this is a simple helper program that creates an XDP socket and does the
+ * minimum necessary to get bind() to succeed.
+ *
+ * this test program is not intended to actually process packets, but could be
+ * extended in the future if that is actually needed.
+ *
+ * it is used by queues.py to ensure the xsk netlinux attribute is set
+ * correctly.
+ */
+int main(int argc, char **argv)
+{
+ struct xdp_umem_reg umem_reg = { 0 };
+ struct sockaddr_xdp sxdp = { 0 };
+ int num_desc = NUM_DESC;
+ void *umem_area;
+ int ifindex;
+ int sock_fd;
+ int queue;
+ char byte;
+
+ if (argc != 3) {
+ fprintf(stderr, "Usage: %s ifindex queue_id", argv[0]);
+ return 1;
+ }
+
+ sock_fd = socket(AF_XDP, SOCK_RAW, 0);
+ if (sock_fd < 0) {
+ perror("socket creation failed");
+ /* if the kernel doesn't support AF_XDP, let the test program
+ * know with -1. All other error paths return 1.
+ */
+ if (errno == EAFNOSUPPORT)
+ return -1;
+ return 1;
+ }
+
+ ifindex = atoi(argv[1]);
+ queue = atoi(argv[2]);
+
+ umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE |
+ MAP_ANONYMOUS, -1, 0);
+ if (umem_area == MAP_FAILED) {
+ perror("mmap failed");
+ return 1;
+ }
+
+ umem_reg.addr = (uintptr_t)umem_area;
+ umem_reg.len = UMEM_SZ;
+ umem_reg.chunk_size = 2048;
+ umem_reg.headroom = 0;
+
+ setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg,
+ sizeof(umem_reg));
+ setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc,
+ sizeof(num_desc));
+ setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc,
+ sizeof(num_desc));
+ setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc, sizeof(num_desc));
+
+ sxdp.sxdp_family = AF_XDP;
+ sxdp.sxdp_ifindex = ifindex;
+ sxdp.sxdp_queue_id = queue;
+ sxdp.sxdp_flags = 0;
+
+ if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) != 0) {
+ munmap(umem_area, UMEM_SZ);
+ perror("bind failed");
+ close(sock_fd);
+ return 1;
+ }
+
+ /* give the parent program some data when the socket is ready*/
+ fprintf(stdout, "%d\n", sock_fd);
+
+ /* parent program will write a byte to stdin when its ready for this
+ * helper to exit
+ */
+ read(STDIN_FILENO, &byte, 1);
+
+ close(sock_fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 28a715a8ef2b..80dcae53ef55 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -42,6 +42,7 @@ socket
so_incoming_cpu
so_netns_cookie
so_txtime
+so_rcv_listener
stress_reuseport_listen
tap
tcp_fastopen_backup_key
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 73ee88d6b043..8d6116b80cf1 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -7,7 +7,7 @@ CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES)
CFLAGS += -I../
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
- rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh
+ rtnetlink.sh xfrm_policy.sh
TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
@@ -33,9 +33,11 @@ TEST_PROGS += gro.sh
TEST_PROGS += gre_gso.sh
TEST_PROGS += cmsg_so_mark.sh
TEST_PROGS += cmsg_so_priority.sh
+TEST_PROGS += test_so_rcv.sh
TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
TEST_PROGS += netns-name.sh
TEST_PROGS += nl_netdev.py
+TEST_PROGS += rtnetlink.py
TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -75,6 +77,7 @@ TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_
TEST_GEN_FILES += toeplitz
TEST_GEN_FILES += cmsg_sender
TEST_GEN_FILES += stress_reuseport_listen
+TEST_GEN_FILES += so_rcv_listener
TEST_PROGS += test_vxlan_vnifiltering.sh
TEST_GEN_FILES += io_uring_zerocopy_tx
TEST_PROGS += io_uring_zerocopy_tx.sh
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 899dbad0104b..4fcc38907e48 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -3667,7 +3667,7 @@ ipv6_addr_bind_novrf()
# when it really should not
a=${NSA_LO_IP6}
log_start
- show_hint "Tecnically should fail since address is not on device but kernel allows"
+ show_hint "Technically should fail since address is not on device but kernel allows"
run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address"
}
@@ -3724,7 +3724,7 @@ ipv6_addr_bind_vrf()
# passes when it really should not
a=${VRF_IP6}
log_start
- show_hint "Tecnically should fail since address is not on device but kernel allows"
+ show_hint "Technically should fail since address is not on device but kernel allows"
run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind"
diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh
index d5e3abb8658c..9931a1e36e3d 100755
--- a/tools/testing/selftests/net/fdb_flush.sh
+++ b/tools/testing/selftests/net/fdb_flush.sh
@@ -583,7 +583,7 @@ vxlan_test_flush_by_remote_attributes()
$IP link del dev vx10
$IP link add name vx10 type vxlan dstport "$VXPORT" external
- # For multicat FDB entries, the VXLAN driver stores a linked list of
+ # For multicast FDB entries, the VXLAN driver stores a linked list of
# remotes for a given key. Verify that only the expected remotes are
# flushed.
multicast_fdb_entries_add
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 77c83d9508d3..bea1282e0281 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -741,7 +741,7 @@ ipv6_fcnal()
run_cmd "$IP nexthop add id 52 via 2001:db8:92::3"
log_test $? 2 "Create nexthop - gw only"
- # gw is not reachable throught given dev
+ # gw is not reachable through given dev
run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1"
log_test $? 2 "Create nexthop - invalid gw+dev combination"
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 847936363a12..06c51d7ceb4a 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -256,6 +256,24 @@ fib_rule6_test()
fib_rule6_test_match_n_redirect "$match" "$match" \
"$getnomatch" "sport and dport redirect to table" \
"sport and dport no redirect to table"
+
+ match="sport 100-200 dport 300-400"
+ getmatch="sport 100 dport 400"
+ getnomatch="sport 100 dport 401"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" \
+ "sport and dport range redirect to table" \
+ "sport and dport range no redirect to table"
+ fi
+
+ ip rule help 2>&1 | grep sport | grep -q MASK
+ if [ $? -eq 0 ]; then
+ match="sport 0x0f00/0xff00 dport 0x000f/0x00ff"
+ getmatch="sport 0x0f11 dport 0x220f"
+ getnomatch="sport 0x1f11 dport 0x221f"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "sport and dport masked redirect to table" \
+ "sport and dport masked no redirect to table"
fi
fib_check_iproute_support "ipproto" "ipproto"
@@ -525,6 +543,24 @@ fib_rule4_test()
fib_rule4_test_match_n_redirect "$match" "$match" \
"$getnomatch" "sport and dport redirect to table" \
"sport and dport no redirect to table"
+
+ match="sport 100-200 dport 300-400"
+ getmatch="sport 100 dport 400"
+ getnomatch="sport 100 dport 401"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" \
+ "sport and dport range redirect to table" \
+ "sport and dport range no redirect to table"
+ fi
+
+ ip rule help 2>&1 | grep sport | grep -q MASK
+ if [ $? -eq 0 ]; then
+ match="sport 0x0f00/0xff00 dport 0x000f/0x00ff"
+ getmatch="sport 0x0f11 dport 0x220f"
+ getnomatch="sport 0x1f11 dport 0x221f"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "sport and dport masked redirect to table" \
+ "sport and dport masked no redirect to table"
fi
fib_check_iproute_support "ipproto" "ipproto"
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index d9d587454d20..8c1597ebc2d3 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -149,7 +149,7 @@ cfg_test_host_common()
check_err $? "Failed to add $name host entry"
bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null
- check_fail $? "Managed to replace $name host entry"
+ check_err $? "Failed to replace $name host entry"
bridge mdb del dev br0 port br0 grp $grp $state vid 10
bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 8de80acf249e..508f3c700d71 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -291,16 +291,6 @@ if [[ "$CHECK_TC" = "yes" ]]; then
check_tc_version
fi
-require_command()
-{
- local cmd=$1; shift
-
- if [[ ! -x "$(command -v "$cmd")" ]]; then
- echo "SKIP: $cmd not installed"
- exit $ksft_skip
- fi
-}
-
# IPv6 support was added in v3.0
check_mtools_version()
{
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index 3f9d50f1ef9e..180c5eca556f 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -740,6 +740,8 @@ test_learning()
vxlan_flood_test $mac $dst 0 10 0
+ # The entry should age out when it only forwards traffic
+ $MZ $h1 -c 50 -d 1sec -p 64 -b $mac -B $dst -t icmp -q &
sleep 60
bridge fdb show brport vx1 | grep $mac | grep -q self
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 0bd9a038a1f0..975be4fdbcdb 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -450,6 +450,25 @@ kill_process()
{ kill $pid && wait $pid; } 2>/dev/null
}
+check_command()
+{
+ local cmd=$1; shift
+
+ if [[ ! -x "$(command -v "$cmd")" ]]; then
+ log_test_skip "$cmd not installed"
+ return $EXIT_STATUS
+ fi
+}
+
+require_command()
+{
+ local cmd=$1; shift
+
+ if ! check_command "$cmd"; then
+ exit $EXIT_STATUS
+ fi
+}
+
ip_link_add()
{
local name=$1; shift
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
index 54d8f5eba810..729457859316 100644
--- a/tools/testing/selftests/net/lib/py/__init__.py
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -5,5 +5,5 @@ from .ksft import *
from .netns import NetNS
from .nsim import *
from .utils import *
-from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily
+from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily
from .ynl import NetshaperFamily
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index ad1e36baee2a..8986c584cb37 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -42,6 +42,10 @@ class RtnlFamily(YnlFamily):
super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(),
schema='', recv_size=recv_size)
+class RtnlAddrFamily(YnlFamily):
+ def __init__(self, recv_size=0):
+ super().__init__((SPEC_PATH / Path('rt_addr.yaml')).as_posix(),
+ schema='', recv_size=recv_size)
class NetdevFamily(YnlFamily):
def __init__(self, recv_size=0):
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 9c2a415976cb..2329c2f8519b 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -28,7 +28,7 @@ size=0
usage() {
echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]"
- echo -e "\t-b: bail out after first error, otherwise runs al testcases"
+ echo -e "\t-b: bail out after first error, otherwise runs all testcases"
echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
echo -e "\t-d: debug this script"
echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
index 93e8cb671c3d..beaee5e4e2aa 100755
--- a/tools/testing/selftests/net/nl_netdev.py
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -35,6 +35,21 @@ def napi_list_check(nf) -> None:
comment=f"queue count after reset queue {q} mode {i}")
+def nsim_rxq_reset_down(nf) -> None:
+ """
+ Test that the queue API supports resetting a queue
+ while the interface is down. We should convert this
+ test to testing real HW once more devices support
+ queue API.
+ """
+ with NetdevSimDev(queue_count=4) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} down")
+ for i in [0, 2, 3]:
+ nsim.dfs_write("queue_reset", f"1 {i}")
+
+
def page_pool_check(nf) -> None:
with NetdevSimDev() as nsimdev:
nsim = nsimdev.nsims[0]
@@ -106,7 +121,8 @@ def page_pool_check(nf) -> None:
def main() -> None:
nf = NetdevFamily()
- ksft_run([empty_check, lo_check, page_pool_check, napi_list_check],
+ ksft_run([empty_check, lo_check, page_pool_check, napi_list_check,
+ nsim_rxq_reset_down],
args=(nf, ))
ksft_exit()
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
index 404a2ce759ab..221270cee3ea 100644
--- a/tools/testing/selftests/net/psock_tpacket.c
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -12,7 +12,7 @@
*
* Datapath:
* Open a pair of packet sockets and send resp. receive an a priori known
- * packet pattern accross the sockets and check if it was received resp.
+ * packet pattern across the sockets and check if it was received resp.
* sent correctly. Fanout in combination with RX_RING is currently not
* tested here.
*
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
index 066efd30e294..7b9bf8a7bbe1 100644
--- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
@@ -112,7 +112,7 @@ TEST(reuseaddr_ports_exhausted_reusable_same_euid)
ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
if (opts->reuseport[0] && opts->reuseport[1]) {
- EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened.");
+ EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets successfully listened.");
} else {
EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations.");
}
diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py
new file mode 100755
index 000000000000..80950888800b
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_exit, ksft_run, ksft_ge, RtnlAddrFamily
+import socket
+
+IPV4_ALL_HOSTS_MULTICAST = b'\xe0\x00\x00\x01'
+
+def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None:
+ """
+ Verify that at least one interface has the IPv4 all-hosts multicast address.
+ At least the loopback interface should have this address.
+ """
+
+ addresses = rtnl.getmaddrs({"ifa-family": socket.AF_INET}, dump=True)
+
+ all_host_multicasts = [
+ addr for addr in addresses if addr['ifa-multicast'] == IPV4_ALL_HOSTS_MULTICAST
+ ]
+
+ ksft_ge(len(all_host_multicasts), 1,
+ "No interface found with the IPv4 all-hosts multicast address")
+
+def main() -> None:
+ rtnl = RtnlAddrFamily()
+ ksft_run([dump_mcaddr_check], args=(rtnl, ))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/so_rcv_listener.c b/tools/testing/selftests/net/so_rcv_listener.c
new file mode 100644
index 000000000000..bc5841192aa6
--- /dev/null
+++ b/tools/testing/selftests/net/so_rcv_listener.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <netdb.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#ifndef SO_RCVPRIORITY
+#define SO_RCVPRIORITY 82
+#endif
+
+struct options {
+ __u32 val;
+ int name;
+ int rcvname;
+ const char *host;
+ const char *service;
+} opt;
+
+static void __attribute__((noreturn)) usage(const char *bin)
+{
+ printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin);
+ printf("Options:\n"
+ "\t\t-M val Test SO_RCVMARK\n"
+ "\t\t-P val Test SO_RCVPRIORITY\n"
+ "");
+ exit(EXIT_FAILURE);
+}
+
+static void parse_args(int argc, char *argv[])
+{
+ int o;
+
+ while ((o = getopt(argc, argv, "M:P:")) != -1) {
+ switch (o) {
+ case 'M':
+ opt.val = atoi(optarg);
+ opt.name = SO_MARK;
+ opt.rcvname = SO_RCVMARK;
+ break;
+ case 'P':
+ opt.val = atoi(optarg);
+ opt.name = SO_PRIORITY;
+ opt.rcvname = SO_RCVPRIORITY;
+ break;
+ default:
+ usage(argv[0]);
+ break;
+ }
+ }
+
+ if (optind != argc - 2)
+ usage(argv[0]);
+
+ opt.host = argv[optind];
+ opt.service = argv[optind + 1];
+}
+
+int main(int argc, char *argv[])
+{
+ int err = 0;
+ int recv_fd = -1;
+ int ret_value = 0;
+ __u32 recv_val;
+ struct cmsghdr *cmsg;
+ char cbuf[CMSG_SPACE(sizeof(__u32))];
+ char recv_buf[CMSG_SPACE(sizeof(__u32))];
+ struct iovec iov[1];
+ struct msghdr msg;
+ struct sockaddr_in recv_addr4;
+ struct sockaddr_in6 recv_addr6;
+
+ parse_args(argc, argv);
+
+ int family = strchr(opt.host, ':') ? AF_INET6 : AF_INET;
+
+ recv_fd = socket(family, SOCK_DGRAM, IPPROTO_UDP);
+ if (recv_fd < 0) {
+ perror("Can't open recv socket");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ err = setsockopt(recv_fd, SOL_SOCKET, opt.rcvname, &opt.val, sizeof(opt.val));
+ if (err < 0) {
+ perror("Recv setsockopt error");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ if (family == AF_INET) {
+ memset(&recv_addr4, 0, sizeof(recv_addr4));
+ recv_addr4.sin_family = family;
+ recv_addr4.sin_port = htons(atoi(opt.service));
+
+ if (inet_pton(family, opt.host, &recv_addr4.sin_addr) <= 0) {
+ perror("Invalid IPV4 address");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ err = bind(recv_fd, (struct sockaddr *)&recv_addr4, sizeof(recv_addr4));
+ } else {
+ memset(&recv_addr6, 0, sizeof(recv_addr6));
+ recv_addr6.sin6_family = family;
+ recv_addr6.sin6_port = htons(atoi(opt.service));
+
+ if (inet_pton(family, opt.host, &recv_addr6.sin6_addr) <= 0) {
+ perror("Invalid IPV6 address");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ err = bind(recv_fd, (struct sockaddr *)&recv_addr6, sizeof(recv_addr6));
+ }
+
+ if (err < 0) {
+ perror("Recv bind error");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ iov[0].iov_base = recv_buf;
+ iov[0].iov_len = sizeof(recv_buf);
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+
+ err = recvmsg(recv_fd, &msg, 0);
+ if (err < 0) {
+ perror("Message receive error");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == opt.name) {
+ recv_val = *(__u32 *)CMSG_DATA(cmsg);
+ printf("Received value: %u\n", recv_val);
+
+ if (recv_val != opt.val) {
+ fprintf(stderr, "Error: expected value: %u, got: %u\n",
+ opt.val, recv_val);
+ ret_value = -EINVAL;
+ }
+ goto cleanup;
+ }
+ }
+
+ fprintf(stderr, "Error: No matching cmsg received\n");
+ ret_value = -ENOMSG;
+
+cleanup:
+ if (recv_fd >= 0)
+ close(recv_fd);
+
+ return ret_value;
+}
diff --git a/tools/testing/selftests/net/test_blackhole_dev.sh b/tools/testing/selftests/net/test_blackhole_dev.sh
deleted file mode 100755
index 3119b80e711f..000000000000
--- a/tools/testing/selftests/net/test_blackhole_dev.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Runs blackhole-dev test using blackhole-dev kernel module
-
-if /sbin/modprobe -q test_blackhole_dev ; then
- /sbin/modprobe -q -r test_blackhole_dev;
- echo "test_blackhole_dev: ok";
-else
- echo "test_blackhole_dev: [FAIL]";
- exit 1;
-fi
diff --git a/tools/testing/selftests/net/test_so_rcv.sh b/tools/testing/selftests/net/test_so_rcv.sh
new file mode 100755
index 000000000000..d8aa4362879d
--- /dev/null
+++ b/tools/testing/selftests/net/test_so_rcv.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+HOSTS=("127.0.0.1" "::1")
+PORT=1234
+TOTAL_TESTS=0
+FAILED_TESTS=0
+
+declare -A TESTS=(
+ ["SO_RCVPRIORITY"]="-P 2"
+ ["SO_RCVMARK"]="-M 3"
+)
+
+check_result() {
+ ((TOTAL_TESTS++))
+ if [ "$1" -ne 0 ]; then
+ ((FAILED_TESTS++))
+ fi
+}
+
+cleanup()
+{
+ cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+setup_ns NS
+
+for HOST in "${HOSTS[@]}"; do
+ PROTOCOL="IPv4"
+ if [[ "$HOST" == "::1" ]]; then
+ PROTOCOL="IPv6"
+ fi
+ for test_name in "${!TESTS[@]}"; do
+ echo "Running $test_name test, $PROTOCOL"
+ arg=${TESTS[$test_name]}
+
+ ip netns exec $NS ./so_rcv_listener $arg $HOST $PORT &
+ LISTENER_PID=$!
+
+ sleep 0.5
+
+ if ! ip netns exec $NS ./cmsg_sender $arg $HOST $PORT; then
+ echo "Sender failed for $test_name, $PROTOCOL"
+ kill "$LISTENER_PID" 2>/dev/null
+ wait "$LISTENER_PID"
+ check_result 1
+ continue
+ fi
+
+ wait "$LISTENER_PID"
+ LISTENER_EXIT_CODE=$?
+
+ if [ "$LISTENER_EXIT_CODE" -eq 0 ]; then
+ echo "Rcv test OK for $test_name, $PROTOCOL"
+ check_result 0
+ else
+ echo "Rcv test FAILED for $test_name, $PROTOCOL"
+ check_result 1
+ fi
+ done
+done
+
+if [ "$FAILED_TESTS" -ne 0 ]; then
+ echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed"
+ exit ${KSFT_FAIL}
+else
+ echo "OK - All $TOTAL_TESTS tests passed"
+ exit ${KSFT_PASS}
+fi
diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
index 2d442cdab11e..062f957950af 100755
--- a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
+++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
@@ -1,29 +1,114 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Check FDB default-remote handling across "ip link set".
+ALL_TESTS="
+ test_set_remote
+ test_change_mc_remote
+"
+source lib.sh
check_remotes()
{
local what=$1; shift
local N=$(bridge fdb sh dev vx | grep 00:00:00:00:00:00 | wc -l)
- echo -ne "expected two remotes after $what\t"
- if [[ $N != 2 ]]; then
- echo "[FAIL]"
- EXIT_STATUS=1
+ ((N == 2))
+ check_err $? "expected 2 remotes after $what, got $N"
+}
+
+# Check FDB default-remote handling across "ip link set".
+test_set_remote()
+{
+ RET=0
+
+ ip_link_add vx up type vxlan id 2000 dstport 4789
+ bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent
+ bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent
+ check_remotes "fdb append"
+
+ ip link set dev vx type vxlan remote 192.0.2.30
+ check_remotes "link set"
+
+ log_test 'FDB default-remote handling across "ip link set"'
+}
+
+fmt_remote()
+{
+ local addr=$1; shift
+
+ if [[ $addr == 224.* ]]; then
+ echo "group $addr"
else
- echo "[ OK ]"
+ echo "remote $addr"
fi
}
-ip link add name vx up type vxlan id 2000 dstport 4789
-bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent
-bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent
-check_remotes "fdb append"
+change_remote()
+{
+ local remote=$1; shift
+
+ ip link set dev vx type vxlan $(fmt_remote $remote) dev v1
+}
+
+check_membership()
+{
+ local check_vec=("$@")
+
+ local memberships
+ memberships=$(
+ netstat -n --groups |
+ sed -n '/^v1\b/p' |
+ grep -o '[^ ]*$'
+ )
+ check_err $? "Couldn't obtain group memberships"
+
+ local item
+ for item in "${check_vec[@]}"; do
+ eval "local $item"
+ echo "$memberships" | grep -q "\b$group\b"
+ check_err_fail $fail $? "$group is_ex reported in IGMP query response"
+ done
+}
+
+test_change_mc_remote()
+{
+ check_command netstat || return
+
+ ip_link_add v1 up type veth peer name v2
+ ip_link_set_up v2
+
+ RET=0
+
+ ip_link_add vx up type vxlan dstport 4789 \
+ local 192.0.2.1 $(fmt_remote 224.1.1.1) dev v1 vni 1000
+
+ check_membership "group=224.1.1.1 fail=0" \
+ "group=224.1.1.2 fail=1" \
+ "group=224.1.1.3 fail=1"
+
+ log_test "MC group report after VXLAN creation"
+
+ RET=0
+
+ change_remote 224.1.1.2
+ check_membership "group=224.1.1.1 fail=1" \
+ "group=224.1.1.2 fail=0" \
+ "group=224.1.1.3 fail=1"
+
+ log_test "MC group report after changing VXLAN remote MC->MC"
+
+ RET=0
+
+ change_remote 192.0.2.2
+ check_membership "group=224.1.1.1 fail=1" \
+ "group=224.1.1.2 fail=1" \
+ "group=224.1.1.3 fail=1"
+
+ log_test "MC group report after changing VXLAN remote MC->UC"
+}
+
+trap defer_scopes_cleanup EXIT
-ip link set dev vx type vxlan remote 192.0.2.30
-check_remotes "link set"
+tests_run
-ip link del dev vx
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk
index 12e7cae251be..e907c2751956 100644
--- a/tools/testing/selftests/net/ynl.mk
+++ b/tools/testing/selftests/net/ynl.mk
@@ -27,7 +27,8 @@ $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig:
$(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig
$(Q)rm -f $(top_srcdir)/tools/net/ynl/libynl.a
- $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a
+ $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl \
+ GENS="$(YNL_GENS)" RSTS="" libynl.a
$(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a
EXTRA_CLEAN += \