diff options
46 files changed, 1554 insertions, 264 deletions
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 9c0ce92e9212..56f2235f5fb5 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -51,11 +51,15 @@ properties: - allwinner,sun8i-r40-emac - allwinner,sun8i-v3s-emac - allwinner,sun50i-a64-emac + - loongson,ls2k-dwmac + - loongson,ls7a-dwmac - amlogic,meson6-dwmac - amlogic,meson8b-dwmac - amlogic,meson8m2-dwmac - amlogic,meson-gxbb-dwmac - amlogic,meson-axg-dwmac + - loongson,ls2k-dwmac + - loongson,ls7a-dwmac - ingenic,jz4775-mac - ingenic,x1000-mac - ingenic,x1600-mac @@ -363,6 +367,8 @@ allOf: - allwinner,sun8i-r40-emac - allwinner,sun8i-v3s-emac - allwinner,sun50i-a64-emac + - loongson,ls2k-dwmac + - loongson,ls7a-dwmac - ingenic,jz4775-mac - ingenic,x1000-mac - ingenic,x1600-mac diff --git a/Documentation/firmware-guide/acpi/dsd/phy.rst b/Documentation/firmware-guide/acpi/dsd/phy.rst index 7d01ae8b3cc6..0d49bad2ea9c 100644 --- a/Documentation/firmware-guide/acpi/dsd/phy.rst +++ b/Documentation/firmware-guide/acpi/dsd/phy.rst @@ -27,7 +27,8 @@ network interfaces that have PHYs connected to MAC via MDIO bus. During the MDIO bus driver initialization, PHYs on this bus are probed using the _ADR object as shown below and are registered on the MDIO bus. -:: +.. code-block:: none + Scope(\_SB.MDI0) { Device(PHY1) { @@ -60,7 +61,9 @@ component (PHYs on the MDIO bus). a) Silicon Component This node describes the MDIO controller, MDI0 --------------------------------------------- -:: + +.. code-block:: none + Scope(_SB) { Device(MDI0) { @@ -80,7 +83,9 @@ This node describes the MDIO controller, MDI0 b) Platform Component The PHY1 and PHY2 nodes represent the PHYs connected to MDIO bus MDI0 --------------------------------------------------------------------- -:: + +.. code-block:: none + Scope(\_SB.MDI0) { Device(PHY1) { @@ -98,7 +103,9 @@ DSDT entries representing MAC nodes Below are the MAC nodes where PHY nodes are referenced. phy-mode and phy-handle are used as explained earlier. ------------------------------------------------------ -:: + +.. code-block:: none + Scope(\_SB.MCE0.PR17) { Name (_DSD, Package () { diff --git a/Documentation/firmware-guide/acpi/index.rst b/Documentation/firmware-guide/acpi/index.rst index f72b5f1769fb..a99ee402b212 100644 --- a/Documentation/firmware-guide/acpi/index.rst +++ b/Documentation/firmware-guide/acpi/index.rst @@ -11,6 +11,7 @@ ACPI Support dsd/graph dsd/data-node-references dsd/leds + dsd/phy enumeration osi method-customizing diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst index 3b352e5f6300..ee06fd782465 100644 --- a/Documentation/networking/mptcp-sysctl.rst +++ b/Documentation/networking/mptcp-sysctl.rst @@ -24,3 +24,11 @@ add_addr_timeout - INTEGER (seconds) sysctl. Default: 120 + +checksum_enabled - BOOLEAN + Control whether DSS checksum can be enabled. + + DSS checksum can be enabled if the value is nonzero. This is a + per-namespace sysctl. + + Default: 0 diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi index 569e814def83..5747f171de29 100644 --- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi @@ -114,6 +114,52 @@ ranges = <0x01000000 0x0 0x00000000 0x0 0x18000000 0x0 0x00010000>, <0x02000000 0x0 0x40000000 0x0 0x40000000 0x0 0x40000000>; + gmac@3,0 { + compatible = "pci0014,7a03.0", + "pci0014,7a03", + "pciclass0c0320", + "pciclass0c03", + "loongson, pci-gmac"; + + reg = <0x1800 0x0 0x0 0x0 0x0>; + interrupts = <12 IRQ_TYPE_LEVEL_LOW>, + <13 IRQ_TYPE_LEVEL_LOW>; + interrupt-names = "macirq", "eth_lpi"; + interrupt-parent = <&liointc0>; + phy-mode = "rgmii"; + mdio { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,dwmac-mdio"; + phy0: ethernet-phy@0 { + reg = <0>; + }; + }; + }; + + gmac@3,1 { + compatible = "pci0014,7a03.0", + "pci0014,7a03", + "pciclass0c0320", + "pciclass0c03", + "loongson, pci-gmac"; + + reg = <0x1900 0x0 0x0 0x0 0x0>; + interrupts = <14 IRQ_TYPE_LEVEL_LOW>, + <15 IRQ_TYPE_LEVEL_LOW>; + interrupt-names = "macirq", "eth_lpi"; + interrupt-parent = <&liointc0>; + phy-mode = "rgmii"; + mdio { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,dwmac-mdio"; + phy1: ethernet-phy@1 { + reg = <0>; + }; + }; + }; + ehci@4,1 { compatible = "pci0014,7a14.0", "pci0014,7a14", diff --git a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi index f99a7a11fded..58b9bb47c58a 100644 --- a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi +++ b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi @@ -186,7 +186,8 @@ compatible = "pci0014,7a03.0", "pci0014,7a03", "pciclass020000", - "pciclass0200"; + "pciclass0200", + "loongson, pci-gmac"; reg = <0x1800 0x0 0x0 0x0 0x0>; interrupts = <12 IRQ_TYPE_LEVEL_HIGH>, @@ -208,7 +209,8 @@ compatible = "pci0014,7a03.0", "pci0014,7a03", "pciclass020000", - "pciclass0200"; + "pciclass0200", + "loongson, pci-gmac"; reg = <0x1900 0x0 0x0 0x0 0x0>; interrupts = <14 IRQ_TYPE_LEVEL_HIGH>, diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index 39124726bdd9..221c7abdef0e 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -136,6 +136,7 @@ struct sja1105_info { int (*clocking_setup)(struct sja1105_private *priv); int (*pcs_mdio_read)(struct mii_bus *bus, int phy, int reg); int (*pcs_mdio_write)(struct mii_bus *bus, int phy, int reg, u16 val); + int (*disable_microcontroller)(struct sja1105_private *priv); const char *name; bool supports_mii[SJA1105_MAX_NUM_PORTS]; bool supports_rmii[SJA1105_MAX_NUM_PORTS]; @@ -363,7 +364,7 @@ int sja1105pqrs_setup_rgmii_delay(const void *ctx, int port); int sja1110_setup_rgmii_delay(const void *ctx, int port); int sja1105_clocking_setup_port(struct sja1105_private *priv, int port); int sja1105_clocking_setup(struct sja1105_private *priv); -int sja1110_clocking_setup(struct sja1105_private *priv); +int sja1110_disable_microcontroller(struct sja1105_private *priv); /* From sja1105_ethtool.c */ void sja1105_get_ethtool_stats(struct dsa_switch *ds, int port, u64 *data); diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c index 645edea5a81f..387a1f2f161c 100644 --- a/drivers/net/dsa/sja1105/sja1105_clocking.c +++ b/drivers/net/dsa/sja1105/sja1105_clocking.c @@ -6,6 +6,7 @@ #include "sja1105.h" #define SJA1105_SIZE_CGU_CMD 4 +#define SJA1110_BASE_MCSS_CLK SJA1110_CGU_ADDR(0x70) #define SJA1110_BASE_TIMER_CLK SJA1110_CGU_ADDR(0x74) /* Common structure for CFG_PAD_MIIx_RX and CFG_PAD_MIIx_TX */ @@ -832,17 +833,30 @@ sja1110_cgu_outclk_packing(void *buf, struct sja1110_cgu_outclk *outclk, sja1105_packing(buf, &outclk->pd, 0, 0, size, op); } -/* Power down the BASE_TIMER_CLK in order to disable the watchdog */ -int sja1110_clocking_setup(struct sja1105_private *priv) +int sja1110_disable_microcontroller(struct sja1105_private *priv) { u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0}; + struct sja1110_cgu_outclk outclk_6_c = { + .clksrc = 0x3, + .pd = true, + }; struct sja1110_cgu_outclk outclk_7_c = { .clksrc = 0x5, .pd = true, }; + int rc; + /* Power down the BASE_TIMER_CLK to disable the watchdog timer */ sja1110_cgu_outclk_packing(packed_buf, &outclk_7_c, PACK); - return sja1105_xfer_buf(priv, SPI_WRITE, SJA1110_BASE_TIMER_CLK, + rc = sja1105_xfer_buf(priv, SPI_WRITE, SJA1110_BASE_TIMER_CLK, + packed_buf, SJA1105_SIZE_CGU_CMD); + if (rc) + return rc; + + /* Power down the BASE_MCSS_CLOCK to gate the microcontroller off */ + sja1110_cgu_outclk_packing(packed_buf, &outclk_6_c, PACK); + + return sja1105_xfer_buf(priv, SPI_WRITE, SJA1110_BASE_MCSS_CLK, packed_buf, SJA1105_SIZE_CGU_CMD); } diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 8e5cdf93c23b..a9777eb564c6 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1886,17 +1886,23 @@ int sja1105_static_config_reload(struct sja1105_private *priv, mutex_lock(&priv->ptp_data.lock); rc = __sja1105_ptp_gettimex(ds, &now, &ptp_sts_before); - if (rc < 0) - goto out_unlock_ptp; + if (rc < 0) { + mutex_unlock(&priv->ptp_data.lock); + goto out; + } /* Reset switch and send updated static configuration */ rc = sja1105_static_config_upload(priv); - if (rc < 0) - goto out_unlock_ptp; + if (rc < 0) { + mutex_unlock(&priv->ptp_data.lock); + goto out; + } rc = __sja1105_ptp_settime(ds, 0, &ptp_sts_after); - if (rc < 0) - goto out_unlock_ptp; + if (rc < 0) { + mutex_unlock(&priv->ptp_data.lock); + goto out; + } t1 = timespec64_to_ns(&ptp_sts_before.pre_ts); t2 = timespec64_to_ns(&ptp_sts_before.post_ts); @@ -1911,7 +1917,6 @@ int sja1105_static_config_reload(struct sja1105_private *priv, __sja1105_ptp_adjtime(ds, now); -out_unlock_ptp: mutex_unlock(&priv->ptp_data.lock); dev_info(priv->ds->dev, @@ -1922,9 +1927,11 @@ out_unlock_ptp: * For these interfaces there is no dynamic configuration * needed, since PLLs have same settings at all speeds. */ - rc = priv->info->clocking_setup(priv); - if (rc < 0) - goto out; + if (priv->info->clocking_setup) { + rc = priv->info->clocking_setup(priv); + if (rc < 0) + goto out; + } for (i = 0; i < ds->num_ports; i++) { struct dw_xpcs *xpcs = priv->xpcs[i]; @@ -3032,18 +3039,34 @@ static int sja1105_setup(struct dsa_switch *ds) goto out_ptp_clock_unregister; } + if (priv->info->disable_microcontroller) { + rc = priv->info->disable_microcontroller(priv); + if (rc < 0) { + dev_err(ds->dev, + "Failed to disable microcontroller: %pe\n", + ERR_PTR(rc)); + goto out_mdiobus_unregister; + } + } + /* Create and send configuration down to device */ rc = sja1105_static_config_load(priv); if (rc < 0) { dev_err(ds->dev, "Failed to load static config: %d\n", rc); goto out_mdiobus_unregister; } + /* Configure the CGU (PHY link modes and speeds) */ - rc = priv->info->clocking_setup(priv); - if (rc < 0) { - dev_err(ds->dev, "Failed to configure MII clocking: %d\n", rc); - goto out_static_config_free; + if (priv->info->clocking_setup) { + rc = priv->info->clocking_setup(priv); + if (rc < 0) { + dev_err(ds->dev, + "Failed to configure MII clocking: %pe\n", + ERR_PTR(rc)); + goto out_static_config_free; + } } + /* On SJA1105, VLAN filtering per se is always enabled in hardware. * The only thing we can do to disable it is lie about what the 802.1Q * EtherType is. diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c index 4aed16d23f21..08cc5dbf2fa6 100644 --- a/drivers/net/dsa/sja1105/sja1105_spi.c +++ b/drivers/net/dsa/sja1105/sja1105_spi.c @@ -199,7 +199,11 @@ static int sja1110_reset_cmd(struct dsa_switch *ds) const struct sja1105_regs *regs = priv->info->regs; u32 switch_reset = BIT(20); - /* Switch core reset */ + /* Only reset the switch core. + * A full cold reset would re-enable the BASE_MCSS_CLOCK PLL which + * would turn on the microcontroller, potentially letting it execute + * code which could interfere with our configuration. + */ return sja1105_xfer_u32(priv, SPI_WRITE, regs->rgu, &switch_reset, NULL); } @@ -796,7 +800,7 @@ const struct sja1105_info sja1110a_info = { .ptp_cmd_packing = sja1105pqrs_ptp_cmd_packing, .rxtstamp = sja1110_rxtstamp, .txtstamp = sja1110_txtstamp, - .clocking_setup = sja1110_clocking_setup, + .disable_microcontroller = sja1110_disable_microcontroller, .pcs_mdio_read = sja1110_pcs_mdio_read, .pcs_mdio_write = sja1110_pcs_mdio_write, .port_speed = { @@ -847,7 +851,7 @@ const struct sja1105_info sja1110b_info = { .ptp_cmd_packing = sja1105pqrs_ptp_cmd_packing, .rxtstamp = sja1110_rxtstamp, .txtstamp = sja1110_txtstamp, - .clocking_setup = sja1110_clocking_setup, + .disable_microcontroller = sja1110_disable_microcontroller, .pcs_mdio_read = sja1110_pcs_mdio_read, .pcs_mdio_write = sja1110_pcs_mdio_write, .port_speed = { @@ -898,7 +902,7 @@ const struct sja1105_info sja1110c_info = { .ptp_cmd_packing = sja1105pqrs_ptp_cmd_packing, .rxtstamp = sja1110_rxtstamp, .txtstamp = sja1110_txtstamp, - .clocking_setup = sja1110_clocking_setup, + .disable_microcontroller = sja1110_disable_microcontroller, .pcs_mdio_read = sja1110_pcs_mdio_read, .pcs_mdio_write = sja1110_pcs_mdio_write, .port_speed = { @@ -949,7 +953,7 @@ const struct sja1105_info sja1110d_info = { .ptp_cmd_packing = sja1105pqrs_ptp_cmd_packing, .rxtstamp = sja1110_rxtstamp, .txtstamp = sja1110_txtstamp, - .clocking_setup = sja1110_clocking_setup, + .disable_microcontroller = sja1110_disable_microcontroller, .pcs_mdio_read = sja1110_pcs_mdio_read, .pcs_mdio_write = sja1110_pcs_mdio_write, .port_speed = { diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c index 1491b72008f3..7a422ef4deb6 100644 --- a/drivers/net/dsa/sja1105/sja1105_static_config.c +++ b/drivers/net/dsa/sja1105/sja1105_static_config.c @@ -1052,8 +1052,7 @@ sja1105_static_config_check_valid(const struct sja1105_static_config *config, (tables[blk_idx].entry_count == tables[blk_idx].ops->max_entry_count) if (tables[BLK_IDX_SCHEDULE].entry_count) { - if (config->device_id != SJA1105T_DEVICE_ID && - config->device_id != SJA1105QS_DEVICE_ID) + if (!tables[BLK_IDX_SCHEDULE].ops->max_entry_count) return SJA1105_TTETHERNET_NOT_SUPPORTED; if (tables[BLK_IDX_SCHEDULE_ENTRY_POINTS].entry_count == 0) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 51bbf5f760c5..cdb5f14fb6bc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3537,21 +3537,33 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, int size = le16_to_cpu(desc->rx.size); u32 truesize = hns3_buf_size(ring); u32 frag_size = size - pull_len; + bool reused; /* Avoid re-using remote or pfmem page */ if (unlikely(!dev_page_is_reusable(desc_cb->priv))) goto out; - /* Stack is not using and current page_offset is non-zero, we can - * reuse from the zero offset. + reused = hns3_can_reuse_page(desc_cb); + + /* Rx page can be reused when: + * 1. Rx page is only owned by the driver when page_offset + * is zero, which means 0 @ truesize will be used by + * stack after skb_add_rx_frag() is called, and the rest + * of rx page can be reused by driver. + * Or + * 2. Rx page is only owned by the driver when page_offset + * is non-zero, which means page_offset @ truesize will + * be used by stack after skb_add_rx_frag() is called, + * and 0 @ truesize can be reused by driver. */ - if (desc_cb->page_offset && hns3_can_reuse_page(desc_cb)) { - desc_cb->page_offset = 0; - desc_cb->reuse_flag = 1; - } else if (desc_cb->page_offset + truesize * 2 <= - hns3_page_size(ring)) { + if ((!desc_cb->page_offset && reused) || + ((desc_cb->page_offset + truesize + truesize) <= + hns3_page_size(ring) && desc_cb->page_offset)) { desc_cb->page_offset += truesize; desc_cb->reuse_flag = 1; + } else if (desc_cb->page_offset && reused) { + desc_cb->page_offset = 0; + desc_cb->reuse_flag = 1; } else if (frag_size <= ring->rx_copybreak) { void *frag = napi_alloc_frag(frag_size); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 87d7c6ab047f..68633145a8b8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -123,11 +123,8 @@ static bool npc_is_field_present(struct rvu *rvu, enum key_fields type, u8 intf) static bool npc_is_same(struct npc_key_field *input, struct npc_key_field *field) { - int ret; - - ret = memcmp(&input->layer_mdata, &field->layer_mdata, - sizeof(struct npc_layer_mdata)); - return ret == 0; + return memcmp(&input->layer_mdata, &field->layer_mdata, + sizeof(struct npc_layer_mdata)) == 0; } static void npc_set_layer_mdata(struct npc_mcam *mcam, enum key_fields type, diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index e967867828d8..9b48ae4bac39 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1528,6 +1528,7 @@ static int pxa168_eth_remove(struct platform_device *pdev) struct net_device *dev = platform_get_drvdata(pdev); struct pxa168_eth_private *pep = netdev_priv(dev); + cancel_work_sync(&pep->tx_timeout_task); if (pep->htpr) { dma_free_coherent(pep->dev->dev.parent, HASH_ADDR_TABLE_SIZE, pep->htpr, pep->htpr_dma); @@ -1539,7 +1540,6 @@ static int pxa168_eth_remove(struct platform_device *pdev) clk_disable_unprepare(pep->clk); mdiobus_unregister(pep->smi_bus); mdiobus_free(pep->smi_bus); - cancel_work_sync(&pep->tx_timeout_task); unregister_netdev(dev); free_netdev(dev); return 0; diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 0528b8f49061..82eef4c72f01 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3678,10 +3678,9 @@ static int vxge_config_vpaths(struct vxge_hw_device_config *device_config, driver_config->vpath_per_dev = 1; for (i = 0; i < VXGE_HW_MAX_VIRTUAL_PATHS; i++) - if (!vxge_bVALn(vpath_mask, i, 1)) - continue; - else + if (vxge_bVALn(vpath_mask, i, 1)) default_no_vpath++; + if (default_no_vpath < driver_config->vpath_per_dev) driver_config->vpath_per_dev = default_no_vpath; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 8a31ce29ecfc..14282472c7a6 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -3341,9 +3341,6 @@ qlcnic_can_start_firmware(struct qlcnic_adapter *adapter) do { msleep(1000); prev_state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE); - - if (prev_state == QLCNIC_DEV_QUISCENT) - continue; } while ((prev_state != QLCNIC_DEV_READY) && --dev_init_timeo); if (!dev_init_timeo) { diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 9a19e4d9da02..ac3c248d4f9b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -250,6 +250,15 @@ config DWMAC_INTEL This selects the Intel platform specific bus support for the stmmac driver. This driver is used for Intel Quark/EHL/TGL. +config DWMAC_LOONGSON + tristate "Loongson PCI DWMAC support" + default MACH_LOONGSON64 + depends on STMMAC_ETH && PCI + depends on COMMON_CLK + help + This selects the LOONGSON PCI bus support for the stmmac driver, + Support for ethernet controller on Loongson-2K1000 SoC and LS7A1000 bridge. + config STMMAC_PCI tristate "STMMAC PCI bus support" depends on STMMAC_ETH && PCI diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 6471f93889ee..d4e12e9ace4f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -37,4 +37,5 @@ dwmac-altr-socfpga-objs := altr_tse_pcs.o dwmac-socfpga.o obj-$(CONFIG_STMMAC_PCI) += stmmac-pci.o obj-$(CONFIG_DWMAC_INTEL) += dwmac-intel.o +obj-$(CONFIG_DWMAC_LOONGSON) += dwmac-loongson.o stmmac-pci-objs:= stmmac_pci.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c new file mode 100644 index 000000000000..8cd4e2e8ec40 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Loongson Corporation + */ + +#include <linux/clk-provider.h> +#include <linux/pci.h> +#include <linux/dmi.h> +#include <linux/device.h> +#include <linux/of_irq.h> +#include "stmmac.h" + +static int loongson_default_data(struct plat_stmmacenet_data *plat) +{ + plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ + plat->has_gmac = 1; + plat->force_sf_dma_mode = 1; + + /* Set default value for multicast hash bins */ + plat->multicast_filter_bins = HASH_TABLE_SIZE; + + /* Set default value for unicast filter entries */ + plat->unicast_filter_entries = 1; + + /* Set the maxmtu to a default of JUMBO_LEN */ + plat->maxmtu = JUMBO_LEN; + + /* Set default number of RX and TX queues to use */ + plat->tx_queues_to_use = 1; + plat->rx_queues_to_use = 1; + + /* Disable Priority config by default */ + plat->tx_queues_cfg[0].use_prio = false; + plat->rx_queues_cfg[0].use_prio = false; + + /* Disable RX queues routing by default */ + plat->rx_queues_cfg[0].pkt_route = 0x0; + + /* Default to phy auto-detection */ + plat->phy_addr = -1; + + plat->dma_cfg->pbl = 32; + plat->dma_cfg->pblx8 = true; + + plat->multicast_filter_bins = 256; + return 0; +} + +static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct plat_stmmacenet_data *plat; + struct stmmac_resources res; + int ret, i, mdio; + struct device_node *np; + + np = dev_of_node(&pdev->dev); + + if (!np) { + pr_info("dwmac_loongson_pci: No OF node\n"); + return -ENODEV; + } + + if (!of_device_is_compatible(np, "loongson, pci-gmac")) { + pr_info("dwmac_loongson_pci: Incompatible OF node\n"); + return -ENODEV; + } + + plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL); + if (!plat) + return -ENOMEM; + + if (plat->mdio_node) { + dev_err(&pdev->dev, "Found MDIO subnode\n"); + mdio = true; + } + + if (mdio) { + plat->mdio_bus_data = devm_kzalloc(&pdev->dev, + sizeof(*plat->mdio_bus_data), + GFP_KERNEL); + if (!plat->mdio_bus_data) + return -ENOMEM; + plat->mdio_bus_data->needs_reset = true; + } + + plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg), GFP_KERNEL); + if (!plat->dma_cfg) + return -ENOMEM; + + /* Enable pci device */ + ret = pci_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); + return ret; + } + + /* Get the base address of device */ + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + if (pci_resource_len(pdev, i) == 0) + continue; + ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev)); + if (ret) + return ret; + break; + } + + plat->bus_id = of_alias_get_id(np, "ethernet"); + if (plat->bus_id < 0) + plat->bus_id = pci_dev_id(pdev); + + plat->phy_interface = device_get_phy_mode(&pdev->dev); + if (plat->phy_interface < 0) + dev_err(&pdev->dev, "phy_mode not found\n"); + + plat->interface = PHY_INTERFACE_MODE_GMII; + + pci_set_master(pdev); + + loongson_default_data(plat); + pci_enable_msi(pdev); + memset(&res, 0, sizeof(res)); + res.addr = pcim_iomap_table(pdev)[0]; + + res.irq = of_irq_get_byname(np, "macirq"); + if (res.irq < 0) { + dev_err(&pdev->dev, "IRQ macirq not found\n"); + ret = -ENODEV; + } + + res.wol_irq = of_irq_get_byname(np, "eth_wake_irq"); + if (res.wol_irq < 0) { + dev_info(&pdev->dev, "IRQ eth_wake_irq not found, using macirq\n"); + res.wol_irq = res.irq; + } + + res.lpi_irq = of_irq_get_byname(np, "eth_lpi"); + if (res.lpi_irq < 0) { + dev_err(&pdev->dev, "IRQ eth_lpi not found\n"); + ret = -ENODEV; + } + + return stmmac_dvr_probe(&pdev->dev, plat, &res); +} + +static void loongson_dwmac_remove(struct pci_dev *pdev) +{ + int i; + + stmmac_dvr_remove(&pdev->dev); + + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + if (pci_resource_len(pdev, i) == 0) + continue; + pcim_iounmap_regions(pdev, BIT(i)); + break; + } + + pci_disable_device(pdev); +} + +static int __maybe_unused loongson_dwmac_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + ret = stmmac_suspend(dev); + if (ret) + return ret; + + ret = pci_save_state(pdev); + if (ret) + return ret; + + pci_disable_device(pdev); + pci_wake_from_d3(pdev, true); + return 0; +} + +static int __maybe_unused loongson_dwmac_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + pci_restore_state(pdev); + pci_set_power_state(pdev, PCI_D0); + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + pci_set_master(pdev); + + return stmmac_resume(dev); +} + +static SIMPLE_DEV_PM_OPS(loongson_dwmac_pm_ops, loongson_dwmac_suspend, + loongson_dwmac_resume); + +static const struct pci_device_id loongson_dwmac_id_table[] = { + { PCI_VDEVICE(LOONGSON, 0x7a03) }, + {} +}; +MODULE_DEVICE_TABLE(pci, loongson_dwmac_id_table); + +struct pci_driver loongson_dwmac_driver = { + .name = "dwmac-loongson-pci", + .id_table = loongson_dwmac_id_table, + .probe = loongson_dwmac_probe, + .remove = loongson_dwmac_remove, + .driver = { + .pm = &loongson_dwmac_pm_ops, + }, +}; + +module_pci_driver(loongson_dwmac_driver); + +MODULE_DESCRIPTION("Loongson DWMAC PCI driver"); +MODULE_AUTHOR("Qing Zhang <zhangqing@loongson.cn>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 4e70efc45458..92dab609d4f8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -573,10 +573,8 @@ static int tc_add_flow(struct stmmac_priv *priv, for (i = 0; i < ARRAY_SIZE(tc_flow_parsers); i++) { ret = tc_flow_parsers[i].fn(priv, cls, entry); - if (!ret) { + if (!ret) entry->in_use = true; - continue; - } } if (!entry->in_use) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index d85521989753..6348307bfa84 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -269,9 +269,9 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) err = PTR_ERR(nsim_dev->nodes_ddir); goto err_out; } - debugfs_create_bool("fail_trap_counter_get", 0600, + debugfs_create_bool("fail_trap_drop_counter_get", 0600, nsim_dev->ddir, - &nsim_dev->fail_trap_counter_get); + &nsim_dev->fail_trap_drop_counter_get); nsim_udp_tunnels_debugfs_create(nsim_dev); return 0; @@ -1208,14 +1208,14 @@ static int nsim_rate_node_parent_set(struct devlink_rate *child, } static int -nsim_dev_devlink_trap_hw_counter_get(struct devlink *devlink, - const struct devlink_trap *trap, - u64 *p_drops) +nsim_dev_devlink_trap_drop_counter_get(struct devlink *devlink, + const struct devlink_trap *trap, + u64 *p_drops) { struct nsim_dev *nsim_dev = devlink_priv(devlink); u64 *cnt; - if (nsim_dev->fail_trap_counter_get) + if (nsim_dev->fail_trap_drop_counter_get) return -EINVAL; cnt = &nsim_dev->trap_data->trap_pkt_cnt; @@ -1247,7 +1247,7 @@ static const struct devlink_ops nsim_dev_devlink_ops = { .rate_node_del = nsim_rate_node_del, .rate_leaf_parent_set = nsim_rate_leaf_parent_set, .rate_node_parent_set = nsim_rate_node_parent_set, - .trap_drop_counter_get = nsim_dev_devlink_trap_hw_counter_get, + .trap_drop_counter_get = nsim_dev_devlink_trap_drop_counter_get, }; #define NSIM_DEV_MAX_MACS_DEFAULT 32 diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index f2304e61919a..ae462957dcee 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -249,7 +249,7 @@ struct nsim_dev { bool fail_trap_group_set; bool fail_trap_policer_set; bool fail_trap_policer_counter_get; - bool fail_trap_counter_get; + bool fail_trap_drop_counter_get; struct { struct udp_tunnel_nic_shared utn_shared; u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS]; diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c index 6c05c4c8914a..fd61a7cc4fdf 100644 --- a/drivers/net/wan/hostess_sv11.c +++ b/drivers/net/wan/hostess_sv11.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/* - * Comtrol SV11 card driver +/* Comtrol SV11 card driver * * This is a slightly odd Z85230 synchronous driver. All you need to * know basically is @@ -9,7 +8,7 @@ * * It supports DMA using two DMA channels in SYNC mode. The driver doesn't * use these facilities - * + * * The control port is at io+1, the data at io+3 and turning off the DMA * is done by writing 0 to io+4 * @@ -44,17 +43,15 @@ static int dma; -/* - * Network driver support routines +/* Network driver support routines */ -static inline struct z8530_dev* dev_to_sv(struct net_device *dev) +static inline struct z8530_dev *dev_to_sv(struct net_device *dev) { return (struct z8530_dev *)dev_to_hdlc(dev)->priv; } -/* - * Frame receive. Simple for our card as we do HDLC and there +/* Frame receive. Simple for our card as we do HDLC and there * is no funny garbage involved */ @@ -65,15 +62,13 @@ static void hostess_input(struct z8530_channel *c, struct sk_buff *skb) skb->protocol = hdlc_type_trans(skb, c->netdevice); skb_reset_mac_header(skb); skb->dev = c->netdevice; - /* - * Send it to the PPP layer. We don't have time to process + /* Send it to the PPP layer. We don't have time to process * it right now. */ netif_rx(skb); } -/* - * We've been placed in the UP state +/* We've been placed in the UP state */ static int hostess_open(struct net_device *d) @@ -81,19 +76,18 @@ static int hostess_open(struct net_device *d) struct z8530_dev *sv11 = dev_to_sv(d); int err = -1; - /* - * Link layer up + /* Link layer up */ switch (dma) { - case 0: - err = z8530_sync_open(d, &sv11->chanA); - break; - case 1: - err = z8530_sync_dma_open(d, &sv11->chanA); - break; - case 2: - err = z8530_sync_txdma_open(d, &sv11->chanA); - break; + case 0: + err = z8530_sync_open(d, &sv11->chanA); + break; + case 1: + err = z8530_sync_dma_open(d, &sv11->chanA); + break; + case 2: + err = z8530_sync_txdma_open(d, &sv11->chanA); + break; } if (err) @@ -102,15 +96,15 @@ static int hostess_open(struct net_device *d) err = hdlc_open(d); if (err) { switch (dma) { - case 0: - z8530_sync_close(d, &sv11->chanA); - break; - case 1: - z8530_sync_dma_close(d, &sv11->chanA); - break; - case 2: - z8530_sync_txdma_close(d, &sv11->chanA); - break; + case 0: + z8530_sync_close(d, &sv11->chanA); + break; + case 1: + z8530_sync_dma_close(d, &sv11->chanA); + break; + case 2: + z8530_sync_txdma_close(d, &sv11->chanA); + break; } return err; } @@ -127,8 +121,7 @@ static int hostess_open(struct net_device *d) static int hostess_close(struct net_device *d) { struct z8530_dev *sv11 = dev_to_sv(d); - /* - * Discard new frames + /* Discard new frames */ sv11->chanA.rx_function = z8530_null_rx; @@ -136,32 +129,29 @@ static int hostess_close(struct net_device *d) netif_stop_queue(d); switch (dma) { - case 0: - z8530_sync_close(d, &sv11->chanA); - break; - case 1: - z8530_sync_dma_close(d, &sv11->chanA); - break; - case 2: - z8530_sync_txdma_close(d, &sv11->chanA); - break; + case 0: + z8530_sync_close(d, &sv11->chanA); + break; + case 1: + z8530_sync_dma_close(d, &sv11->chanA); + break; + case 2: + z8530_sync_txdma_close(d, &sv11->chanA); + break; } return 0; } static int hostess_ioctl(struct net_device *d, struct ifreq *ifr, int cmd) { - /* struct z8530_dev *sv11=dev_to_sv(d); - z8530_ioctl(d,&sv11->chanA,ifr,cmd) */ return hdlc_ioctl(d, ifr, cmd); } -/* - * Passed network frames, fire them downwind. +/* Passed network frames, fire them downwind. */ static netdev_tx_t hostess_queue_xmit(struct sk_buff *skb, - struct net_device *d) + struct net_device *d) { return z8530_queue_xmit(&dev_to_sv(d)->chanA, skb); } @@ -174,8 +164,7 @@ static int hostess_attach(struct net_device *dev, unsigned short encoding, return -EINVAL; } -/* - * Description block for a Comtrol Hostess SV11 card +/* Description block for a Comtrol Hostess SV11 card */ static const struct net_device_ops hostess_ops = { @@ -189,8 +178,7 @@ static struct z8530_dev *sv11_init(int iobase, int irq) { struct z8530_dev *sv; struct net_device *netdev; - /* - * Get the needed I/O space + /* Get the needed I/O space */ if (!request_region(iobase, 8, "Comtrol SV11")) { @@ -202,8 +190,7 @@ static struct z8530_dev *sv11_init(int iobase, int irq) if (!sv) goto err_kzalloc; - /* - * Stuff in the I/O addressing + /* Stuff in the I/O addressing */ sv->active = 0; @@ -218,7 +205,8 @@ static struct z8530_dev *sv11_init(int iobase, int irq) outb(0, iobase + 4); /* DMA off */ /* We want a fast IRQ for this device. Actually we'd like an even faster - IRQ ;) - This is one driver RtLinux is made for */ + * IRQ ;) - This is one driver RtLinux is made for + */ if (request_irq(irq, z8530_interrupt, 0, "Hostess SV11", sv) < 0) { @@ -232,8 +220,7 @@ static struct z8530_dev *sv11_init(int iobase, int irq) sv->chanB.dev = sv; if (dma) { - /* - * You can have DMA off or 1 and 3 thats the lot + /* You can have DMA off or 1 and 3 thats the lot * on the Comtrol. */ sv->chanA.txdma = 3; @@ -248,11 +235,11 @@ static struct z8530_dev *sv11_init(int iobase, int irq) } /* Kill our private IRQ line the hostess can end up chattering - until the configuration is set */ + * until the configuration is set + */ disable_irq(irq); - /* - * Begin normal initialise + /* Begin normal initialise */ if (z8530_init(sv)) { @@ -268,8 +255,7 @@ static struct z8530_dev *sv11_init(int iobase, int irq) enable_irq(irq); - /* - * Now we can take the IRQ + /* Now we can take the IRQ */ sv->chanA.netdevice = netdev = alloc_hdlcdev(sv); @@ -340,7 +326,8 @@ static struct z8530_dev *sv11_unit; int init_module(void) { - if ((sv11_unit = sv11_init(io, irq)) == NULL) + sv11_unit = sv11_init(io, irq); + if (!sv11_unit) return -ENODEV; return 0; } diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c index a0ce95a287c5..2b0c7232e91f 100644 --- a/drivers/nfc/nxp-nci/core.c +++ b/drivers/nfc/nxp-nci/core.c @@ -70,21 +70,16 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) struct nxp_nci_info *info = nci_get_drvdata(ndev); int r; - if (!info->phy_ops->write) { - r = -ENOTSUPP; - goto send_exit; - } + if (!info->phy_ops->write) + return -EOPNOTSUPP; - if (info->mode != NXP_NCI_MODE_NCI) { - r = -EINVAL; - goto send_exit; - } + if (info->mode != NXP_NCI_MODE_NCI) + return -EINVAL; r = info->phy_ops->write(info->phy_id, skb); if (r < 0) kfree_skb(skb); -send_exit: return r; } @@ -104,10 +99,8 @@ int nxp_nci_probe(void *phy_id, struct device *pdev, int r; info = devm_kzalloc(pdev, sizeof(struct nxp_nci_info), GFP_KERNEL); - if (!info) { - r = -ENOMEM; - goto probe_exit; - } + if (!info) + return -ENOMEM; info->phy_id = phy_id; info->pdev = pdev; @@ -120,31 +113,25 @@ int nxp_nci_probe(void *phy_id, struct device *pdev, if (info->phy_ops->set_mode) { r = info->phy_ops->set_mode(info->phy_id, NXP_NCI_MODE_COLD); if (r < 0) - goto probe_exit; + return r; } info->mode = NXP_NCI_MODE_COLD; info->ndev = nci_allocate_device(&nxp_nci_ops, NXP_NCI_NFC_PROTOCOLS, NXP_NCI_HDR_LEN, 0); - if (!info->ndev) { - r = -ENOMEM; - goto probe_exit; - } + if (!info->ndev) + return -ENOMEM; nci_set_parent_dev(info->ndev, pdev); nci_set_drvdata(info->ndev, info); r = nci_register_device(info->ndev); - if (r < 0) - goto probe_exit_free_nci; + if (r < 0) { + nci_free_device(info->ndev); + return r; + } *ndev = info->ndev; - - goto probe_exit; - -probe_exit_free_nci: - nci_free_device(info->ndev); -probe_exit: return r; } EXPORT_SYMBOL(nxp_nci_probe); diff --git a/drivers/nfc/nxp-nci/firmware.c b/drivers/nfc/nxp-nci/firmware.c index dae0c8030e95..119bf305c642 100644 --- a/drivers/nfc/nxp-nci/firmware.c +++ b/drivers/nfc/nxp-nci/firmware.c @@ -95,10 +95,8 @@ static int nxp_nci_fw_send_chunk(struct nxp_nci_info *info) int r; skb = nci_skb_alloc(info->ndev, info->max_payload, GFP_KERNEL); - if (!skb) { - r = -ENOMEM; - goto chunk_exit; - } + if (!skb) + return -ENOMEM; chunk_len = info->max_payload - NXP_NCI_FW_HDR_LEN - NXP_NCI_FW_CRC_LEN; remaining_len = fw_info->frame_size - fw_info->written; @@ -124,7 +122,6 @@ static int nxp_nci_fw_send_chunk(struct nxp_nci_info *info) kfree_skb(skb); -chunk_exit: return r; } diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 83f23774b908..d61bbbf11979 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -23,6 +23,7 @@ struct mptcp_ext { u64 data_seq; u32 subflow_seq; u16 data_len; + __sum16 csum; u8 use_map:1, dsn64:1, data_fin:1, @@ -31,7 +32,8 @@ struct mptcp_ext { mpc_map:1, frozen:1, reset_transient:1; - u8 reset_reason:4; + u8 reset_reason:4, + csum_reqd:1; }; #define MPTCP_RM_IDS_MAX 8 @@ -63,8 +65,9 @@ struct mptcp_out_options { struct mptcp_rm_list rm_list; u8 join_id; u8 backup; - u8 reset_reason:4; - u8 reset_transient:1; + u8 reset_reason:4, + reset_transient:1, + csum_reqd:1; u32 nonce; u64 thmac; u32 token; diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index 775a46d0b0f0..6bf43176f14c 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -73,6 +73,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext, __field(u64, data_seq) __field(u32, subflow_seq) __field(u16, data_len) + __field(u16, csum) __field(u8, use_map) __field(u8, dsn64) __field(u8, data_fin) @@ -82,6 +83,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext, __field(u8, frozen) __field(u8, reset_transient) __field(u8, reset_reason) + __field(u8, csum_reqd) ), TP_fast_assign( @@ -89,6 +91,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext, __entry->data_seq = mpext->data_seq; __entry->subflow_seq = mpext->subflow_seq; __entry->data_len = mpext->data_len; + __entry->csum = (__force u16)mpext->csum; __entry->use_map = mpext->use_map; __entry->dsn64 = mpext->dsn64; __entry->data_fin = mpext->data_fin; @@ -98,16 +101,18 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext, __entry->frozen = mpext->frozen; __entry->reset_transient = mpext->reset_transient; __entry->reset_reason = mpext->reset_reason; + __entry->csum_reqd = mpext->csum_reqd; ), - TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u", + TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u csum=%x use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u csum_reqd=%u", __entry->data_ack, __entry->data_seq, __entry->subflow_seq, __entry->data_len, - __entry->use_map, __entry->dsn64, - __entry->data_fin, __entry->use_ack, - __entry->ack64, __entry->mpc_map, - __entry->frozen, __entry->reset_transient, - __entry->reset_reason) + __entry->csum, __entry->use_map, + __entry->dsn64, __entry->data_fin, + __entry->use_ack, __entry->ack64, + __entry->mpc_map, __entry->frozen, + __entry->reset_transient, __entry->reset_reason, + __entry->csum_reqd) ); DEFINE_EVENT(mptcp_dump_mpext, get_mapping_status, diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 8eb3c0844bff..7b05f7102321 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -105,6 +105,7 @@ struct mptcp_info { __u64 mptcpi_rcv_nxt; __u8 mptcpi_local_addr_used; __u8 mptcpi_local_addr_max; + __u8 mptcpi_csum_enabled; }; /* diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h index 5ae3ace84de0..332b18f318f8 100644 --- a/include/uapi/linux/seg6_local.h +++ b/include/uapi/linux/seg6_local.h @@ -64,6 +64,8 @@ enum { SEG6_LOCAL_ACTION_END_AM = 14, /* custom BPF action */ SEG6_LOCAL_ACTION_END_BPF = 15, + /* decap and lookup of DA in v4 or v6 table */ + SEG6_LOCAL_ACTION_END_DT46 = 16, __SEG6_LOCAL_ACTION_MAX, }; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index e3f6ff05a528..1a705a4ef7fa 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -108,7 +108,8 @@ static inline netdev_features_t vlan_tnl_features(struct net_device *real_dev) netdev_features_t ret; ret = real_dev->hw_enc_features & - (NETIF_F_CSUM_MASK | NETIF_F_ALL_TSO | NETIF_F_GSO_ENCAP_ALL); + (NETIF_F_CSUM_MASK | NETIF_F_GSO_SOFTWARE | + NETIF_F_GSO_ENCAP_ALL); if ((ret & NETIF_F_GSO_ENCAP_ALL) && (ret & NETIF_F_CSUM_MASK)) return (ret & ~NETIF_F_CSUM_MASK) | NETIF_F_HW_CSUM; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index da3256a3eed0..8789a57af543 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -113,9 +113,7 @@ static void __vlan_add_list(struct net_bridge_vlan *v) headp = &vg->vlan_list; list_for_each_prev(hpos, headp) { vent = list_entry(hpos, struct net_bridge_vlan, vlist); - if (v->vid < vent->vid) - continue; - else + if (v->vid >= vent->vid) break; } list_add_rcu(&v->vlist, hpos); diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index cac30e676ac9..23267c8db7c4 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -480,7 +480,7 @@ got_phyid: phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC); if (!phyinfo) { res = -ENOMEM; - goto out_err; + goto out; } phy_layer->id = phyid; diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 4ff38cb08f4b..60bf3b877957 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -87,10 +87,10 @@ struct seg6_end_dt_info { int vrf_ifindex; int vrf_table; - /* tunneled packet proto and family (IPv4 or IPv6) */ - __be16 proto; + /* tunneled packet family (IPv4 or IPv6). + * Protocol and header length are inferred from family. + */ u16 family; - int hdrlen; }; struct pcpu_seg6_local_counters { @@ -521,19 +521,6 @@ static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg, info->net = net; info->vrf_ifindex = vrf_ifindex; - switch (family) { - case AF_INET: - info->proto = htons(ETH_P_IP); - info->hdrlen = sizeof(struct iphdr); - break; - case AF_INET6: - info->proto = htons(ETH_P_IPV6); - info->hdrlen = sizeof(struct ipv6hdr); - break; - default: - return -EINVAL; - } - info->family = family; info->mode = DT_VRF_MODE; @@ -622,22 +609,44 @@ error: } static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb, - struct seg6_local_lwt *slwt) + struct seg6_local_lwt *slwt, u16 family) { struct seg6_end_dt_info *info = &slwt->dt_info; struct net_device *vrf; + __be16 protocol; + int hdrlen; vrf = end_dt_get_vrf_rcu(skb, info); if (unlikely(!vrf)) goto drop; - skb->protocol = info->proto; + switch (family) { + case AF_INET: + protocol = htons(ETH_P_IP); + hdrlen = sizeof(struct iphdr); + break; + case AF_INET6: + protocol = htons(ETH_P_IPV6); + hdrlen = sizeof(struct ipv6hdr); + break; + case AF_UNSPEC: + fallthrough; + default: + goto drop; + } + + if (unlikely(info->family != AF_UNSPEC && info->family != family)) { + pr_warn_once("seg6local: SRv6 End.DT* family mismatch"); + goto drop; + } + + skb->protocol = protocol; skb_dst_drop(skb); - skb_set_transport_header(skb, info->hdrlen); + skb_set_transport_header(skb, hdrlen); - return end_dt_vrf_rcv(skb, info->family, vrf); + return end_dt_vrf_rcv(skb, family, vrf); drop: kfree_skb(skb); @@ -656,7 +665,7 @@ static int input_action_end_dt4(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto drop; - skb = end_dt_vrf_core(skb, slwt); + skb = end_dt_vrf_core(skb, slwt, AF_INET); if (!skb) /* packet has been processed and consumed by the VRF */ return 0; @@ -739,7 +748,7 @@ static int input_action_end_dt6(struct sk_buff *skb, goto legacy_mode; /* DT6_VRF_MODE */ - skb = end_dt_vrf_core(skb, slwt); + skb = end_dt_vrf_core(skb, slwt, AF_INET6); if (!skb) /* packet has been processed and consumed by the VRF */ return 0; @@ -767,6 +776,36 @@ drop: return -EINVAL; } +#ifdef CONFIG_NET_L3_MASTER_DEV +static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg, + struct netlink_ext_ack *extack) +{ + return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack); +} + +static int input_action_end_dt46(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + unsigned int off = 0; + int nexthdr; + + nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL); + if (unlikely(nexthdr < 0)) + goto drop; + + switch (nexthdr) { + case IPPROTO_IPIP: + return input_action_end_dt4(skb, slwt); + case IPPROTO_IPV6: + return input_action_end_dt6(skb, slwt); + } + +drop: + kfree_skb(skb); + return -EINVAL; +} +#endif + /* push an SRH on top of the current one */ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) { @@ -969,6 +1008,17 @@ static struct seg6_action_desc seg6_action_table[] = { .input = input_action_end_dt6, }, { + .action = SEG6_LOCAL_ACTION_END_DT46, + .attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE), + .optattrs = SEG6_F_LOCAL_COUNTERS, +#ifdef CONFIG_NET_L3_MASTER_DEV + .input = input_action_end_dt46, + .slwt_ops = { + .build_state = seg6_end_dt46_build, + }, +#endif + }, + { .action = SEG6_LOCAL_ACTION_END_B6, .attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH), .optattrs = SEG6_F_LOCAL_COUNTERS, diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 1ec4d36a39f0..6c2639bb9c19 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -23,6 +23,7 @@ struct mptcp_pernet { u8 mptcp_enabled; unsigned int add_addr_timeout; + u8 checksum_enabled; }; static struct mptcp_pernet *mptcp_get_pernet(struct net *net) @@ -40,10 +41,16 @@ unsigned int mptcp_get_add_addr_timeout(struct net *net) return mptcp_get_pernet(net)->add_addr_timeout; } +int mptcp_is_checksum_enabled(struct net *net) +{ + return mptcp_get_pernet(net)->checksum_enabled; +} + static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; pernet->add_addr_timeout = TCP_RTO_MAX; + pernet->checksum_enabled = 0; } #ifdef CONFIG_SYSCTL @@ -65,6 +72,14 @@ static struct ctl_table mptcp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "checksum_enabled", + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, {} }; @@ -82,6 +97,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[0].data = &pernet->mptcp_enabled; table[1].data = &pernet->add_addr_timeout; + table[2].data = &pernet->checksum_enabled; hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table); if (!hdr) diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index eb2dc6dbe212..e7e60bc1fb96 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -25,6 +25,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH), SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX), + SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR), SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL), SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE), SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index f0da4f060fe1..92e56c0cfbdd 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -18,6 +18,7 @@ enum linux_mptcp_mib_field { MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */ MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */ MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */ + MPTCP_MIB_DATACSUMERR, /* The data checksum fail */ MPTCP_MIB_OFOQUEUETAIL, /* Segments inserted into OoO queue tail */ MPTCP_MIB_OFOQUEUE, /* Segments inserted into OoO queue */ MPTCP_MIB_OFOMERGE, /* Segments merged in OoO queue */ diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index f16d9b5ee978..8f88ddeab6a2 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -144,6 +144,7 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, info->mptcpi_write_seq = READ_ONCE(msk->write_seq); info->mptcpi_snd_una = READ_ONCE(msk->snd_una); info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); + info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); unlock_sock_fast(sk, slow); } diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 6b825fb3fa83..1aec01686c1a 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -44,7 +44,20 @@ static void mptcp_parse_option(const struct sk_buff *skb, else expected_opsize = TCPOLEN_MPTCP_MPC_SYN; } - if (opsize != expected_opsize) + + /* Cfr RFC 8684 Section 3.3.0: + * If a checksum is present but its use had + * not been negotiated in the MP_CAPABLE handshake, the receiver MUST + * close the subflow with a RST, as it is not behaving as negotiated. + * If a checksum is not present when its use has been negotiated, the + * receiver MUST close the subflow with a RST, as it is considered + * broken + * We parse even option with mismatching csum presence, so that + * later in subflow_data_ready we can trigger the reset. + */ + if (opsize != expected_opsize && + (expected_opsize != TCPOLEN_MPTCP_MPC_ACK_DATA || + opsize != TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM)) break; /* try to be gentle vs future versions on the initial syn */ @@ -66,16 +79,9 @@ static void mptcp_parse_option(const struct sk_buff *skb, * host requires the use of checksums, checksums MUST be used. * In other words, the only way for checksums not to be used * is if both hosts in their SYNs set A=0." - * - * Section 3.3.0: - * "If a checksum is not present when its use has been - * negotiated, the receiver MUST close the subflow with a RST as - * it is considered broken." - * - * We don't implement DSS checksum - fall back to TCP. */ if (flags & MPTCP_CAP_CHECKSUM_REQD) - break; + mp_opt->csum_reqd = 1; mp_opt->mp_capable = 1; if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) { @@ -86,7 +92,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->rcvr_key = get_unaligned_be64(ptr); ptr += 8; } - if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) { + if (opsize >= TCPOLEN_MPTCP_MPC_ACK_DATA) { /* Section 3.1.: * "the data parameters in a MP_CAPABLE are semantically * equivalent to those in a DSS option and can be used @@ -98,9 +104,14 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; } - pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d", + if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) { + mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + mp_opt->csum_reqd = 1; + ptr += 2; + } + pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u", version, flags, opsize, mp_opt->sndr_key, - mp_opt->rcvr_key, mp_opt->data_len); + mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum); break; case MPTCPOPT_MP_JOIN: @@ -171,10 +182,8 @@ static void mptcp_parse_option(const struct sk_buff *skb, expected_opsize += TCPOLEN_MPTCP_DSS_MAP32; } - /* RFC 6824, Section 3.3: - * If a checksum is present, but its use had - * not been negotiated in the MP_CAPABLE handshake, - * the checksum field MUST be ignored. + /* Always parse any csum presence combination, we will enforce + * RFC 8684 Section 3.3.0 checks later in subflow_data_ready */ if (opsize != expected_opsize && opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) @@ -209,9 +218,15 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; - pr_debug("data_seq=%llu subflow_seq=%u data_len=%u", + if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) { + mp_opt->csum_reqd = 1; + mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + ptr += 2; + } + + pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", mp_opt->data_seq, mp_opt->subflow_seq, - mp_opt->data_len); + mp_opt->data_len, mp_opt->csum_reqd, mp_opt->csum); } break; @@ -323,9 +338,12 @@ static void mptcp_parse_option(const struct sk_buff *skb, } } -void mptcp_get_options(const struct sk_buff *skb, +void mptcp_get_options(const struct sock *sk, + const struct sk_buff *skb, struct mptcp_options_received *mp_opt) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); const struct tcphdr *th = tcp_hdr(skb); const unsigned char *ptr; int length; @@ -341,6 +359,7 @@ void mptcp_get_options(const struct sk_buff *skb, mp_opt->dss = 0; mp_opt->mp_prio = 0; mp_opt->reset = 0; + mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled); length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); @@ -380,6 +399,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, subflow->snd_isn = TCP_SKB_CB(skb)->end_seq; if (subflow->request_mptcp) { opts->suboptions = OPTION_MPTCP_MPC_SYN; + opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk)); *size = TCPOLEN_MPTCP_MPC_SYN; return true; } else if (subflow->request_join) { @@ -435,8 +455,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_ext *mpext; unsigned int data_len; + u8 len; /* When skb is not available, we better over-estimate the emitted * options len. A full DSS option (28 bytes) is longer than @@ -465,16 +487,26 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, opts->suboptions = OPTION_MPTCP_MPC_ACK; opts->sndr_key = subflow->local_key; opts->rcvr_key = subflow->remote_key; + opts->csum_reqd = READ_ONCE(msk->csum_enabled); /* Section 3.1. * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK * packets that start the first subflow of an MPTCP connection, * as well as the first packet that carries data */ - if (data_len > 0) - *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4); - else + if (data_len > 0) { + len = TCPOLEN_MPTCP_MPC_ACK_DATA; + if (opts->csum_reqd) { + /* we need to propagate more info to csum the pseudo hdr */ + opts->ext_copy.data_seq = mpext->data_seq; + opts->ext_copy.subflow_seq = mpext->subflow_seq; + opts->ext_copy.csum = mpext->csum; + len += TCPOLEN_MPTCP_DSS_CHECKSUM; + } + *size = ALIGN(len, 4); + } else { *size = TCPOLEN_MPTCP_MPC_ACK; + } pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d", subflow, subflow->local_key, subflow->remote_key, @@ -535,18 +567,21 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, bool ret = false; u64 ack_seq; + opts->csum_reqd = READ_ONCE(msk->csum_enabled); mpext = skb ? mptcp_get_ext(skb) : NULL; if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) { - unsigned int map_size; + unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; - map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; + if (mpext) { + if (opts->csum_reqd) + map_size += TCPOLEN_MPTCP_DSS_CHECKSUM; - remaining -= map_size; - dss_size = map_size; - if (mpext) opts->ext_copy = *mpext; + } + remaining -= map_size; + dss_size = map_size; if (skb && snd_data_fin_enable) mptcp_write_data_fin(subflow, skb, &opts->ext_copy); ret = true; @@ -789,6 +824,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, if (subflow_req->mp_capable) { opts->suboptions = OPTION_MPTCP_MPC_SYNACK; opts->sndr_key = subflow_req->local_key; + opts->csum_reqd = subflow_req->csum_reqd; *size = TCPOLEN_MPTCP_MPC_SYNACK; pr_debug("subflow_req=%p, local_key=%llu", subflow_req, subflow_req->local_key); @@ -1007,7 +1043,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) return; } - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return; @@ -1099,6 +1135,10 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) } mpext->data_len = mp_opt.data_len; mpext->use_map = 1; + mpext->csum_reqd = mp_opt.csum_reqd; + + if (mpext->csum_reqd) + mpext->csum = mp_opt.csum; } } @@ -1118,25 +1158,50 @@ static void mptcp_set_rwin(const struct tcp_sock *tp) WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); } +static u16 mptcp_make_csum(const struct mptcp_ext *mpext) +{ + struct csum_pseudo_header header; + __wsum csum; + + /* cfr RFC 8684 3.3.1.: + * the data sequence number used in the pseudo-header is + * always the 64-bit value, irrespective of what length is used in the + * DSS option itself. + */ + header.data_seq = cpu_to_be64(mpext->data_seq); + header.subflow_seq = htonl(mpext->subflow_seq); + header.data_len = htons(mpext->data_len); + header.csum = 0; + + csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum)); + return (__force u16)csum_fold(csum); +} + void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts) { if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & opts->suboptions) { - u8 len; + u8 len, flag = MPTCP_CAP_HMAC_SHA256; - if (OPTION_MPTCP_MPC_SYN & opts->suboptions) + if (OPTION_MPTCP_MPC_SYN & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYN; - else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) + } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYNACK; - else if (opts->ext_copy.data_len) + } else if (opts->ext_copy.data_len) { len = TCPOLEN_MPTCP_MPC_ACK_DATA; - else + if (opts->csum_reqd) + len += TCPOLEN_MPTCP_DSS_CHECKSUM; + } else { len = TCPOLEN_MPTCP_MPC_ACK; + } + + if (opts->csum_reqd) + flag |= MPTCP_CAP_CHECKSUM_REQD; *ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len, MPTCP_SUPPORTED_VERSION, - MPTCP_CAP_HMAC_SHA256); + flag); if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & opts->suboptions)) @@ -1152,8 +1217,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, if (!opts->ext_copy.data_len) goto mp_capable_done; - put_unaligned_be32(opts->ext_copy.data_len << 16 | - TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + if (opts->csum_reqd) { + put_unaligned_be32(opts->ext_copy.data_len << 16 | + mptcp_make_csum(&opts->ext_copy), ptr); + } else { + put_unaligned_be32(opts->ext_copy.data_len << 16 | + TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + } ptr += 1; } @@ -1305,6 +1375,9 @@ mp_capable_done: flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64; if (mpext->data_fin) flags |= MPTCP_DSS_DATA_FIN; + + if (opts->csum_reqd) + len += TCPOLEN_MPTCP_DSS_CHECKSUM; } *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); @@ -1324,8 +1397,13 @@ mp_capable_done: ptr += 2; put_unaligned_be32(mpext->subflow_seq, ptr); ptr += 1; - put_unaligned_be32(mpext->data_len << 16 | - TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + if (opts->csum_reqd) { + put_unaligned_be32(mpext->data_len << 16 | + mptcp_make_csum(mpext), ptr); + } else { + put_unaligned_be32(mpext->data_len << 16 | + TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + } } } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 993095089990..42fc7187beee 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1308,6 +1308,18 @@ static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk) return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation); } +/* note: this always recompute the csum on the whole skb, even + * if we just appended a single frag. More status info needed + */ +static void mptcp_update_data_checksum(struct sk_buff *skb, int added) +{ + struct mptcp_ext *mpext = mptcp_get_ext(skb); + __wsum csum = ~csum_unfold(mpext->csum); + int offset = skb->len - added; + + mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset)); +} + static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, struct mptcp_data_frag *dfrag, struct mptcp_sendmsg_info *info) @@ -1402,10 +1414,14 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, if (zero_window_probe) { mptcp_subflow_ctx(ssk)->rel_write_seq += ret; mpext->frozen = 1; - ret = 0; + if (READ_ONCE(msk->csum_enabled)) + mptcp_update_data_checksum(tail, ret); tcp_push_pending_frames(ssk); + return 0; } out: + if (READ_ONCE(msk->csum_enabled)) + mptcp_update_data_checksum(tail, ret); mptcp_subflow_ctx(ssk)->rel_write_seq += ret; return ret; } @@ -2359,8 +2375,8 @@ static void __mptcp_retrans(struct sock *sk) /* limit retransmission to the bytes already sent on some subflows */ info.sent = 0; - info.limit = dfrag->already_sent; - while (info.sent < dfrag->already_sent) { + info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent; + while (info.sent < info.limit) { if (!mptcp_alloc_tx_skb(sk, ssk)) break; @@ -2372,9 +2388,11 @@ static void __mptcp_retrans(struct sock *sk) copied += ret; info.sent += ret; } - if (copied) + if (copied) { + dfrag->already_sent = max(dfrag->already_sent, info.sent); tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); + } mptcp_set_timeout(sk, ssk); release_sock(ssk); @@ -2453,6 +2471,7 @@ static int __mptcp_init_sock(struct sock *sk) msk->ack_hint = NULL; msk->first = NULL; inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; + WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); mptcp_pm_data_init(msk); @@ -2793,6 +2812,8 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->token = subflow_req->token; msk->subflow = NULL; WRITE_ONCE(msk->fully_established, false); + if (mp_opt->csum_reqd) + WRITE_ONCE(msk->csum_enabled, true); msk->write_seq = subflow_req->idsn + 1; msk->snd_nxt = msk->write_seq; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 89f6b73783d5..16e50caf200e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -68,6 +68,8 @@ #define TCPOLEN_MPTCP_FASTCLOSE 12 #define TCPOLEN_MPTCP_RST 4 +#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA) + /* MPTCP MP_JOIN flags */ #define MPTCPOPT_BACKUP BIT(0) #define MPTCPOPT_HMAC_LEN 20 @@ -124,6 +126,7 @@ struct mptcp_options_received { u64 data_seq; u32 subflow_seq; u16 data_len; + __sum16 csum; u16 mp_capable : 1, mp_join : 1, fastclose : 1, @@ -133,6 +136,7 @@ struct mptcp_options_received { rm_addr : 1, mp_prio : 1, echo : 1, + csum_reqd : 1, backup : 1; u32 token; u32 nonce; @@ -234,6 +238,7 @@ struct mptcp_sock { bool snd_data_fin_enable; bool rcv_fastclose; bool use_64bit_ack; /* Set when we received a 64-bit DSN */ + bool csum_enabled; spinlock_t join_list_lock; struct sock *ack_hint; struct work_struct work; @@ -335,11 +340,19 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); } +struct csum_pseudo_header { + __be64 data_seq; + __be32 subflow_seq; + __be16 data_len; + __sum16 csum; +}; + struct mptcp_subflow_request_sock { struct tcp_request_sock sk; u16 mp_capable : 1, mp_join : 1, - backup : 1; + backup : 1, + csum_reqd : 1; u8 local_id; u8 remote_id; u64 local_key; @@ -387,6 +400,8 @@ struct mptcp_subflow_context { u32 map_subflow_seq; u32 ssn_offset; u32 map_data_len; + __wsum map_data_csum; + u32 map_csum_len; u32 request_mptcp : 1, /* send MP_CAPABLE */ request_join : 1, /* send MP_JOIN */ request_bkup : 1, @@ -396,6 +411,8 @@ struct mptcp_subflow_context { pm_notified : 1, /* PM hook called for established status */ conn_finished : 1, map_valid : 1, + map_csum_reqd : 1, + map_data_fin : 1, mpc_map : 1, backup : 1, send_mp_prio : 1, @@ -525,6 +542,7 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su int mptcp_is_enabled(struct net *net); unsigned int mptcp_get_add_addr_timeout(struct net *net); +int mptcp_is_checksum_enabled(struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool mptcp_subflow_data_available(struct sock *sk); @@ -576,7 +594,8 @@ int __init mptcp_proto_v6_init(void); struct sock *mptcp_sk_clone(const struct sock *sk, const struct mptcp_options_received *mp_opt, struct request_sock *req); -void mptcp_get_options(const struct sk_buff *skb, +void mptcp_get_options(const struct sock *sk, + const struct sk_buff *skb, struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 33956337c46b..6b1cd4257edf 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -108,6 +108,7 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis subflow_req->mp_capable = 0; subflow_req->mp_join = 0; + subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener)); subflow_req->msk = NULL; mptcp_token_init_request(req); } @@ -150,7 +151,7 @@ static int subflow_check_req(struct request_sock *req, return -EINVAL; #endif - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk_listener, skb, &mp_opt); if (mp_opt.mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); @@ -247,7 +248,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req, int err; subflow_init_req(req, sk_listener); - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk_listener, skb, &mp_opt); if (mp_opt.mp_capable && mp_opt.mp_join) return -EINVAL; @@ -394,7 +395,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->ssn_offset = TCP_SKB_CB(skb)->seq; pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset); - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (subflow->request_mptcp) { if (!mp_opt.mp_capable) { MPTCP_INC_STATS(sock_net(sk), @@ -404,6 +405,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) goto fallback; } + if (mp_opt.csum_reqd) + WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true); subflow->mp_capable = 1; subflow->can_ack = 1; subflow->remote_key = mp_opt.sndr_key; @@ -638,7 +641,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, * reordered MPC will cause fallback, but we don't have other * options. */ - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!mp_opt.mp_capable) { fallback = true; goto create_child; @@ -648,7 +651,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, if (!new_msk) fallback = true; } else if (subflow_req->mp_join) { - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) || !mptcp_can_accept_new_subflow(subflow_req->msk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); @@ -824,10 +827,92 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb) return true; } +static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb, + bool csum_reqd) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct csum_pseudo_header header; + u32 offset, seq, delta; + __wsum csum; + int len; + + if (!csum_reqd) + return MAPPING_OK; + + /* mapping already validated on previous traversal */ + if (subflow->map_csum_len == subflow->map_data_len) + return MAPPING_OK; + + /* traverse the receive queue, ensuring it contains a full + * DSS mapping and accumulating the related csum. + * Preserve the accoumlate csum across multiple calls, to compute + * the csum only once + */ + delta = subflow->map_data_len - subflow->map_csum_len; + for (;;) { + seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len; + offset = seq - TCP_SKB_CB(skb)->seq; + + /* if the current skb has not been accounted yet, csum its contents + * up to the amount covered by the current DSS + */ + if (offset < skb->len) { + __wsum csum; + + len = min(skb->len - offset, delta); + csum = skb_checksum(skb, offset, len, 0); + subflow->map_data_csum = csum_block_add(subflow->map_data_csum, csum, + subflow->map_csum_len); + + delta -= len; + subflow->map_csum_len += len; + } + if (delta == 0) + break; + + if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) { + /* if this subflow is closed, the partial mapping + * will be never completed; flush the pending skbs, so + * that subflow_sched_work_if_closed() can kick in + */ + if (unlikely(ssk->sk_state == TCP_CLOSE)) + while ((skb = skb_peek(&ssk->sk_receive_queue))) + sk_eat_skb(ssk, skb); + + /* not enough data to validate the csum */ + return MAPPING_EMPTY; + } + + /* the DSS mapping for next skbs will be validated later, + * when a get_mapping_status call will process such skb + */ + skb = skb->next; + } + + /* note that 'map_data_len' accounts only for the carried data, does + * not include the eventual seq increment due to the data fin, + * while the pseudo header requires the original DSS data len, + * including that + */ + header.data_seq = cpu_to_be64(subflow->map_seq); + header.subflow_seq = htonl(subflow->map_subflow_seq); + header.data_len = htons(subflow->map_data_len + subflow->map_data_fin); + header.csum = 0; + + csum = csum_partial(&header, sizeof(header), subflow->map_data_csum); + if (unlikely(csum_fold(csum))) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); + return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; + } + + return MAPPING_OK; +} + static enum mapping_status get_mapping_status(struct sock *ssk, struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + bool csum_reqd = READ_ONCE(msk->csum_enabled); struct mptcp_ext *mpext; struct sk_buff *skb; u16 data_len; @@ -920,9 +1005,10 @@ static enum mapping_status get_mapping_status(struct sock *ssk, /* Allow replacing only with an identical map */ if (subflow->map_seq == map_seq && subflow->map_subflow_seq == mpext->subflow_seq && - subflow->map_data_len == data_len) { + subflow->map_data_len == data_len && + subflow->map_csum_reqd == mpext->csum_reqd) { skb_ext_del(skb, SKB_EXT_MPTCP); - return MAPPING_OK; + goto validate_csum; } /* If this skb data are fully covered by the current mapping, @@ -934,17 +1020,27 @@ static enum mapping_status get_mapping_status(struct sock *ssk, } /* will validate the next map after consuming the current one */ - return MAPPING_OK; + goto validate_csum; } subflow->map_seq = map_seq; subflow->map_subflow_seq = mpext->subflow_seq; subflow->map_data_len = data_len; subflow->map_valid = 1; + subflow->map_data_fin = mpext->data_fin; subflow->mpc_map = mpext->mpc_map; - pr_debug("new map seq=%llu subflow_seq=%u data_len=%u", + subflow->map_csum_reqd = mpext->csum_reqd; + subflow->map_csum_len = 0; + subflow->map_data_csum = csum_unfold(mpext->csum); + + /* Cfr RFC 8684 Section 3.3.0 */ + if (unlikely(subflow->map_csum_reqd != csum_reqd)) + return MAPPING_INVALID; + + pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", subflow->map_seq, subflow->map_subflow_seq, - subflow->map_data_len); + subflow->map_data_len, subflow->map_csum_reqd, + subflow->map_data_csum); validate_seq: /* we revalidate valid mapping on new skb, because we must ensure @@ -954,7 +1050,9 @@ validate_seq: return MAPPING_INVALID; skb_ext_del(skb, SKB_EXT_MPTCP); - return MAPPING_OK; + +validate_csum: + return validate_data_csum(ssk, skb, csum_reqd); } static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 67954afef4e1..21ccf450e249 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -860,7 +860,7 @@ s64 vsock_stream_has_data(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(vsock_stream_has_data); -static s64 vsock_has_data(struct vsock_sock *vsk) +static s64 vsock_connectible_has_data(struct vsock_sock *vsk) { struct sock *sk = sk_vsock(vsk); @@ -1866,10 +1866,11 @@ out: return err; } -static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait, - long timeout, - struct vsock_transport_recv_notify_data *recv_data, - size_t target) +static int vsock_connectible_wait_data(struct sock *sk, + struct wait_queue_entry *wait, + long timeout, + struct vsock_transport_recv_notify_data *recv_data, + size_t target) { const struct vsock_transport *transport; struct vsock_sock *vsk; @@ -1880,7 +1881,7 @@ static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait, err = 0; transport = vsk->transport; - while ((data = vsock_has_data(vsk)) == 0) { + while ((data = vsock_connectible_has_data(vsk)) == 0) { prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE); if (sk->sk_err != 0 || @@ -1967,7 +1968,8 @@ static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg, while (1) { ssize_t read; - err = vsock_wait_data(sk, &wait, timeout, &recv_data, target); + err = vsock_connectible_wait_data(sk, &wait, timeout, + &recv_data, target); if (err <= 0) break; @@ -2022,7 +2024,7 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg, timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - err = vsock_wait_data(sk, &wait, timeout, NULL, 0); + err = vsock_connectible_wait_data(sk, &wait, timeout, NULL, 0); if (err <= 0) goto out; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 23704a6bc437..f014ccfdd9c2 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -413,7 +413,6 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, struct virtio_vsock_pkt *pkt; int dequeued_len = 0; size_t user_buf_len = msg_data_left(msg); - bool copy_failed = false; bool msg_ready = false; spin_lock_bh(&vvs->rx_lock); @@ -426,7 +425,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, while (!msg_ready) { pkt = list_first_entry(&vvs->rx_queue, struct virtio_vsock_pkt, list); - if (!copy_failed) { + if (dequeued_len >= 0) { size_t pkt_len; size_t bytes_to_copy; @@ -443,11 +442,9 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, err = memcpy_to_msg(msg, pkt->buf, bytes_to_copy); if (err) { - /* Copy of message failed, set flag to skip - * copy path for rest of fragments. Rest of + /* Copy of message failed. Rest of * fragments will be freed without copy. */ - copy_failed = true; dequeued_len = err; } else { user_buf_len -= bytes_to_copy; diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 9ca5f1ba461e..69351c3eb68c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -3,7 +3,7 @@ time_start=$(date +%s) -optstring="S:R:d:e:l:r:h4cm:f:t" +optstring="S:R:d:e:l:r:h4cm:f:tC" ret=0 sin="" sout="" @@ -22,6 +22,7 @@ sndbuf=0 rcvbuf=0 options_log=true do_tcp=0 +checksum=false filesize=0 if [ $tc_loss -eq 100 ];then @@ -47,6 +48,7 @@ usage() { echo -e "\t-R: set rcvbuf value (default: use kernel default)" echo -e "\t-m: test mode (poll, sendfile; default: poll)" echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)" + echo -e "\t-C: enable the MPTCP data checksum" } while getopts "$optstring" option;do @@ -104,6 +106,9 @@ while getopts "$optstring" option;do "t") do_tcp=$((do_tcp+1)) ;; + "C") + checksum=true + ;; "?") usage $0 exit 1 @@ -200,6 +205,12 @@ ip -net "$ns4" route add default via dead:beef:3::2 # use TCP syn cookies, even if no flooding was detected. ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 +if $checksum; then + for i in "$ns1" "$ns2" "$ns3" "$ns4";do + ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1 + done +fi + set_ethtool_flags() { local ns="$1" local dev="$2" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index fd99485cf2a4..523c7797f30a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -12,6 +12,7 @@ timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) mptcp_connect="" capture=0 +checksum=0 do_all_tests=1 TEST_COUNT=0 @@ -49,6 +50,9 @@ init() ip netns exec $netns sysctl -q net.mptcp.enabled=1 ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 + if [ $checksum -eq 1 ]; then + ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1 + fi done # ns1 ns2 @@ -124,6 +128,17 @@ reset_with_add_addr_timeout() -j DROP } +reset_with_checksum() +{ + local ns1_enable=$1 + local ns2_enable=$2 + + reset + + ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable + ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable +} + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" @@ -476,6 +491,45 @@ run_tests() fi } +chk_csum_nr() +{ + local msg=${1:-""} + local count + local dump_stats + + if [ ! -z "$msg" ]; then + printf "%02u" "$TEST_COUNT" + else + echo -n " " + fi + printf " %-36s %s" "$msg" "sum" + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != 0 ]; then + echo "[fail] got $count data checksum error[s] expected 0" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + echo -n " - csum " + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != 0 ]; then + echo "[fail] got $count data checksum error[s] expected 0" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + if [ "${dump_stats}" = 1 ]; then + echo Server ns stats + ip netns exec $ns1 nstat -as | grep MPTcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep MPTcp + fi +} + chk_join_nr() { local msg="$1" @@ -523,6 +577,9 @@ chk_join_nr() echo Client ns stats ip netns exec $ns2 nstat -as | grep MPTcp fi + if [ $checksum -eq 1 ]; then + chk_csum_nr + fi } chk_add_nr() @@ -1374,6 +1431,37 @@ syncookies_tests() chk_add_nr 1 1 } +checksum_tests() +{ + # checksum test 0 0 + reset_with_checksum 0 0 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_csum_nr "checksum test 0 0" + + # checksum test 1 1 + reset_with_checksum 1 1 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_csum_nr "checksum test 1 1" + + # checksum test 0 1 + reset_with_checksum 0 1 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_csum_nr "checksum test 0 1" + + # checksum test 1 0 + reset_with_checksum 1 0 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_csum_nr "checksum test 1 0" +} + all_tests() { subflows_tests @@ -1387,6 +1475,7 @@ all_tests() backup_tests add_addr_ports_tests syncookies_tests + checksum_tests } usage() @@ -1403,7 +1492,9 @@ usage() echo " -b backup_tests" echo " -p add_addr_ports_tests" echo " -k syncookies_tests" + echo " -S checksum_tests" echo " -c capture pcap files" + echo " -C enable data checksum" echo " -h help" } @@ -1418,13 +1509,16 @@ make_file "$sin" "server" 1 trap cleanup EXIT for arg in "$@"; do - # check for "capture" arg before launching tests + # check for "capture/checksum" args before launching tests if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then capture=1 fi + if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"C"[0-9a-zA-Z]*$ ]]; then + checksum=1 + fi - # exception for the capture option, the rest means: a part of the tests - if [ "${arg}" != "-c" ]; then + # exception for the capture/checksum options, the rest means: a part of the tests + if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ]; then do_all_tests=0 fi done @@ -1434,7 +1528,7 @@ if [ $do_all_tests -eq 1 ]; then exit $ret fi -while getopts 'fsltra64bpkch' opt; do +while getopts 'fsltra64bpkchCS' opt; do case $opt in f) subflows_tests @@ -1469,8 +1563,13 @@ while getopts 'fsltra64bpkch' opt; do k) syncookies_tests ;; + S) + checksum_tests + ;; c) ;; + C) + ;; h | *) usage ;; diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh new file mode 100755 index 000000000000..75ada17ac061 --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh @@ -0,0 +1,573 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it> + +# This test is designed for evaluating the new SRv6 End.DT46 Behavior used for +# implementing IPv4/IPv6 L3 VPN use cases. +# +# The current SRv6 code in the Linux kernel only implements SRv6 End.DT4 and +# End.DT6 Behaviors which can be used respectively to support IPv4-in-IPv6 and +# IPv6-in-IPv6 VPNs. With End.DT4 and End.DT6 it is not possible to create a +# single SRv6 VPN tunnel to carry both IPv4 and IPv6 traffic. +# The SRv6 End.DT46 Behavior implementation is meant to support the +# decapsulation of IPv4 and IPv6 traffic coming from a single SRv6 tunnel. +# Therefore, the SRv6 End.DT46 Behavior in the Linux kernel greatly simplifies +# the setup and operations of SRv6 VPNs. +# +# Hereafter a network diagram is shown, where two different tenants (named 100 +# and 200) offer IPv4/IPv6 L3 VPN services allowing hosts to communicate with +# each other across an IPv6 network. +# +# Only hosts belonging to the same tenant (and to the same VPN) can communicate +# with each other. Instead, the communication among hosts of different tenants +# is forbidden. +# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the +# IPv4/IPv6 L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected +# using the IPv4/IPv6 L3 VPN of tenant 200. Cross connection between tenant 100 +# and tenant 200 is forbidden and thus, for example, hs-t100-1 cannot reach +# hs-t200-3 and vice versa. +# +# Routers rt-1 and rt-2 implement IPv4/IPv6 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DT46 Behavior and c) VRF. +# +# To explain how an IPv4/IPv6 L3 VPN based on SRv6 works, let us briefly +# consider an example where, within the same domain of tenant 100, the host +# hs-t100-1 pings the host hs-t100-2. +# +# First of all, L2 reachability of the host hs-t100-2 is taken into account by +# the router rt-1 which acts as a arp/ndp proxy. +# +# When the host hs-t100-1 sends an IPv6 or IPv4 packet destined to hs-t100-2, +# the router rt-1 receives the packet on the internal veth-t100 interface. Such +# interface is enslaved to the VRF vrf-100 whose associated table contains the +# SRv6 Encap route for encapsulating any IPv6 or IPv4 packet in a IPv6 plus the +# Segment Routing Header (SRH) packet. This packet is sent through the (IPv6) +# core network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DT46 Behavior removes the outer IPv6+SRH headers and +# performs the lookup on the vrf-100 table using the destination address of +# the decapsulated IPv6 or IPv4 packet. Afterwards, the packet is sent to the +# host hs-t100-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the roles of rt-1 +# and rt-2 are swapped. +# +# Of course, the IPv4/IPv6 L3 VPN for tenant 200 works exactly as the IPv4/IPv6 +# L3 VPN for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are +# able to connect with each other. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-t100-1 netns | | hs-t100-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::1/64 | | | | cafe::2/64 | | +# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | cafe::254/64 | | | | cafe::254/64 | | +# | | 10.0.0.254/24 | +----------+ | | +----------+ | 10.0.0.254/24 | | +# | +-------+-------+ | localsid | | | | localsid | +-------+-------- | +# | | | table | | | | table | | | +# | +----+----+ +----------+ | | +----------+ +----+----+ | +# | | vrf-100 | | | | vrf-100 | | +# | +---------+ +------------+ | | +------------+ +---------+ | +# | | veth0 | | | | veth0 | | +# | | fd00::1/64 |.|...|.| fd00::2/64 | | +# | +---------+ +------------+ | | +------------+ +---------+ | +# | | vrf-200 | | | | vrf-200 | | +# | +----+----+ | | +----+----+ | +# | | | | | | +# | +-------+-------+ | | +-------+-------- | +# | | veth-t200 | | | | veth-t200 | | +# | | cafe::254/64 | | | | cafe::254/64 | | +# | | 10.0.0.254/24 | | | | 10.0.0.254/24 | | +# | +---------------+ rt-1 netns | | rt-2 netns +---------------- | +# | . | | . | +# +-----------------------------------+ +-----------------------------------+ +# . . +# . . +# . . +# . . +# +-------------------+ +-------------------+ +# | . | | . | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::3/64 | | | | cafe::4/64 | | +# | | 10.0.0.3/24 | | | | 10.0.0.4/24 | | +# | +-------------+ | | +-------------+ | +# | | | | +# | hs-t200-3 netns | | hs-t200-4 netns | +# | | | | +# +-------------------+ +-------------------+ +# +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table (table 90) +# +--------------------------------------------------+ +# |SID |Action | +# +--------------------------------------------------+ +# |fc00:21:100::6046|apply SRv6 End.DT46 vrftable 100| +# +--------------------------------------------------+ +# |fc00:21:200::6046|apply SRv6 End.DT46 vrftable 200| +# +--------------------------------------------------+ +# +# rt-1: VRF tenant 100 (table 100) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::2 |apply seg6 encap segs fc00:12:100::6046| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth-t100 | +# +---------------------------------------------------+ +# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6046| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth-t100 | +# +---------------------------------------------------+ +# +# rt-1: VRF tenant 200 (table 200) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::4 |apply seg6 encap segs fc00:12:200::6046| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth-t200 | +# +---------------------------------------------------+ +# |10.0.0.4 |apply seg6 encap segs fc00:12:200::6046| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth-t200 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table (table 90) +# +--------------------------------------------------+ +# |SID |Action | +# +--------------------------------------------------+ +# |fc00:12:100::6046|apply SRv6 End.DT46 vrftable 100| +# +--------------------------------------------------+ +# |fc00:12:200::6046|apply SRv6 End.DT46 vrftable 200| +# +--------------------------------------------------+ +# +# rt-2: VRF tenant 100 (table 100) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::1 |apply seg6 encap segs fc00:21:100::6046| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth-t100 | +# +---------------------------------------------------+ +# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6046| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth-t100 | +# +---------------------------------------------------+ +# +# rt-2: VRF tenant 200 (table 200) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::3 |apply seg6 encap segs fc00:21:200::6046| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth-t200 | +# +---------------------------------------------------+ +# |10.0.0.3 |apply seg6 encap segs fc00:21:200::6046| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth-t200 | +# +---------------------------------------------------+ +# + +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fd00 +readonly IPv6_HS_NETWORK=cafe +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fc00 +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-t${tid}-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + + ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + # configure the VRF for the tenant X on the router which is directly + # connected to the source host. + ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid} + ip -netns ${rtname} link set vrf-${tid} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + # enslave the veth-tX interface to the vrf-X in the access router + ip -netns ${rtname} link set ${rtveth} master vrf-${tid} + ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad + ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1 + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1 + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0 + + ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local rtveth=veth-t${tid} + local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6046 + + ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth} + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \ + via fd00::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \ + encap seg6local action End.DT46 vrftable ${tid} dev vrf-${tid} + + # all sids for VPNs start with a common locator which is fc00::/16. + # Routes for handling the SRv6 End.DT46 behavior instances are grouped + # together in the 'localsid' table. + # + # NOTE: added only once + if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \ + grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then + ip -netns ${rtdst_name} -6 rule add \ + to ${VPN_LOCATOR_SERVICE}::/16 \ + lookup ${LOCALSID_TABLE_ID} prio 999 + fi + + # set default routes to unreachable for both ipv4 and ipv6 + ip -netns ${rtsrc_name} -6 route add unreachable default metric 4278198272 \ + vrf vrf-${tid} + + ip -netns ${rtsrc_name} -4 route add unreachable default metric 4278198272 \ + vrf vrf-${tid} +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 #args: host router tenant + setup_hs 2 2 100 + + # setup two hosts for the tenant 200 + # - host hs-3 is directly connected to the router rt-1; + # - host hs-4 is directly connected to the router rt-2. + setup_hs 3 1 200 + setup_hs 4 2 200 + + # setup the IPv4/IPv6 L3 VPN which connects the host hs-t100-1 and host + # hs-t100-2 within the same tenant 100. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 + + # setup the IPv4/IPv6 L3 VPN which connects the host hs-t200-3 and host + # hs-t200-4 within the same tenant 200. + setup_vpn_config 3 1 4 2 200 + setup_vpn_config 4 2 3 1 200 +} + +check_rt_connectivity() +{ + local rtsrc=$1 + local rtdst=$2 + + ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc=$1 + local rtdst=$2 + + check_rt_connectivity ${rtsrc} ${rtdst} + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 +} + +check_hs_ipv4_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})" + + check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})" + +} + +check_and_log_hs_isolation() +{ + local hssrc=$1 + local tidsrc=$2 + local hsdst=$3 + local tiddst=$4 + + check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tidsrc} + # NOTE: ping should fail + log_test $? 1 "IPv6 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}" + + check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tidsrc} + # NOTE: ping should fail + log_test $? 1 "IPv4 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}" + +} + + +check_and_log_hs2gw_connectivity() +{ + local hssrc=$1 + local tid=$2 + + check_hs_ipv6_connectivity ${hssrc} 254 ${tid} + log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})" + + check_hs_ipv4_connectivity ${hssrc} 254 ${tid} + log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})" + +} + +router_tests() +{ + log_section "IPv6 routers connectivity test" + + check_and_log_rt_connectivity 1 2 + check_and_log_rt_connectivity 2 1 +} + +host2gateway_tests() +{ + log_section "IPv4/IPv6 connectivity test among hosts and gateway" + + check_and_log_hs2gw_connectivity 1 100 + check_and_log_hs2gw_connectivity 2 100 + + check_and_log_hs2gw_connectivity 3 200 + check_and_log_hs2gw_connectivity 4 200 +} + +host_vpn_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 + + check_and_log_hs_connectivity 3 4 200 + check_and_log_hs_connectivity 4 3 200 +} + +host_vpn_isolation_tests() +{ + local i + local j + local k + local tmp + local l1="1 2" + local l2="3 4" + local t1=100 + local t2=200 + + log_section "SRv6 VPN isolation test among hosts in different tentants" + + for k in 0 1; do + for i in ${l1}; do + for j in ${l2}; do + check_and_log_hs_isolation ${i} ${t1} ${j} ${t2} + done + done + + # let us test the reverse path + tmp="${l1}"; l1="${l2}"; l2="${tmp}" + tmp=${t1}; t1=${t2}; t2=${tmp} + done +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 0 +fi + +modprobe vrf &>/dev/null +if [ ! -e /proc/sys/net/vrf/strict_mode ]; then + echo "SKIP: vrf sysctl does not exist" + exit 0 +fi + +cleanup &>/dev/null + +setup + +router_tests +host2gateway_tests +host_vpn_tests +host_vpn_isolation_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} |