summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/snps,dwmac.yaml6
-rw-r--r--Documentation/firmware-guide/acpi/dsd/phy.rst15
-rw-r--r--Documentation/firmware-guide/acpi/index.rst1
-rw-r--r--Documentation/networking/mptcp-sysctl.rst8
-rw-r--r--arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi46
-rw-r--r--arch/mips/boot/dts/loongson/ls7a-pch.dtsi6
-rw-r--r--drivers/net/dsa/sja1105/sja1105.h3
-rw-r--r--drivers/net/dsa/sja1105/sja1105_clocking.c20
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c51
-rw-r--r--drivers/net/dsa/sja1105/sja1105_spi.c14
-rw-r--r--drivers/net/dsa/sja1105/sja1105_static_config.c3
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c26
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c7
-rw-r--r--drivers/net/ethernet/marvell/pxa168_eth.c2
-rw-r--r--drivers/net/ethernet/neterion/vxge/vxge-main.c5
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Kconfig9
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Makefile1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c218
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c4
-rw-r--r--drivers/net/netdevsim/dev.c14
-rw-r--r--drivers/net/netdevsim/netdevsim.h2
-rw-r--r--drivers/net/wan/hostess_sv11.c113
-rw-r--r--drivers/nfc/nxp-nci/core.c39
-rw-r--r--drivers/nfc/nxp-nci/firmware.c7
-rw-r--r--include/net/mptcp.h9
-rw-r--r--include/trace/events/mptcp.h17
-rw-r--r--include/uapi/linux/mptcp.h1
-rw-r--r--include/uapi/linux/seg6_local.h2
-rw-r--r--net/8021q/vlan.h3
-rw-r--r--net/bridge/br_vlan.c4
-rw-r--r--net/caif/cfcnfg.c2
-rw-r--r--net/ipv6/seg6_local.c94
-rw-r--r--net/mptcp/ctrl.c16
-rw-r--r--net/mptcp/mib.c1
-rw-r--r--net/mptcp/mib.h1
-rw-r--r--net/mptcp/mptcp_diag.c1
-rw-r--r--net/mptcp/options.c154
-rw-r--r--net/mptcp/protocol.c29
-rw-r--r--net/mptcp/protocol.h23
-rw-r--r--net/mptcp/subflow.c120
-rw-r--r--net/vmw_vsock/af_vsock.c18
-rw-r--r--net/vmw_vsock/virtio_transport_common.c7
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh13
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh107
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh573
46 files changed, 1554 insertions, 264 deletions
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 9c0ce92e9212..56f2235f5fb5 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -51,11 +51,15 @@ properties:
- allwinner,sun8i-r40-emac
- allwinner,sun8i-v3s-emac
- allwinner,sun50i-a64-emac
+ - loongson,ls2k-dwmac
+ - loongson,ls7a-dwmac
- amlogic,meson6-dwmac
- amlogic,meson8b-dwmac
- amlogic,meson8m2-dwmac
- amlogic,meson-gxbb-dwmac
- amlogic,meson-axg-dwmac
+ - loongson,ls2k-dwmac
+ - loongson,ls7a-dwmac
- ingenic,jz4775-mac
- ingenic,x1000-mac
- ingenic,x1600-mac
@@ -363,6 +367,8 @@ allOf:
- allwinner,sun8i-r40-emac
- allwinner,sun8i-v3s-emac
- allwinner,sun50i-a64-emac
+ - loongson,ls2k-dwmac
+ - loongson,ls7a-dwmac
- ingenic,jz4775-mac
- ingenic,x1000-mac
- ingenic,x1600-mac
diff --git a/Documentation/firmware-guide/acpi/dsd/phy.rst b/Documentation/firmware-guide/acpi/dsd/phy.rst
index 7d01ae8b3cc6..0d49bad2ea9c 100644
--- a/Documentation/firmware-guide/acpi/dsd/phy.rst
+++ b/Documentation/firmware-guide/acpi/dsd/phy.rst
@@ -27,7 +27,8 @@ network interfaces that have PHYs connected to MAC via MDIO bus.
During the MDIO bus driver initialization, PHYs on this bus are probed
using the _ADR object as shown below and are registered on the MDIO bus.
-::
+.. code-block:: none
+
Scope(\_SB.MDI0)
{
Device(PHY1) {
@@ -60,7 +61,9 @@ component (PHYs on the MDIO bus).
a) Silicon Component
This node describes the MDIO controller, MDI0
---------------------------------------------
-::
+
+.. code-block:: none
+
Scope(_SB)
{
Device(MDI0) {
@@ -80,7 +83,9 @@ This node describes the MDIO controller, MDI0
b) Platform Component
The PHY1 and PHY2 nodes represent the PHYs connected to MDIO bus MDI0
---------------------------------------------------------------------
-::
+
+.. code-block:: none
+
Scope(\_SB.MDI0)
{
Device(PHY1) {
@@ -98,7 +103,9 @@ DSDT entries representing MAC nodes
Below are the MAC nodes where PHY nodes are referenced.
phy-mode and phy-handle are used as explained earlier.
------------------------------------------------------
-::
+
+.. code-block:: none
+
Scope(\_SB.MCE0.PR17)
{
Name (_DSD, Package () {
diff --git a/Documentation/firmware-guide/acpi/index.rst b/Documentation/firmware-guide/acpi/index.rst
index f72b5f1769fb..a99ee402b212 100644
--- a/Documentation/firmware-guide/acpi/index.rst
+++ b/Documentation/firmware-guide/acpi/index.rst
@@ -11,6 +11,7 @@ ACPI Support
dsd/graph
dsd/data-node-references
dsd/leds
+ dsd/phy
enumeration
osi
method-customizing
diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst
index 3b352e5f6300..ee06fd782465 100644
--- a/Documentation/networking/mptcp-sysctl.rst
+++ b/Documentation/networking/mptcp-sysctl.rst
@@ -24,3 +24,11 @@ add_addr_timeout - INTEGER (seconds)
sysctl.
Default: 120
+
+checksum_enabled - BOOLEAN
+ Control whether DSS checksum can be enabled.
+
+ DSS checksum can be enabled if the value is nonzero. This is a
+ per-namespace sysctl.
+
+ Default: 0
diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
index 569e814def83..5747f171de29 100644
--- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
+++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
@@ -114,6 +114,52 @@
ranges = <0x01000000 0x0 0x00000000 0x0 0x18000000 0x0 0x00010000>,
<0x02000000 0x0 0x40000000 0x0 0x40000000 0x0 0x40000000>;
+ gmac@3,0 {
+ compatible = "pci0014,7a03.0",
+ "pci0014,7a03",
+ "pciclass0c0320",
+ "pciclass0c03",
+ "loongson, pci-gmac";
+
+ reg = <0x1800 0x0 0x0 0x0 0x0>;
+ interrupts = <12 IRQ_TYPE_LEVEL_LOW>,
+ <13 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-names = "macirq", "eth_lpi";
+ interrupt-parent = <&liointc0>;
+ phy-mode = "rgmii";
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,dwmac-mdio";
+ phy0: ethernet-phy@0 {
+ reg = <0>;
+ };
+ };
+ };
+
+ gmac@3,1 {
+ compatible = "pci0014,7a03.0",
+ "pci0014,7a03",
+ "pciclass0c0320",
+ "pciclass0c03",
+ "loongson, pci-gmac";
+
+ reg = <0x1900 0x0 0x0 0x0 0x0>;
+ interrupts = <14 IRQ_TYPE_LEVEL_LOW>,
+ <15 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-names = "macirq", "eth_lpi";
+ interrupt-parent = <&liointc0>;
+ phy-mode = "rgmii";
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,dwmac-mdio";
+ phy1: ethernet-phy@1 {
+ reg = <0>;
+ };
+ };
+ };
+
ehci@4,1 {
compatible = "pci0014,7a14.0",
"pci0014,7a14",
diff --git a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
index f99a7a11fded..58b9bb47c58a 100644
--- a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
+++ b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
@@ -186,7 +186,8 @@
compatible = "pci0014,7a03.0",
"pci0014,7a03",
"pciclass020000",
- "pciclass0200";
+ "pciclass0200",
+ "loongson, pci-gmac";
reg = <0x1800 0x0 0x0 0x0 0x0>;
interrupts = <12 IRQ_TYPE_LEVEL_HIGH>,
@@ -208,7 +209,8 @@
compatible = "pci0014,7a03.0",
"pci0014,7a03",
"pciclass020000",
- "pciclass0200";
+ "pciclass0200",
+ "loongson, pci-gmac";
reg = <0x1900 0x0 0x0 0x0 0x0>;
interrupts = <14 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 39124726bdd9..221c7abdef0e 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -136,6 +136,7 @@ struct sja1105_info {
int (*clocking_setup)(struct sja1105_private *priv);
int (*pcs_mdio_read)(struct mii_bus *bus, int phy, int reg);
int (*pcs_mdio_write)(struct mii_bus *bus, int phy, int reg, u16 val);
+ int (*disable_microcontroller)(struct sja1105_private *priv);
const char *name;
bool supports_mii[SJA1105_MAX_NUM_PORTS];
bool supports_rmii[SJA1105_MAX_NUM_PORTS];
@@ -363,7 +364,7 @@ int sja1105pqrs_setup_rgmii_delay(const void *ctx, int port);
int sja1110_setup_rgmii_delay(const void *ctx, int port);
int sja1105_clocking_setup_port(struct sja1105_private *priv, int port);
int sja1105_clocking_setup(struct sja1105_private *priv);
-int sja1110_clocking_setup(struct sja1105_private *priv);
+int sja1110_disable_microcontroller(struct sja1105_private *priv);
/* From sja1105_ethtool.c */
void sja1105_get_ethtool_stats(struct dsa_switch *ds, int port, u64 *data);
diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c
index 645edea5a81f..387a1f2f161c 100644
--- a/drivers/net/dsa/sja1105/sja1105_clocking.c
+++ b/drivers/net/dsa/sja1105/sja1105_clocking.c
@@ -6,6 +6,7 @@
#include "sja1105.h"
#define SJA1105_SIZE_CGU_CMD 4
+#define SJA1110_BASE_MCSS_CLK SJA1110_CGU_ADDR(0x70)
#define SJA1110_BASE_TIMER_CLK SJA1110_CGU_ADDR(0x74)
/* Common structure for CFG_PAD_MIIx_RX and CFG_PAD_MIIx_TX */
@@ -832,17 +833,30 @@ sja1110_cgu_outclk_packing(void *buf, struct sja1110_cgu_outclk *outclk,
sja1105_packing(buf, &outclk->pd, 0, 0, size, op);
}
-/* Power down the BASE_TIMER_CLK in order to disable the watchdog */
-int sja1110_clocking_setup(struct sja1105_private *priv)
+int sja1110_disable_microcontroller(struct sja1105_private *priv)
{
u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
+ struct sja1110_cgu_outclk outclk_6_c = {
+ .clksrc = 0x3,
+ .pd = true,
+ };
struct sja1110_cgu_outclk outclk_7_c = {
.clksrc = 0x5,
.pd = true,
};
+ int rc;
+ /* Power down the BASE_TIMER_CLK to disable the watchdog timer */
sja1110_cgu_outclk_packing(packed_buf, &outclk_7_c, PACK);
- return sja1105_xfer_buf(priv, SPI_WRITE, SJA1110_BASE_TIMER_CLK,
+ rc = sja1105_xfer_buf(priv, SPI_WRITE, SJA1110_BASE_TIMER_CLK,
+ packed_buf, SJA1105_SIZE_CGU_CMD);
+ if (rc)
+ return rc;
+
+ /* Power down the BASE_MCSS_CLOCK to gate the microcontroller off */
+ sja1110_cgu_outclk_packing(packed_buf, &outclk_6_c, PACK);
+
+ return sja1105_xfer_buf(priv, SPI_WRITE, SJA1110_BASE_MCSS_CLK,
packed_buf, SJA1105_SIZE_CGU_CMD);
}
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 8e5cdf93c23b..a9777eb564c6 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1886,17 +1886,23 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
mutex_lock(&priv->ptp_data.lock);
rc = __sja1105_ptp_gettimex(ds, &now, &ptp_sts_before);
- if (rc < 0)
- goto out_unlock_ptp;
+ if (rc < 0) {
+ mutex_unlock(&priv->ptp_data.lock);
+ goto out;
+ }
/* Reset switch and send updated static configuration */
rc = sja1105_static_config_upload(priv);
- if (rc < 0)
- goto out_unlock_ptp;
+ if (rc < 0) {
+ mutex_unlock(&priv->ptp_data.lock);
+ goto out;
+ }
rc = __sja1105_ptp_settime(ds, 0, &ptp_sts_after);
- if (rc < 0)
- goto out_unlock_ptp;
+ if (rc < 0) {
+ mutex_unlock(&priv->ptp_data.lock);
+ goto out;
+ }
t1 = timespec64_to_ns(&ptp_sts_before.pre_ts);
t2 = timespec64_to_ns(&ptp_sts_before.post_ts);
@@ -1911,7 +1917,6 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
__sja1105_ptp_adjtime(ds, now);
-out_unlock_ptp:
mutex_unlock(&priv->ptp_data.lock);
dev_info(priv->ds->dev,
@@ -1922,9 +1927,11 @@ out_unlock_ptp:
* For these interfaces there is no dynamic configuration
* needed, since PLLs have same settings at all speeds.
*/
- rc = priv->info->clocking_setup(priv);
- if (rc < 0)
- goto out;
+ if (priv->info->clocking_setup) {
+ rc = priv->info->clocking_setup(priv);
+ if (rc < 0)
+ goto out;
+ }
for (i = 0; i < ds->num_ports; i++) {
struct dw_xpcs *xpcs = priv->xpcs[i];
@@ -3032,18 +3039,34 @@ static int sja1105_setup(struct dsa_switch *ds)
goto out_ptp_clock_unregister;
}
+ if (priv->info->disable_microcontroller) {
+ rc = priv->info->disable_microcontroller(priv);
+ if (rc < 0) {
+ dev_err(ds->dev,
+ "Failed to disable microcontroller: %pe\n",
+ ERR_PTR(rc));
+ goto out_mdiobus_unregister;
+ }
+ }
+
/* Create and send configuration down to device */
rc = sja1105_static_config_load(priv);
if (rc < 0) {
dev_err(ds->dev, "Failed to load static config: %d\n", rc);
goto out_mdiobus_unregister;
}
+
/* Configure the CGU (PHY link modes and speeds) */
- rc = priv->info->clocking_setup(priv);
- if (rc < 0) {
- dev_err(ds->dev, "Failed to configure MII clocking: %d\n", rc);
- goto out_static_config_free;
+ if (priv->info->clocking_setup) {
+ rc = priv->info->clocking_setup(priv);
+ if (rc < 0) {
+ dev_err(ds->dev,
+ "Failed to configure MII clocking: %pe\n",
+ ERR_PTR(rc));
+ goto out_static_config_free;
+ }
}
+
/* On SJA1105, VLAN filtering per se is always enabled in hardware.
* The only thing we can do to disable it is lie about what the 802.1Q
* EtherType is.
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 4aed16d23f21..08cc5dbf2fa6 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -199,7 +199,11 @@ static int sja1110_reset_cmd(struct dsa_switch *ds)
const struct sja1105_regs *regs = priv->info->regs;
u32 switch_reset = BIT(20);
- /* Switch core reset */
+ /* Only reset the switch core.
+ * A full cold reset would re-enable the BASE_MCSS_CLOCK PLL which
+ * would turn on the microcontroller, potentially letting it execute
+ * code which could interfere with our configuration.
+ */
return sja1105_xfer_u32(priv, SPI_WRITE, regs->rgu, &switch_reset, NULL);
}
@@ -796,7 +800,7 @@ const struct sja1105_info sja1110a_info = {
.ptp_cmd_packing = sja1105pqrs_ptp_cmd_packing,
.rxtstamp = sja1110_rxtstamp,
.txtstamp = sja1110_txtstamp,
- .clocking_setup = sja1110_clocking_setup,
+ .disable_microcontroller = sja1110_disable_microcontroller,
.pcs_mdio_read = sja1110_pcs_mdio_read,
.pcs_mdio_write = sja1110_pcs_mdio_write,
.port_speed = {
@@ -847,7 +851,7 @@ const struct sja1105_info sja1110b_info = {
.ptp_cmd_packing = sja1105pqrs_ptp_cmd_packing,
.rxtstamp = sja1110_rxtstamp,
.txtstamp = sja1110_txtstamp,
- .clocking_setup = sja1110_clocking_setup,
+ .disable_microcontroller = sja1110_disable_microcontroller,
.pcs_mdio_read = sja1110_pcs_mdio_read,
.pcs_mdio_write = sja1110_pcs_mdio_write,
.port_speed = {
@@ -898,7 +902,7 @@ const struct sja1105_info sja1110c_info = {
.ptp_cmd_packing = sja1105pqrs_ptp_cmd_packing,
.rxtstamp = sja1110_rxtstamp,
.txtstamp = sja1110_txtstamp,
- .clocking_setup = sja1110_clocking_setup,
+ .disable_microcontroller = sja1110_disable_microcontroller,
.pcs_mdio_read = sja1110_pcs_mdio_read,
.pcs_mdio_write = sja1110_pcs_mdio_write,
.port_speed = {
@@ -949,7 +953,7 @@ const struct sja1105_info sja1110d_info = {
.ptp_cmd_packing = sja1105pqrs_ptp_cmd_packing,
.rxtstamp = sja1110_rxtstamp,
.txtstamp = sja1110_txtstamp,
- .clocking_setup = sja1110_clocking_setup,
+ .disable_microcontroller = sja1110_disable_microcontroller,
.pcs_mdio_read = sja1110_pcs_mdio_read,
.pcs_mdio_write = sja1110_pcs_mdio_write,
.port_speed = {
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index 1491b72008f3..7a422ef4deb6 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -1052,8 +1052,7 @@ sja1105_static_config_check_valid(const struct sja1105_static_config *config,
(tables[blk_idx].entry_count == tables[blk_idx].ops->max_entry_count)
if (tables[BLK_IDX_SCHEDULE].entry_count) {
- if (config->device_id != SJA1105T_DEVICE_ID &&
- config->device_id != SJA1105QS_DEVICE_ID)
+ if (!tables[BLK_IDX_SCHEDULE].ops->max_entry_count)
return SJA1105_TTETHERNET_NOT_SUPPORTED;
if (tables[BLK_IDX_SCHEDULE_ENTRY_POINTS].entry_count == 0)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 51bbf5f760c5..cdb5f14fb6bc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3537,21 +3537,33 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
int size = le16_to_cpu(desc->rx.size);
u32 truesize = hns3_buf_size(ring);
u32 frag_size = size - pull_len;
+ bool reused;
/* Avoid re-using remote or pfmem page */
if (unlikely(!dev_page_is_reusable(desc_cb->priv)))
goto out;
- /* Stack is not using and current page_offset is non-zero, we can
- * reuse from the zero offset.
+ reused = hns3_can_reuse_page(desc_cb);
+
+ /* Rx page can be reused when:
+ * 1. Rx page is only owned by the driver when page_offset
+ * is zero, which means 0 @ truesize will be used by
+ * stack after skb_add_rx_frag() is called, and the rest
+ * of rx page can be reused by driver.
+ * Or
+ * 2. Rx page is only owned by the driver when page_offset
+ * is non-zero, which means page_offset @ truesize will
+ * be used by stack after skb_add_rx_frag() is called,
+ * and 0 @ truesize can be reused by driver.
*/
- if (desc_cb->page_offset && hns3_can_reuse_page(desc_cb)) {
- desc_cb->page_offset = 0;
- desc_cb->reuse_flag = 1;
- } else if (desc_cb->page_offset + truesize * 2 <=
- hns3_page_size(ring)) {
+ if ((!desc_cb->page_offset && reused) ||
+ ((desc_cb->page_offset + truesize + truesize) <=
+ hns3_page_size(ring) && desc_cb->page_offset)) {
desc_cb->page_offset += truesize;
desc_cb->reuse_flag = 1;
+ } else if (desc_cb->page_offset && reused) {
+ desc_cb->page_offset = 0;
+ desc_cb->reuse_flag = 1;
} else if (frag_size <= ring->rx_copybreak) {
void *frag = napi_alloc_frag(frag_size);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index 87d7c6ab047f..68633145a8b8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -123,11 +123,8 @@ static bool npc_is_field_present(struct rvu *rvu, enum key_fields type, u8 intf)
static bool npc_is_same(struct npc_key_field *input,
struct npc_key_field *field)
{
- int ret;
-
- ret = memcmp(&input->layer_mdata, &field->layer_mdata,
- sizeof(struct npc_layer_mdata));
- return ret == 0;
+ return memcmp(&input->layer_mdata, &field->layer_mdata,
+ sizeof(struct npc_layer_mdata)) == 0;
}
static void npc_set_layer_mdata(struct npc_mcam *mcam, enum key_fields type,
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index e967867828d8..9b48ae4bac39 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1528,6 +1528,7 @@ static int pxa168_eth_remove(struct platform_device *pdev)
struct net_device *dev = platform_get_drvdata(pdev);
struct pxa168_eth_private *pep = netdev_priv(dev);
+ cancel_work_sync(&pep->tx_timeout_task);
if (pep->htpr) {
dma_free_coherent(pep->dev->dev.parent, HASH_ADDR_TABLE_SIZE,
pep->htpr, pep->htpr_dma);
@@ -1539,7 +1540,6 @@ static int pxa168_eth_remove(struct platform_device *pdev)
clk_disable_unprepare(pep->clk);
mdiobus_unregister(pep->smi_bus);
mdiobus_free(pep->smi_bus);
- cancel_work_sync(&pep->tx_timeout_task);
unregister_netdev(dev);
free_netdev(dev);
return 0;
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index 0528b8f49061..82eef4c72f01 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -3678,10 +3678,9 @@ static int vxge_config_vpaths(struct vxge_hw_device_config *device_config,
driver_config->vpath_per_dev = 1;
for (i = 0; i < VXGE_HW_MAX_VIRTUAL_PATHS; i++)
- if (!vxge_bVALn(vpath_mask, i, 1))
- continue;
- else
+ if (vxge_bVALn(vpath_mask, i, 1))
default_no_vpath++;
+
if (default_no_vpath < driver_config->vpath_per_dev)
driver_config->vpath_per_dev = default_no_vpath;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 8a31ce29ecfc..14282472c7a6 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -3341,9 +3341,6 @@ qlcnic_can_start_firmware(struct qlcnic_adapter *adapter)
do {
msleep(1000);
prev_state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE);
-
- if (prev_state == QLCNIC_DEV_QUISCENT)
- continue;
} while ((prev_state != QLCNIC_DEV_READY) && --dev_init_timeo);
if (!dev_init_timeo) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 9a19e4d9da02..ac3c248d4f9b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -250,6 +250,15 @@ config DWMAC_INTEL
This selects the Intel platform specific bus support for the
stmmac driver. This driver is used for Intel Quark/EHL/TGL.
+config DWMAC_LOONGSON
+ tristate "Loongson PCI DWMAC support"
+ default MACH_LOONGSON64
+ depends on STMMAC_ETH && PCI
+ depends on COMMON_CLK
+ help
+ This selects the LOONGSON PCI bus support for the stmmac driver,
+ Support for ethernet controller on Loongson-2K1000 SoC and LS7A1000 bridge.
+
config STMMAC_PCI
tristate "STMMAC PCI bus support"
depends on STMMAC_ETH && PCI
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 6471f93889ee..d4e12e9ace4f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -37,4 +37,5 @@ dwmac-altr-socfpga-objs := altr_tse_pcs.o dwmac-socfpga.o
obj-$(CONFIG_STMMAC_PCI) += stmmac-pci.o
obj-$(CONFIG_DWMAC_INTEL) += dwmac-intel.o
+obj-$(CONFIG_DWMAC_LOONGSON) += dwmac-loongson.o
stmmac-pci-objs:= stmmac_pci.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
new file mode 100644
index 000000000000..8cd4e2e8ec40
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Loongson Corporation
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/pci.h>
+#include <linux/dmi.h>
+#include <linux/device.h>
+#include <linux/of_irq.h>
+#include "stmmac.h"
+
+static int loongson_default_data(struct plat_stmmacenet_data *plat)
+{
+ plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */
+ plat->has_gmac = 1;
+ plat->force_sf_dma_mode = 1;
+
+ /* Set default value for multicast hash bins */
+ plat->multicast_filter_bins = HASH_TABLE_SIZE;
+
+ /* Set default value for unicast filter entries */
+ plat->unicast_filter_entries = 1;
+
+ /* Set the maxmtu to a default of JUMBO_LEN */
+ plat->maxmtu = JUMBO_LEN;
+
+ /* Set default number of RX and TX queues to use */
+ plat->tx_queues_to_use = 1;
+ plat->rx_queues_to_use = 1;
+
+ /* Disable Priority config by default */
+ plat->tx_queues_cfg[0].use_prio = false;
+ plat->rx_queues_cfg[0].use_prio = false;
+
+ /* Disable RX queues routing by default */
+ plat->rx_queues_cfg[0].pkt_route = 0x0;
+
+ /* Default to phy auto-detection */
+ plat->phy_addr = -1;
+
+ plat->dma_cfg->pbl = 32;
+ plat->dma_cfg->pblx8 = true;
+
+ plat->multicast_filter_bins = 256;
+ return 0;
+}
+
+static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct plat_stmmacenet_data *plat;
+ struct stmmac_resources res;
+ int ret, i, mdio;
+ struct device_node *np;
+
+ np = dev_of_node(&pdev->dev);
+
+ if (!np) {
+ pr_info("dwmac_loongson_pci: No OF node\n");
+ return -ENODEV;
+ }
+
+ if (!of_device_is_compatible(np, "loongson, pci-gmac")) {
+ pr_info("dwmac_loongson_pci: Incompatible OF node\n");
+ return -ENODEV;
+ }
+
+ plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL);
+ if (!plat)
+ return -ENOMEM;
+
+ if (plat->mdio_node) {
+ dev_err(&pdev->dev, "Found MDIO subnode\n");
+ mdio = true;
+ }
+
+ if (mdio) {
+ plat->mdio_bus_data = devm_kzalloc(&pdev->dev,
+ sizeof(*plat->mdio_bus_data),
+ GFP_KERNEL);
+ if (!plat->mdio_bus_data)
+ return -ENOMEM;
+ plat->mdio_bus_data->needs_reset = true;
+ }
+
+ plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg), GFP_KERNEL);
+ if (!plat->dma_cfg)
+ return -ENOMEM;
+
+ /* Enable pci device */
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__);
+ return ret;
+ }
+
+ /* Get the base address of device */
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ if (pci_resource_len(pdev, i) == 0)
+ continue;
+ ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev));
+ if (ret)
+ return ret;
+ break;
+ }
+
+ plat->bus_id = of_alias_get_id(np, "ethernet");
+ if (plat->bus_id < 0)
+ plat->bus_id = pci_dev_id(pdev);
+
+ plat->phy_interface = device_get_phy_mode(&pdev->dev);
+ if (plat->phy_interface < 0)
+ dev_err(&pdev->dev, "phy_mode not found\n");
+
+ plat->interface = PHY_INTERFACE_MODE_GMII;
+
+ pci_set_master(pdev);
+
+ loongson_default_data(plat);
+ pci_enable_msi(pdev);
+ memset(&res, 0, sizeof(res));
+ res.addr = pcim_iomap_table(pdev)[0];
+
+ res.irq = of_irq_get_byname(np, "macirq");
+ if (res.irq < 0) {
+ dev_err(&pdev->dev, "IRQ macirq not found\n");
+ ret = -ENODEV;
+ }
+
+ res.wol_irq = of_irq_get_byname(np, "eth_wake_irq");
+ if (res.wol_irq < 0) {
+ dev_info(&pdev->dev, "IRQ eth_wake_irq not found, using macirq\n");
+ res.wol_irq = res.irq;
+ }
+
+ res.lpi_irq = of_irq_get_byname(np, "eth_lpi");
+ if (res.lpi_irq < 0) {
+ dev_err(&pdev->dev, "IRQ eth_lpi not found\n");
+ ret = -ENODEV;
+ }
+
+ return stmmac_dvr_probe(&pdev->dev, plat, &res);
+}
+
+static void loongson_dwmac_remove(struct pci_dev *pdev)
+{
+ int i;
+
+ stmmac_dvr_remove(&pdev->dev);
+
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ if (pci_resource_len(pdev, i) == 0)
+ continue;
+ pcim_iounmap_regions(pdev, BIT(i));
+ break;
+ }
+
+ pci_disable_device(pdev);
+}
+
+static int __maybe_unused loongson_dwmac_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ int ret;
+
+ ret = stmmac_suspend(dev);
+ if (ret)
+ return ret;
+
+ ret = pci_save_state(pdev);
+ if (ret)
+ return ret;
+
+ pci_disable_device(pdev);
+ pci_wake_from_d3(pdev, true);
+ return 0;
+}
+
+static int __maybe_unused loongson_dwmac_resume(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ int ret;
+
+ pci_restore_state(pdev);
+ pci_set_power_state(pdev, PCI_D0);
+
+ ret = pci_enable_device(pdev);
+ if (ret)
+ return ret;
+
+ pci_set_master(pdev);
+
+ return stmmac_resume(dev);
+}
+
+static SIMPLE_DEV_PM_OPS(loongson_dwmac_pm_ops, loongson_dwmac_suspend,
+ loongson_dwmac_resume);
+
+static const struct pci_device_id loongson_dwmac_id_table[] = {
+ { PCI_VDEVICE(LOONGSON, 0x7a03) },
+ {}
+};
+MODULE_DEVICE_TABLE(pci, loongson_dwmac_id_table);
+
+struct pci_driver loongson_dwmac_driver = {
+ .name = "dwmac-loongson-pci",
+ .id_table = loongson_dwmac_id_table,
+ .probe = loongson_dwmac_probe,
+ .remove = loongson_dwmac_remove,
+ .driver = {
+ .pm = &loongson_dwmac_pm_ops,
+ },
+};
+
+module_pci_driver(loongson_dwmac_driver);
+
+MODULE_DESCRIPTION("Loongson DWMAC PCI driver");
+MODULE_AUTHOR("Qing Zhang <zhangqing@loongson.cn>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 4e70efc45458..92dab609d4f8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -573,10 +573,8 @@ static int tc_add_flow(struct stmmac_priv *priv,
for (i = 0; i < ARRAY_SIZE(tc_flow_parsers); i++) {
ret = tc_flow_parsers[i].fn(priv, cls, entry);
- if (!ret) {
+ if (!ret)
entry->in_use = true;
- continue;
- }
}
if (!entry->in_use)
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index d85521989753..6348307bfa84 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -269,9 +269,9 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
err = PTR_ERR(nsim_dev->nodes_ddir);
goto err_out;
}
- debugfs_create_bool("fail_trap_counter_get", 0600,
+ debugfs_create_bool("fail_trap_drop_counter_get", 0600,
nsim_dev->ddir,
- &nsim_dev->fail_trap_counter_get);
+ &nsim_dev->fail_trap_drop_counter_get);
nsim_udp_tunnels_debugfs_create(nsim_dev);
return 0;
@@ -1208,14 +1208,14 @@ static int nsim_rate_node_parent_set(struct devlink_rate *child,
}
static int
-nsim_dev_devlink_trap_hw_counter_get(struct devlink *devlink,
- const struct devlink_trap *trap,
- u64 *p_drops)
+nsim_dev_devlink_trap_drop_counter_get(struct devlink *devlink,
+ const struct devlink_trap *trap,
+ u64 *p_drops)
{
struct nsim_dev *nsim_dev = devlink_priv(devlink);
u64 *cnt;
- if (nsim_dev->fail_trap_counter_get)
+ if (nsim_dev->fail_trap_drop_counter_get)
return -EINVAL;
cnt = &nsim_dev->trap_data->trap_pkt_cnt;
@@ -1247,7 +1247,7 @@ static const struct devlink_ops nsim_dev_devlink_ops = {
.rate_node_del = nsim_rate_node_del,
.rate_leaf_parent_set = nsim_rate_leaf_parent_set,
.rate_node_parent_set = nsim_rate_node_parent_set,
- .trap_drop_counter_get = nsim_dev_devlink_trap_hw_counter_get,
+ .trap_drop_counter_get = nsim_dev_devlink_trap_drop_counter_get,
};
#define NSIM_DEV_MAX_MACS_DEFAULT 32
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index f2304e61919a..ae462957dcee 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -249,7 +249,7 @@ struct nsim_dev {
bool fail_trap_group_set;
bool fail_trap_policer_set;
bool fail_trap_policer_counter_get;
- bool fail_trap_counter_get;
+ bool fail_trap_drop_counter_get;
struct {
struct udp_tunnel_nic_shared utn_shared;
u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS];
diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index 6c05c4c8914a..fd61a7cc4fdf 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Comtrol SV11 card driver
+/* Comtrol SV11 card driver
*
* This is a slightly odd Z85230 synchronous driver. All you need to
* know basically is
@@ -9,7 +8,7 @@
*
* It supports DMA using two DMA channels in SYNC mode. The driver doesn't
* use these facilities
- *
+ *
* The control port is at io+1, the data at io+3 and turning off the DMA
* is done by writing 0 to io+4
*
@@ -44,17 +43,15 @@
static int dma;
-/*
- * Network driver support routines
+/* Network driver support routines
*/
-static inline struct z8530_dev* dev_to_sv(struct net_device *dev)
+static inline struct z8530_dev *dev_to_sv(struct net_device *dev)
{
return (struct z8530_dev *)dev_to_hdlc(dev)->priv;
}
-/*
- * Frame receive. Simple for our card as we do HDLC and there
+/* Frame receive. Simple for our card as we do HDLC and there
* is no funny garbage involved
*/
@@ -65,15 +62,13 @@ static void hostess_input(struct z8530_channel *c, struct sk_buff *skb)
skb->protocol = hdlc_type_trans(skb, c->netdevice);
skb_reset_mac_header(skb);
skb->dev = c->netdevice;
- /*
- * Send it to the PPP layer. We don't have time to process
+ /* Send it to the PPP layer. We don't have time to process
* it right now.
*/
netif_rx(skb);
}
-/*
- * We've been placed in the UP state
+/* We've been placed in the UP state
*/
static int hostess_open(struct net_device *d)
@@ -81,19 +76,18 @@ static int hostess_open(struct net_device *d)
struct z8530_dev *sv11 = dev_to_sv(d);
int err = -1;
- /*
- * Link layer up
+ /* Link layer up
*/
switch (dma) {
- case 0:
- err = z8530_sync_open(d, &sv11->chanA);
- break;
- case 1:
- err = z8530_sync_dma_open(d, &sv11->chanA);
- break;
- case 2:
- err = z8530_sync_txdma_open(d, &sv11->chanA);
- break;
+ case 0:
+ err = z8530_sync_open(d, &sv11->chanA);
+ break;
+ case 1:
+ err = z8530_sync_dma_open(d, &sv11->chanA);
+ break;
+ case 2:
+ err = z8530_sync_txdma_open(d, &sv11->chanA);
+ break;
}
if (err)
@@ -102,15 +96,15 @@ static int hostess_open(struct net_device *d)
err = hdlc_open(d);
if (err) {
switch (dma) {
- case 0:
- z8530_sync_close(d, &sv11->chanA);
- break;
- case 1:
- z8530_sync_dma_close(d, &sv11->chanA);
- break;
- case 2:
- z8530_sync_txdma_close(d, &sv11->chanA);
- break;
+ case 0:
+ z8530_sync_close(d, &sv11->chanA);
+ break;
+ case 1:
+ z8530_sync_dma_close(d, &sv11->chanA);
+ break;
+ case 2:
+ z8530_sync_txdma_close(d, &sv11->chanA);
+ break;
}
return err;
}
@@ -127,8 +121,7 @@ static int hostess_open(struct net_device *d)
static int hostess_close(struct net_device *d)
{
struct z8530_dev *sv11 = dev_to_sv(d);
- /*
- * Discard new frames
+ /* Discard new frames
*/
sv11->chanA.rx_function = z8530_null_rx;
@@ -136,32 +129,29 @@ static int hostess_close(struct net_device *d)
netif_stop_queue(d);
switch (dma) {
- case 0:
- z8530_sync_close(d, &sv11->chanA);
- break;
- case 1:
- z8530_sync_dma_close(d, &sv11->chanA);
- break;
- case 2:
- z8530_sync_txdma_close(d, &sv11->chanA);
- break;
+ case 0:
+ z8530_sync_close(d, &sv11->chanA);
+ break;
+ case 1:
+ z8530_sync_dma_close(d, &sv11->chanA);
+ break;
+ case 2:
+ z8530_sync_txdma_close(d, &sv11->chanA);
+ break;
}
return 0;
}
static int hostess_ioctl(struct net_device *d, struct ifreq *ifr, int cmd)
{
- /* struct z8530_dev *sv11=dev_to_sv(d);
- z8530_ioctl(d,&sv11->chanA,ifr,cmd) */
return hdlc_ioctl(d, ifr, cmd);
}
-/*
- * Passed network frames, fire them downwind.
+/* Passed network frames, fire them downwind.
*/
static netdev_tx_t hostess_queue_xmit(struct sk_buff *skb,
- struct net_device *d)
+ struct net_device *d)
{
return z8530_queue_xmit(&dev_to_sv(d)->chanA, skb);
}
@@ -174,8 +164,7 @@ static int hostess_attach(struct net_device *dev, unsigned short encoding,
return -EINVAL;
}
-/*
- * Description block for a Comtrol Hostess SV11 card
+/* Description block for a Comtrol Hostess SV11 card
*/
static const struct net_device_ops hostess_ops = {
@@ -189,8 +178,7 @@ static struct z8530_dev *sv11_init(int iobase, int irq)
{
struct z8530_dev *sv;
struct net_device *netdev;
- /*
- * Get the needed I/O space
+ /* Get the needed I/O space
*/
if (!request_region(iobase, 8, "Comtrol SV11")) {
@@ -202,8 +190,7 @@ static struct z8530_dev *sv11_init(int iobase, int irq)
if (!sv)
goto err_kzalloc;
- /*
- * Stuff in the I/O addressing
+ /* Stuff in the I/O addressing
*/
sv->active = 0;
@@ -218,7 +205,8 @@ static struct z8530_dev *sv11_init(int iobase, int irq)
outb(0, iobase + 4); /* DMA off */
/* We want a fast IRQ for this device. Actually we'd like an even faster
- IRQ ;) - This is one driver RtLinux is made for */
+ * IRQ ;) - This is one driver RtLinux is made for
+ */
if (request_irq(irq, z8530_interrupt, 0,
"Hostess SV11", sv) < 0) {
@@ -232,8 +220,7 @@ static struct z8530_dev *sv11_init(int iobase, int irq)
sv->chanB.dev = sv;
if (dma) {
- /*
- * You can have DMA off or 1 and 3 thats the lot
+ /* You can have DMA off or 1 and 3 thats the lot
* on the Comtrol.
*/
sv->chanA.txdma = 3;
@@ -248,11 +235,11 @@ static struct z8530_dev *sv11_init(int iobase, int irq)
}
/* Kill our private IRQ line the hostess can end up chattering
- until the configuration is set */
+ * until the configuration is set
+ */
disable_irq(irq);
- /*
- * Begin normal initialise
+ /* Begin normal initialise
*/
if (z8530_init(sv)) {
@@ -268,8 +255,7 @@ static struct z8530_dev *sv11_init(int iobase, int irq)
enable_irq(irq);
- /*
- * Now we can take the IRQ
+ /* Now we can take the IRQ
*/
sv->chanA.netdevice = netdev = alloc_hdlcdev(sv);
@@ -340,7 +326,8 @@ static struct z8530_dev *sv11_unit;
int init_module(void)
{
- if ((sv11_unit = sv11_init(io, irq)) == NULL)
+ sv11_unit = sv11_init(io, irq);
+ if (!sv11_unit)
return -ENODEV;
return 0;
}
diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c
index a0ce95a287c5..2b0c7232e91f 100644
--- a/drivers/nfc/nxp-nci/core.c
+++ b/drivers/nfc/nxp-nci/core.c
@@ -70,21 +70,16 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb)
struct nxp_nci_info *info = nci_get_drvdata(ndev);
int r;
- if (!info->phy_ops->write) {
- r = -ENOTSUPP;
- goto send_exit;
- }
+ if (!info->phy_ops->write)
+ return -EOPNOTSUPP;
- if (info->mode != NXP_NCI_MODE_NCI) {
- r = -EINVAL;
- goto send_exit;
- }
+ if (info->mode != NXP_NCI_MODE_NCI)
+ return -EINVAL;
r = info->phy_ops->write(info->phy_id, skb);
if (r < 0)
kfree_skb(skb);
-send_exit:
return r;
}
@@ -104,10 +99,8 @@ int nxp_nci_probe(void *phy_id, struct device *pdev,
int r;
info = devm_kzalloc(pdev, sizeof(struct nxp_nci_info), GFP_KERNEL);
- if (!info) {
- r = -ENOMEM;
- goto probe_exit;
- }
+ if (!info)
+ return -ENOMEM;
info->phy_id = phy_id;
info->pdev = pdev;
@@ -120,31 +113,25 @@ int nxp_nci_probe(void *phy_id, struct device *pdev,
if (info->phy_ops->set_mode) {
r = info->phy_ops->set_mode(info->phy_id, NXP_NCI_MODE_COLD);
if (r < 0)
- goto probe_exit;
+ return r;
}
info->mode = NXP_NCI_MODE_COLD;
info->ndev = nci_allocate_device(&nxp_nci_ops, NXP_NCI_NFC_PROTOCOLS,
NXP_NCI_HDR_LEN, 0);
- if (!info->ndev) {
- r = -ENOMEM;
- goto probe_exit;
- }
+ if (!info->ndev)
+ return -ENOMEM;
nci_set_parent_dev(info->ndev, pdev);
nci_set_drvdata(info->ndev, info);
r = nci_register_device(info->ndev);
- if (r < 0)
- goto probe_exit_free_nci;
+ if (r < 0) {
+ nci_free_device(info->ndev);
+ return r;
+ }
*ndev = info->ndev;
-
- goto probe_exit;
-
-probe_exit_free_nci:
- nci_free_device(info->ndev);
-probe_exit:
return r;
}
EXPORT_SYMBOL(nxp_nci_probe);
diff --git a/drivers/nfc/nxp-nci/firmware.c b/drivers/nfc/nxp-nci/firmware.c
index dae0c8030e95..119bf305c642 100644
--- a/drivers/nfc/nxp-nci/firmware.c
+++ b/drivers/nfc/nxp-nci/firmware.c
@@ -95,10 +95,8 @@ static int nxp_nci_fw_send_chunk(struct nxp_nci_info *info)
int r;
skb = nci_skb_alloc(info->ndev, info->max_payload, GFP_KERNEL);
- if (!skb) {
- r = -ENOMEM;
- goto chunk_exit;
- }
+ if (!skb)
+ return -ENOMEM;
chunk_len = info->max_payload - NXP_NCI_FW_HDR_LEN - NXP_NCI_FW_CRC_LEN;
remaining_len = fw_info->frame_size - fw_info->written;
@@ -124,7 +122,6 @@ static int nxp_nci_fw_send_chunk(struct nxp_nci_info *info)
kfree_skb(skb);
-chunk_exit:
return r;
}
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 83f23774b908..d61bbbf11979 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -23,6 +23,7 @@ struct mptcp_ext {
u64 data_seq;
u32 subflow_seq;
u16 data_len;
+ __sum16 csum;
u8 use_map:1,
dsn64:1,
data_fin:1,
@@ -31,7 +32,8 @@ struct mptcp_ext {
mpc_map:1,
frozen:1,
reset_transient:1;
- u8 reset_reason:4;
+ u8 reset_reason:4,
+ csum_reqd:1;
};
#define MPTCP_RM_IDS_MAX 8
@@ -63,8 +65,9 @@ struct mptcp_out_options {
struct mptcp_rm_list rm_list;
u8 join_id;
u8 backup;
- u8 reset_reason:4;
- u8 reset_transient:1;
+ u8 reset_reason:4,
+ reset_transient:1,
+ csum_reqd:1;
u32 nonce;
u64 thmac;
u32 token;
diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h
index 775a46d0b0f0..6bf43176f14c 100644
--- a/include/trace/events/mptcp.h
+++ b/include/trace/events/mptcp.h
@@ -73,6 +73,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__field(u64, data_seq)
__field(u32, subflow_seq)
__field(u16, data_len)
+ __field(u16, csum)
__field(u8, use_map)
__field(u8, dsn64)
__field(u8, data_fin)
@@ -82,6 +83,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__field(u8, frozen)
__field(u8, reset_transient)
__field(u8, reset_reason)
+ __field(u8, csum_reqd)
),
TP_fast_assign(
@@ -89,6 +91,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__entry->data_seq = mpext->data_seq;
__entry->subflow_seq = mpext->subflow_seq;
__entry->data_len = mpext->data_len;
+ __entry->csum = (__force u16)mpext->csum;
__entry->use_map = mpext->use_map;
__entry->dsn64 = mpext->dsn64;
__entry->data_fin = mpext->data_fin;
@@ -98,16 +101,18 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__entry->frozen = mpext->frozen;
__entry->reset_transient = mpext->reset_transient;
__entry->reset_reason = mpext->reset_reason;
+ __entry->csum_reqd = mpext->csum_reqd;
),
- TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u",
+ TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u csum=%x use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u csum_reqd=%u",
__entry->data_ack, __entry->data_seq,
__entry->subflow_seq, __entry->data_len,
- __entry->use_map, __entry->dsn64,
- __entry->data_fin, __entry->use_ack,
- __entry->ack64, __entry->mpc_map,
- __entry->frozen, __entry->reset_transient,
- __entry->reset_reason)
+ __entry->csum, __entry->use_map,
+ __entry->dsn64, __entry->data_fin,
+ __entry->use_ack, __entry->ack64,
+ __entry->mpc_map, __entry->frozen,
+ __entry->reset_transient, __entry->reset_reason,
+ __entry->csum_reqd)
);
DEFINE_EVENT(mptcp_dump_mpext, get_mapping_status,
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 8eb3c0844bff..7b05f7102321 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -105,6 +105,7 @@ struct mptcp_info {
__u64 mptcpi_rcv_nxt;
__u8 mptcpi_local_addr_used;
__u8 mptcpi_local_addr_max;
+ __u8 mptcpi_csum_enabled;
};
/*
diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h
index 5ae3ace84de0..332b18f318f8 100644
--- a/include/uapi/linux/seg6_local.h
+++ b/include/uapi/linux/seg6_local.h
@@ -64,6 +64,8 @@ enum {
SEG6_LOCAL_ACTION_END_AM = 14,
/* custom BPF action */
SEG6_LOCAL_ACTION_END_BPF = 15,
+ /* decap and lookup of DA in v4 or v6 table */
+ SEG6_LOCAL_ACTION_END_DT46 = 16,
__SEG6_LOCAL_ACTION_MAX,
};
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index e3f6ff05a528..1a705a4ef7fa 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -108,7 +108,8 @@ static inline netdev_features_t vlan_tnl_features(struct net_device *real_dev)
netdev_features_t ret;
ret = real_dev->hw_enc_features &
- (NETIF_F_CSUM_MASK | NETIF_F_ALL_TSO | NETIF_F_GSO_ENCAP_ALL);
+ (NETIF_F_CSUM_MASK | NETIF_F_GSO_SOFTWARE |
+ NETIF_F_GSO_ENCAP_ALL);
if ((ret & NETIF_F_GSO_ENCAP_ALL) && (ret & NETIF_F_CSUM_MASK))
return (ret & ~NETIF_F_CSUM_MASK) | NETIF_F_HW_CSUM;
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index da3256a3eed0..8789a57af543 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -113,9 +113,7 @@ static void __vlan_add_list(struct net_bridge_vlan *v)
headp = &vg->vlan_list;
list_for_each_prev(hpos, headp) {
vent = list_entry(hpos, struct net_bridge_vlan, vlist);
- if (v->vid < vent->vid)
- continue;
- else
+ if (v->vid >= vent->vid)
break;
}
list_add_rcu(&v->vlist, hpos);
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index cac30e676ac9..23267c8db7c4 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -480,7 +480,7 @@ got_phyid:
phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC);
if (!phyinfo) {
res = -ENOMEM;
- goto out_err;
+ goto out;
}
phy_layer->id = phyid;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 4ff38cb08f4b..60bf3b877957 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -87,10 +87,10 @@ struct seg6_end_dt_info {
int vrf_ifindex;
int vrf_table;
- /* tunneled packet proto and family (IPv4 or IPv6) */
- __be16 proto;
+ /* tunneled packet family (IPv4 or IPv6).
+ * Protocol and header length are inferred from family.
+ */
u16 family;
- int hdrlen;
};
struct pcpu_seg6_local_counters {
@@ -521,19 +521,6 @@ static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
info->net = net;
info->vrf_ifindex = vrf_ifindex;
- switch (family) {
- case AF_INET:
- info->proto = htons(ETH_P_IP);
- info->hdrlen = sizeof(struct iphdr);
- break;
- case AF_INET6:
- info->proto = htons(ETH_P_IPV6);
- info->hdrlen = sizeof(struct ipv6hdr);
- break;
- default:
- return -EINVAL;
- }
-
info->family = family;
info->mode = DT_VRF_MODE;
@@ -622,22 +609,44 @@ error:
}
static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
- struct seg6_local_lwt *slwt)
+ struct seg6_local_lwt *slwt, u16 family)
{
struct seg6_end_dt_info *info = &slwt->dt_info;
struct net_device *vrf;
+ __be16 protocol;
+ int hdrlen;
vrf = end_dt_get_vrf_rcu(skb, info);
if (unlikely(!vrf))
goto drop;
- skb->protocol = info->proto;
+ switch (family) {
+ case AF_INET:
+ protocol = htons(ETH_P_IP);
+ hdrlen = sizeof(struct iphdr);
+ break;
+ case AF_INET6:
+ protocol = htons(ETH_P_IPV6);
+ hdrlen = sizeof(struct ipv6hdr);
+ break;
+ case AF_UNSPEC:
+ fallthrough;
+ default:
+ goto drop;
+ }
+
+ if (unlikely(info->family != AF_UNSPEC && info->family != family)) {
+ pr_warn_once("seg6local: SRv6 End.DT* family mismatch");
+ goto drop;
+ }
+
+ skb->protocol = protocol;
skb_dst_drop(skb);
- skb_set_transport_header(skb, info->hdrlen);
+ skb_set_transport_header(skb, hdrlen);
- return end_dt_vrf_rcv(skb, info->family, vrf);
+ return end_dt_vrf_rcv(skb, family, vrf);
drop:
kfree_skb(skb);
@@ -656,7 +665,7 @@ static int input_action_end_dt4(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto drop;
- skb = end_dt_vrf_core(skb, slwt);
+ skb = end_dt_vrf_core(skb, slwt, AF_INET);
if (!skb)
/* packet has been processed and consumed by the VRF */
return 0;
@@ -739,7 +748,7 @@ static int input_action_end_dt6(struct sk_buff *skb,
goto legacy_mode;
/* DT6_VRF_MODE */
- skb = end_dt_vrf_core(skb, slwt);
+ skb = end_dt_vrf_core(skb, slwt, AF_INET6);
if (!skb)
/* packet has been processed and consumed by the VRF */
return 0;
@@ -767,6 +776,36 @@ drop:
return -EINVAL;
}
+#ifdef CONFIG_NET_L3_MASTER_DEV
+static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg,
+ struct netlink_ext_ack *extack)
+{
+ return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack);
+}
+
+static int input_action_end_dt46(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ unsigned int off = 0;
+ int nexthdr;
+
+ nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL);
+ if (unlikely(nexthdr < 0))
+ goto drop;
+
+ switch (nexthdr) {
+ case IPPROTO_IPIP:
+ return input_action_end_dt4(skb, slwt);
+ case IPPROTO_IPV6:
+ return input_action_end_dt6(skb, slwt);
+ }
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+#endif
+
/* push an SRH on top of the current one */
static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
@@ -969,6 +1008,17 @@ static struct seg6_action_desc seg6_action_table[] = {
.input = input_action_end_dt6,
},
{
+ .action = SEG6_LOCAL_ACTION_END_DT46,
+ .attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ .input = input_action_end_dt46,
+ .slwt_ops = {
+ .build_state = seg6_end_dt46_build,
+ },
+#endif
+ },
+ {
.action = SEG6_LOCAL_ACTION_END_B6,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH),
.optattrs = SEG6_F_LOCAL_COUNTERS,
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 1ec4d36a39f0..6c2639bb9c19 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -23,6 +23,7 @@ struct mptcp_pernet {
u8 mptcp_enabled;
unsigned int add_addr_timeout;
+ u8 checksum_enabled;
};
static struct mptcp_pernet *mptcp_get_pernet(struct net *net)
@@ -40,10 +41,16 @@ unsigned int mptcp_get_add_addr_timeout(struct net *net)
return mptcp_get_pernet(net)->add_addr_timeout;
}
+int mptcp_is_checksum_enabled(struct net *net)
+{
+ return mptcp_get_pernet(net)->checksum_enabled;
+}
+
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX;
+ pernet->checksum_enabled = 0;
}
#ifdef CONFIG_SYSCTL
@@ -65,6 +72,14 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "checksum_enabled",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
+ },
{}
};
@@ -82,6 +97,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[0].data = &pernet->mptcp_enabled;
table[1].data = &pernet->add_addr_timeout;
+ table[2].data = &pernet->checksum_enabled;
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr)
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index eb2dc6dbe212..e7e60bc1fb96 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -25,6 +25,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
+ SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),
SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL),
SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE),
SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index f0da4f060fe1..92e56c0cfbdd 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -18,6 +18,7 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
+ MPTCP_MIB_DATACSUMERR, /* The data checksum fail */
MPTCP_MIB_OFOQUEUETAIL, /* Segments inserted into OoO queue tail */
MPTCP_MIB_OFOQUEUE, /* Segments inserted into OoO queue */
MPTCP_MIB_OFOMERGE, /* Segments merged in OoO queue */
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index f16d9b5ee978..8f88ddeab6a2 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -144,6 +144,7 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
+ info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
unlock_sock_fast(sk, slow);
}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 6b825fb3fa83..1aec01686c1a 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -44,7 +44,20 @@ static void mptcp_parse_option(const struct sk_buff *skb,
else
expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
}
- if (opsize != expected_opsize)
+
+ /* Cfr RFC 8684 Section 3.3.0:
+ * If a checksum is present but its use had
+ * not been negotiated in the MP_CAPABLE handshake, the receiver MUST
+ * close the subflow with a RST, as it is not behaving as negotiated.
+ * If a checksum is not present when its use has been negotiated, the
+ * receiver MUST close the subflow with a RST, as it is considered
+ * broken
+ * We parse even option with mismatching csum presence, so that
+ * later in subflow_data_ready we can trigger the reset.
+ */
+ if (opsize != expected_opsize &&
+ (expected_opsize != TCPOLEN_MPTCP_MPC_ACK_DATA ||
+ opsize != TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM))
break;
/* try to be gentle vs future versions on the initial syn */
@@ -66,16 +79,9 @@ static void mptcp_parse_option(const struct sk_buff *skb,
* host requires the use of checksums, checksums MUST be used.
* In other words, the only way for checksums not to be used
* is if both hosts in their SYNs set A=0."
- *
- * Section 3.3.0:
- * "If a checksum is not present when its use has been
- * negotiated, the receiver MUST close the subflow with a RST as
- * it is considered broken."
- *
- * We don't implement DSS checksum - fall back to TCP.
*/
if (flags & MPTCP_CAP_CHECKSUM_REQD)
- break;
+ mp_opt->csum_reqd = 1;
mp_opt->mp_capable = 1;
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
@@ -86,7 +92,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->rcvr_key = get_unaligned_be64(ptr);
ptr += 8;
}
- if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) {
+ if (opsize >= TCPOLEN_MPTCP_MPC_ACK_DATA) {
/* Section 3.1.:
* "the data parameters in a MP_CAPABLE are semantically
* equivalent to those in a DSS option and can be used
@@ -98,9 +104,14 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2;
}
- pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
+ if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
+ mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+ mp_opt->csum_reqd = 1;
+ ptr += 2;
+ }
+ pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u",
version, flags, opsize, mp_opt->sndr_key,
- mp_opt->rcvr_key, mp_opt->data_len);
+ mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum);
break;
case MPTCPOPT_MP_JOIN:
@@ -171,10 +182,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
expected_opsize += TCPOLEN_MPTCP_DSS_MAP32;
}
- /* RFC 6824, Section 3.3:
- * If a checksum is present, but its use had
- * not been negotiated in the MP_CAPABLE handshake,
- * the checksum field MUST be ignored.
+ /* Always parse any csum presence combination, we will enforce
+ * RFC 8684 Section 3.3.0 checks later in subflow_data_ready
*/
if (opsize != expected_opsize &&
opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
@@ -209,9 +218,15 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2;
- pr_debug("data_seq=%llu subflow_seq=%u data_len=%u",
+ if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
+ mp_opt->csum_reqd = 1;
+ mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+ ptr += 2;
+ }
+
+ pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
mp_opt->data_seq, mp_opt->subflow_seq,
- mp_opt->data_len);
+ mp_opt->data_len, mp_opt->csum_reqd, mp_opt->csum);
}
break;
@@ -323,9 +338,12 @@ static void mptcp_parse_option(const struct sk_buff *skb,
}
}
-void mptcp_get_options(const struct sk_buff *skb,
+void mptcp_get_options(const struct sock *sk,
+ const struct sk_buff *skb,
struct mptcp_options_received *mp_opt)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
const struct tcphdr *th = tcp_hdr(skb);
const unsigned char *ptr;
int length;
@@ -341,6 +359,7 @@ void mptcp_get_options(const struct sk_buff *skb,
mp_opt->dss = 0;
mp_opt->mp_prio = 0;
mp_opt->reset = 0;
+ mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -380,6 +399,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
if (subflow->request_mptcp) {
opts->suboptions = OPTION_MPTCP_MPC_SYN;
+ opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk));
*size = TCPOLEN_MPTCP_MPC_SYN;
return true;
} else if (subflow->request_join) {
@@ -435,8 +455,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
struct mptcp_ext *mpext;
unsigned int data_len;
+ u8 len;
/* When skb is not available, we better over-estimate the emitted
* options len. A full DSS option (28 bytes) is longer than
@@ -465,16 +487,26 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
opts->suboptions = OPTION_MPTCP_MPC_ACK;
opts->sndr_key = subflow->local_key;
opts->rcvr_key = subflow->remote_key;
+ opts->csum_reqd = READ_ONCE(msk->csum_enabled);
/* Section 3.1.
* The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
* packets that start the first subflow of an MPTCP connection,
* as well as the first packet that carries data
*/
- if (data_len > 0)
- *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4);
- else
+ if (data_len > 0) {
+ len = TCPOLEN_MPTCP_MPC_ACK_DATA;
+ if (opts->csum_reqd) {
+ /* we need to propagate more info to csum the pseudo hdr */
+ opts->ext_copy.data_seq = mpext->data_seq;
+ opts->ext_copy.subflow_seq = mpext->subflow_seq;
+ opts->ext_copy.csum = mpext->csum;
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
+ }
+ *size = ALIGN(len, 4);
+ } else {
*size = TCPOLEN_MPTCP_MPC_ACK;
+ }
pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
subflow, subflow->local_key, subflow->remote_key,
@@ -535,18 +567,21 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
bool ret = false;
u64 ack_seq;
+ opts->csum_reqd = READ_ONCE(msk->csum_enabled);
mpext = skb ? mptcp_get_ext(skb) : NULL;
if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
- unsigned int map_size;
+ unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
- map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
+ if (mpext) {
+ if (opts->csum_reqd)
+ map_size += TCPOLEN_MPTCP_DSS_CHECKSUM;
- remaining -= map_size;
- dss_size = map_size;
- if (mpext)
opts->ext_copy = *mpext;
+ }
+ remaining -= map_size;
+ dss_size = map_size;
if (skb && snd_data_fin_enable)
mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
ret = true;
@@ -789,6 +824,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
if (subflow_req->mp_capable) {
opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
opts->sndr_key = subflow_req->local_key;
+ opts->csum_reqd = subflow_req->csum_reqd;
*size = TCPOLEN_MPTCP_MPC_SYNACK;
pr_debug("subflow_req=%p, local_key=%llu",
subflow_req, subflow_req->local_key);
@@ -1007,7 +1043,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
return;
}
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return;
@@ -1099,6 +1135,10 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
}
mpext->data_len = mp_opt.data_len;
mpext->use_map = 1;
+ mpext->csum_reqd = mp_opt.csum_reqd;
+
+ if (mpext->csum_reqd)
+ mpext->csum = mp_opt.csum;
}
}
@@ -1118,25 +1158,50 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
}
+static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+{
+ struct csum_pseudo_header header;
+ __wsum csum;
+
+ /* cfr RFC 8684 3.3.1.:
+ * the data sequence number used in the pseudo-header is
+ * always the 64-bit value, irrespective of what length is used in the
+ * DSS option itself.
+ */
+ header.data_seq = cpu_to_be64(mpext->data_seq);
+ header.subflow_seq = htonl(mpext->subflow_seq);
+ header.data_len = htons(mpext->data_len);
+ header.csum = 0;
+
+ csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum));
+ return (__force u16)csum_fold(csum);
+}
+
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts)
{
if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
- u8 len;
+ u8 len, flag = MPTCP_CAP_HMAC_SHA256;
- if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
+ if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYN;
- else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
+ } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYNACK;
- else if (opts->ext_copy.data_len)
+ } else if (opts->ext_copy.data_len) {
len = TCPOLEN_MPTCP_MPC_ACK_DATA;
- else
+ if (opts->csum_reqd)
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
+ } else {
len = TCPOLEN_MPTCP_MPC_ACK;
+ }
+
+ if (opts->csum_reqd)
+ flag |= MPTCP_CAP_CHECKSUM_REQD;
*ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
MPTCP_SUPPORTED_VERSION,
- MPTCP_CAP_HMAC_SHA256);
+ flag);
if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
opts->suboptions))
@@ -1152,8 +1217,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
if (!opts->ext_copy.data_len)
goto mp_capable_done;
- put_unaligned_be32(opts->ext_copy.data_len << 16 |
- TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ if (opts->csum_reqd) {
+ put_unaligned_be32(opts->ext_copy.data_len << 16 |
+ mptcp_make_csum(&opts->ext_copy), ptr);
+ } else {
+ put_unaligned_be32(opts->ext_copy.data_len << 16 |
+ TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ }
ptr += 1;
}
@@ -1305,6 +1375,9 @@ mp_capable_done:
flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
if (mpext->data_fin)
flags |= MPTCP_DSS_DATA_FIN;
+
+ if (opts->csum_reqd)
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
}
*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
@@ -1324,8 +1397,13 @@ mp_capable_done:
ptr += 2;
put_unaligned_be32(mpext->subflow_seq, ptr);
ptr += 1;
- put_unaligned_be32(mpext->data_len << 16 |
- TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ if (opts->csum_reqd) {
+ put_unaligned_be32(mpext->data_len << 16 |
+ mptcp_make_csum(mpext), ptr);
+ } else {
+ put_unaligned_be32(mpext->data_len << 16 |
+ TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ }
}
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 993095089990..42fc7187beee 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1308,6 +1308,18 @@ static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk)
return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation);
}
+/* note: this always recompute the csum on the whole skb, even
+ * if we just appended a single frag. More status info needed
+ */
+static void mptcp_update_data_checksum(struct sk_buff *skb, int added)
+{
+ struct mptcp_ext *mpext = mptcp_get_ext(skb);
+ __wsum csum = ~csum_unfold(mpext->csum);
+ int offset = skb->len - added;
+
+ mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset));
+}
+
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
struct mptcp_data_frag *dfrag,
struct mptcp_sendmsg_info *info)
@@ -1402,10 +1414,14 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (zero_window_probe) {
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
mpext->frozen = 1;
- ret = 0;
+ if (READ_ONCE(msk->csum_enabled))
+ mptcp_update_data_checksum(tail, ret);
tcp_push_pending_frames(ssk);
+ return 0;
}
out:
+ if (READ_ONCE(msk->csum_enabled))
+ mptcp_update_data_checksum(tail, ret);
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
return ret;
}
@@ -2359,8 +2375,8 @@ static void __mptcp_retrans(struct sock *sk)
/* limit retransmission to the bytes already sent on some subflows */
info.sent = 0;
- info.limit = dfrag->already_sent;
- while (info.sent < dfrag->already_sent) {
+ info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent;
+ while (info.sent < info.limit) {
if (!mptcp_alloc_tx_skb(sk, ssk))
break;
@@ -2372,9 +2388,11 @@ static void __mptcp_retrans(struct sock *sk)
copied += ret;
info.sent += ret;
}
- if (copied)
+ if (copied) {
+ dfrag->already_sent = max(dfrag->already_sent, info.sent);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
+ }
mptcp_set_timeout(sk, ssk);
release_sock(ssk);
@@ -2453,6 +2471,7 @@ static int __mptcp_init_sock(struct sock *sk)
msk->ack_hint = NULL;
msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
+ WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_init(msk);
@@ -2793,6 +2812,8 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->token = subflow_req->token;
msk->subflow = NULL;
WRITE_ONCE(msk->fully_established, false);
+ if (mp_opt->csum_reqd)
+ WRITE_ONCE(msk->csum_enabled, true);
msk->write_seq = subflow_req->idsn + 1;
msk->snd_nxt = msk->write_seq;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 89f6b73783d5..16e50caf200e 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -68,6 +68,8 @@
#define TCPOLEN_MPTCP_FASTCLOSE 12
#define TCPOLEN_MPTCP_RST 4
+#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
+
/* MPTCP MP_JOIN flags */
#define MPTCPOPT_BACKUP BIT(0)
#define MPTCPOPT_HMAC_LEN 20
@@ -124,6 +126,7 @@ struct mptcp_options_received {
u64 data_seq;
u32 subflow_seq;
u16 data_len;
+ __sum16 csum;
u16 mp_capable : 1,
mp_join : 1,
fastclose : 1,
@@ -133,6 +136,7 @@ struct mptcp_options_received {
rm_addr : 1,
mp_prio : 1,
echo : 1,
+ csum_reqd : 1,
backup : 1;
u32 token;
u32 nonce;
@@ -234,6 +238,7 @@ struct mptcp_sock {
bool snd_data_fin_enable;
bool rcv_fastclose;
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
+ bool csum_enabled;
spinlock_t join_list_lock;
struct sock *ack_hint;
struct work_struct work;
@@ -335,11 +340,19 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
}
+struct csum_pseudo_header {
+ __be64 data_seq;
+ __be32 subflow_seq;
+ __be16 data_len;
+ __sum16 csum;
+};
+
struct mptcp_subflow_request_sock {
struct tcp_request_sock sk;
u16 mp_capable : 1,
mp_join : 1,
- backup : 1;
+ backup : 1,
+ csum_reqd : 1;
u8 local_id;
u8 remote_id;
u64 local_key;
@@ -387,6 +400,8 @@ struct mptcp_subflow_context {
u32 map_subflow_seq;
u32 ssn_offset;
u32 map_data_len;
+ __wsum map_data_csum;
+ u32 map_csum_len;
u32 request_mptcp : 1, /* send MP_CAPABLE */
request_join : 1, /* send MP_JOIN */
request_bkup : 1,
@@ -396,6 +411,8 @@ struct mptcp_subflow_context {
pm_notified : 1, /* PM hook called for established status */
conn_finished : 1,
map_valid : 1,
+ map_csum_reqd : 1,
+ map_data_fin : 1,
mpc_map : 1,
backup : 1,
send_mp_prio : 1,
@@ -525,6 +542,7 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su
int mptcp_is_enabled(struct net *net);
unsigned int mptcp_get_add_addr_timeout(struct net *net);
+int mptcp_is_checksum_enabled(struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt);
bool mptcp_subflow_data_available(struct sock *sk);
@@ -576,7 +594,8 @@ int __init mptcp_proto_v6_init(void);
struct sock *mptcp_sk_clone(const struct sock *sk,
const struct mptcp_options_received *mp_opt,
struct request_sock *req);
-void mptcp_get_options(const struct sk_buff *skb,
+void mptcp_get_options(const struct sock *sk,
+ const struct sk_buff *skb,
struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 33956337c46b..6b1cd4257edf 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -108,6 +108,7 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
subflow_req->mp_capable = 0;
subflow_req->mp_join = 0;
+ subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener));
subflow_req->msk = NULL;
mptcp_token_init_request(req);
}
@@ -150,7 +151,7 @@ static int subflow_check_req(struct request_sock *req,
return -EINVAL;
#endif
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk_listener, skb, &mp_opt);
if (mp_opt.mp_capable) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
@@ -247,7 +248,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
int err;
subflow_init_req(req, sk_listener);
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk_listener, skb, &mp_opt);
if (mp_opt.mp_capable && mp_opt.mp_join)
return -EINVAL;
@@ -394,7 +395,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
if (subflow->request_mptcp) {
if (!mp_opt.mp_capable) {
MPTCP_INC_STATS(sock_net(sk),
@@ -404,6 +405,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
goto fallback;
}
+ if (mp_opt.csum_reqd)
+ WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
subflow->mp_capable = 1;
subflow->can_ack = 1;
subflow->remote_key = mp_opt.sndr_key;
@@ -638,7 +641,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* reordered MPC will cause fallback, but we don't have other
* options.
*/
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
if (!mp_opt.mp_capable) {
fallback = true;
goto create_child;
@@ -648,7 +651,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
if (!new_msk)
fallback = true;
} else if (subflow_req->mp_join) {
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
!mptcp_can_accept_new_subflow(subflow_req->msk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
@@ -824,10 +827,92 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
return true;
}
+static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb,
+ bool csum_reqd)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct csum_pseudo_header header;
+ u32 offset, seq, delta;
+ __wsum csum;
+ int len;
+
+ if (!csum_reqd)
+ return MAPPING_OK;
+
+ /* mapping already validated on previous traversal */
+ if (subflow->map_csum_len == subflow->map_data_len)
+ return MAPPING_OK;
+
+ /* traverse the receive queue, ensuring it contains a full
+ * DSS mapping and accumulating the related csum.
+ * Preserve the accoumlate csum across multiple calls, to compute
+ * the csum only once
+ */
+ delta = subflow->map_data_len - subflow->map_csum_len;
+ for (;;) {
+ seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len;
+ offset = seq - TCP_SKB_CB(skb)->seq;
+
+ /* if the current skb has not been accounted yet, csum its contents
+ * up to the amount covered by the current DSS
+ */
+ if (offset < skb->len) {
+ __wsum csum;
+
+ len = min(skb->len - offset, delta);
+ csum = skb_checksum(skb, offset, len, 0);
+ subflow->map_data_csum = csum_block_add(subflow->map_data_csum, csum,
+ subflow->map_csum_len);
+
+ delta -= len;
+ subflow->map_csum_len += len;
+ }
+ if (delta == 0)
+ break;
+
+ if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) {
+ /* if this subflow is closed, the partial mapping
+ * will be never completed; flush the pending skbs, so
+ * that subflow_sched_work_if_closed() can kick in
+ */
+ if (unlikely(ssk->sk_state == TCP_CLOSE))
+ while ((skb = skb_peek(&ssk->sk_receive_queue)))
+ sk_eat_skb(ssk, skb);
+
+ /* not enough data to validate the csum */
+ return MAPPING_EMPTY;
+ }
+
+ /* the DSS mapping for next skbs will be validated later,
+ * when a get_mapping_status call will process such skb
+ */
+ skb = skb->next;
+ }
+
+ /* note that 'map_data_len' accounts only for the carried data, does
+ * not include the eventual seq increment due to the data fin,
+ * while the pseudo header requires the original DSS data len,
+ * including that
+ */
+ header.data_seq = cpu_to_be64(subflow->map_seq);
+ header.subflow_seq = htonl(subflow->map_subflow_seq);
+ header.data_len = htons(subflow->map_data_len + subflow->map_data_fin);
+ header.csum = 0;
+
+ csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
+ if (unlikely(csum_fold(csum))) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
+ return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
+ }
+
+ return MAPPING_OK;
+}
+
static enum mapping_status get_mapping_status(struct sock *ssk,
struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ bool csum_reqd = READ_ONCE(msk->csum_enabled);
struct mptcp_ext *mpext;
struct sk_buff *skb;
u16 data_len;
@@ -920,9 +1005,10 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
/* Allow replacing only with an identical map */
if (subflow->map_seq == map_seq &&
subflow->map_subflow_seq == mpext->subflow_seq &&
- subflow->map_data_len == data_len) {
+ subflow->map_data_len == data_len &&
+ subflow->map_csum_reqd == mpext->csum_reqd) {
skb_ext_del(skb, SKB_EXT_MPTCP);
- return MAPPING_OK;
+ goto validate_csum;
}
/* If this skb data are fully covered by the current mapping,
@@ -934,17 +1020,27 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
}
/* will validate the next map after consuming the current one */
- return MAPPING_OK;
+ goto validate_csum;
}
subflow->map_seq = map_seq;
subflow->map_subflow_seq = mpext->subflow_seq;
subflow->map_data_len = data_len;
subflow->map_valid = 1;
+ subflow->map_data_fin = mpext->data_fin;
subflow->mpc_map = mpext->mpc_map;
- pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
+ subflow->map_csum_reqd = mpext->csum_reqd;
+ subflow->map_csum_len = 0;
+ subflow->map_data_csum = csum_unfold(mpext->csum);
+
+ /* Cfr RFC 8684 Section 3.3.0 */
+ if (unlikely(subflow->map_csum_reqd != csum_reqd))
+ return MAPPING_INVALID;
+
+ pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
subflow->map_seq, subflow->map_subflow_seq,
- subflow->map_data_len);
+ subflow->map_data_len, subflow->map_csum_reqd,
+ subflow->map_data_csum);
validate_seq:
/* we revalidate valid mapping on new skb, because we must ensure
@@ -954,7 +1050,9 @@ validate_seq:
return MAPPING_INVALID;
skb_ext_del(skb, SKB_EXT_MPTCP);
- return MAPPING_OK;
+
+validate_csum:
+ return validate_data_csum(ssk, skb, csum_reqd);
}
static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 67954afef4e1..21ccf450e249 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -860,7 +860,7 @@ s64 vsock_stream_has_data(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(vsock_stream_has_data);
-static s64 vsock_has_data(struct vsock_sock *vsk)
+static s64 vsock_connectible_has_data(struct vsock_sock *vsk)
{
struct sock *sk = sk_vsock(vsk);
@@ -1866,10 +1866,11 @@ out:
return err;
}
-static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait,
- long timeout,
- struct vsock_transport_recv_notify_data *recv_data,
- size_t target)
+static int vsock_connectible_wait_data(struct sock *sk,
+ struct wait_queue_entry *wait,
+ long timeout,
+ struct vsock_transport_recv_notify_data *recv_data,
+ size_t target)
{
const struct vsock_transport *transport;
struct vsock_sock *vsk;
@@ -1880,7 +1881,7 @@ static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait,
err = 0;
transport = vsk->transport;
- while ((data = vsock_has_data(vsk)) == 0) {
+ while ((data = vsock_connectible_has_data(vsk)) == 0) {
prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);
if (sk->sk_err != 0 ||
@@ -1967,7 +1968,8 @@ static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg,
while (1) {
ssize_t read;
- err = vsock_wait_data(sk, &wait, timeout, &recv_data, target);
+ err = vsock_connectible_wait_data(sk, &wait, timeout,
+ &recv_data, target);
if (err <= 0)
break;
@@ -2022,7 +2024,7 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
- err = vsock_wait_data(sk, &wait, timeout, NULL, 0);
+ err = vsock_connectible_wait_data(sk, &wait, timeout, NULL, 0);
if (err <= 0)
goto out;
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 23704a6bc437..f014ccfdd9c2 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -413,7 +413,6 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
struct virtio_vsock_pkt *pkt;
int dequeued_len = 0;
size_t user_buf_len = msg_data_left(msg);
- bool copy_failed = false;
bool msg_ready = false;
spin_lock_bh(&vvs->rx_lock);
@@ -426,7 +425,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
while (!msg_ready) {
pkt = list_first_entry(&vvs->rx_queue, struct virtio_vsock_pkt, list);
- if (!copy_failed) {
+ if (dequeued_len >= 0) {
size_t pkt_len;
size_t bytes_to_copy;
@@ -443,11 +442,9 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
err = memcpy_to_msg(msg, pkt->buf, bytes_to_copy);
if (err) {
- /* Copy of message failed, set flag to skip
- * copy path for rest of fragments. Rest of
+ /* Copy of message failed. Rest of
* fragments will be freed without copy.
*/
- copy_failed = true;
dequeued_len = err;
} else {
user_buf_len -= bytes_to_copy;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 9ca5f1ba461e..69351c3eb68c 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -3,7 +3,7 @@
time_start=$(date +%s)
-optstring="S:R:d:e:l:r:h4cm:f:t"
+optstring="S:R:d:e:l:r:h4cm:f:tC"
ret=0
sin=""
sout=""
@@ -22,6 +22,7 @@ sndbuf=0
rcvbuf=0
options_log=true
do_tcp=0
+checksum=false
filesize=0
if [ $tc_loss -eq 100 ];then
@@ -47,6 +48,7 @@ usage() {
echo -e "\t-R: set rcvbuf value (default: use kernel default)"
echo -e "\t-m: test mode (poll, sendfile; default: poll)"
echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)"
+ echo -e "\t-C: enable the MPTCP data checksum"
}
while getopts "$optstring" option;do
@@ -104,6 +106,9 @@ while getopts "$optstring" option;do
"t")
do_tcp=$((do_tcp+1))
;;
+ "C")
+ checksum=true
+ ;;
"?")
usage $0
exit 1
@@ -200,6 +205,12 @@ ip -net "$ns4" route add default via dead:beef:3::2
# use TCP syn cookies, even if no flooding was detected.
ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+if $checksum; then
+ for i in "$ns1" "$ns2" "$ns3" "$ns4";do
+ ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1
+ done
+fi
+
set_ethtool_flags() {
local ns="$1"
local dev="$2"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index fd99485cf2a4..523c7797f30a 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -12,6 +12,7 @@ timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
mptcp_connect=""
capture=0
+checksum=0
do_all_tests=1
TEST_COUNT=0
@@ -49,6 +50,9 @@ init()
ip netns exec $netns sysctl -q net.mptcp.enabled=1
ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
+ if [ $checksum -eq 1 ]; then
+ ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1
+ fi
done
# ns1 ns2
@@ -124,6 +128,17 @@ reset_with_add_addr_timeout()
-j DROP
}
+reset_with_checksum()
+{
+ local ns1_enable=$1
+ local ns2_enable=$2
+
+ reset
+
+ ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
+ ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
+}
+
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
@@ -476,6 +491,45 @@ run_tests()
fi
}
+chk_csum_nr()
+{
+ local msg=${1:-""}
+ local count
+ local dump_stats
+
+ if [ ! -z "$msg" ]; then
+ printf "%02u" "$TEST_COUNT"
+ else
+ echo -n " "
+ fi
+ printf " %-36s %s" "$msg" "sum"
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != 0 ]; then
+ echo "[fail] got $count data checksum error[s] expected 0"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+ echo -n " - csum "
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != 0 ]; then
+ echo "[fail] got $count data checksum error[s] expected 0"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+ if [ "${dump_stats}" = 1 ]; then
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep MPTcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep MPTcp
+ fi
+}
+
chk_join_nr()
{
local msg="$1"
@@ -523,6 +577,9 @@ chk_join_nr()
echo Client ns stats
ip netns exec $ns2 nstat -as | grep MPTcp
fi
+ if [ $checksum -eq 1 ]; then
+ chk_csum_nr
+ fi
}
chk_add_nr()
@@ -1374,6 +1431,37 @@ syncookies_tests()
chk_add_nr 1 1
}
+checksum_tests()
+{
+ # checksum test 0 0
+ reset_with_checksum 0 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_csum_nr "checksum test 0 0"
+
+ # checksum test 1 1
+ reset_with_checksum 1 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_csum_nr "checksum test 1 1"
+
+ # checksum test 0 1
+ reset_with_checksum 0 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_csum_nr "checksum test 0 1"
+
+ # checksum test 1 0
+ reset_with_checksum 1 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_csum_nr "checksum test 1 0"
+}
+
all_tests()
{
subflows_tests
@@ -1387,6 +1475,7 @@ all_tests()
backup_tests
add_addr_ports_tests
syncookies_tests
+ checksum_tests
}
usage()
@@ -1403,7 +1492,9 @@ usage()
echo " -b backup_tests"
echo " -p add_addr_ports_tests"
echo " -k syncookies_tests"
+ echo " -S checksum_tests"
echo " -c capture pcap files"
+ echo " -C enable data checksum"
echo " -h help"
}
@@ -1418,13 +1509,16 @@ make_file "$sin" "server" 1
trap cleanup EXIT
for arg in "$@"; do
- # check for "capture" arg before launching tests
+ # check for "capture/checksum" args before launching tests
if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then
capture=1
fi
+ if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"C"[0-9a-zA-Z]*$ ]]; then
+ checksum=1
+ fi
- # exception for the capture option, the rest means: a part of the tests
- if [ "${arg}" != "-c" ]; then
+ # exception for the capture/checksum options, the rest means: a part of the tests
+ if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ]; then
do_all_tests=0
fi
done
@@ -1434,7 +1528,7 @@ if [ $do_all_tests -eq 1 ]; then
exit $ret
fi
-while getopts 'fsltra64bpkch' opt; do
+while getopts 'fsltra64bpkchCS' opt; do
case $opt in
f)
subflows_tests
@@ -1469,8 +1563,13 @@ while getopts 'fsltra64bpkch' opt; do
k)
syncookies_tests
;;
+ S)
+ checksum_tests
+ ;;
c)
;;
+ C)
+ ;;
h | *)
usage
;;
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
new file mode 100755
index 000000000000..75ada17ac061
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -0,0 +1,573 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it>
+
+# This test is designed for evaluating the new SRv6 End.DT46 Behavior used for
+# implementing IPv4/IPv6 L3 VPN use cases.
+#
+# The current SRv6 code in the Linux kernel only implements SRv6 End.DT4 and
+# End.DT6 Behaviors which can be used respectively to support IPv4-in-IPv6 and
+# IPv6-in-IPv6 VPNs. With End.DT4 and End.DT6 it is not possible to create a
+# single SRv6 VPN tunnel to carry both IPv4 and IPv6 traffic.
+# The SRv6 End.DT46 Behavior implementation is meant to support the
+# decapsulation of IPv4 and IPv6 traffic coming from a single SRv6 tunnel.
+# Therefore, the SRv6 End.DT46 Behavior in the Linux kernel greatly simplifies
+# the setup and operations of SRv6 VPNs.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv4/IPv6 L3 VPN services allowing hosts to communicate with
+# each other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the
+# IPv4/IPv6 L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected
+# using the IPv4/IPv6 L3 VPN of tenant 200. Cross connection between tenant 100
+# and tenant 200 is forbidden and thus, for example, hs-t100-1 cannot reach
+# hs-t200-3 and vice versa.
+#
+# Routers rt-1 and rt-2 implement IPv4/IPv6 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT46 Behavior and c) VRF.
+#
+# To explain how an IPv4/IPv6 L3 VPN based on SRv6 works, let us briefly
+# consider an example where, within the same domain of tenant 100, the host
+# hs-t100-1 pings the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as a arp/ndp proxy.
+#
+# When the host hs-t100-1 sends an IPv6 or IPv4 packet destined to hs-t100-2,
+# the router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv6 or IPv4 packet in a IPv6 plus the
+# Segment Routing Header (SRH) packet. This packet is sent through the (IPv6)
+# core network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT46 Behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv6 or IPv4 packet. Afterwards, the packet is sent to the
+# host hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the roles of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv4/IPv6 L3 VPN for tenant 200 works exactly as the IPv4/IPv6
+# L3 VPN for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are
+# able to connect with each other.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-t100-1 netns | | hs-t100-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::1/64 | | | | cafe::2/64 | |
+# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | cafe::254/64 | | | | cafe::254/64 | |
+# | | 10.0.0.254/24 | +----------+ | | +----------+ | 10.0.0.254/24 | |
+# | +-------+-------+ | localsid | | | | localsid | +-------+-------- |
+# | | | table | | | | table | | |
+# | +----+----+ +----------+ | | +----------+ +----+----+ |
+# | | vrf-100 | | | | vrf-100 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | veth0 | | | | veth0 | |
+# | | fd00::1/64 |.|...|.| fd00::2/64 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | vrf-200 | | | | vrf-200 | |
+# | +----+----+ | | +----+----+ |
+# | | | | | |
+# | +-------+-------+ | | +-------+-------- |
+# | | veth-t200 | | | | veth-t200 | |
+# | | cafe::254/64 | | | | cafe::254/64 | |
+# | | 10.0.0.254/24 | | | | 10.0.0.254/24 | |
+# | +---------------+ rt-1 netns | | rt-2 netns +---------------- |
+# | . | | . |
+# +-----------------------------------+ +-----------------------------------+
+# . .
+# . .
+# . .
+# . .
+# +-------------------+ +-------------------+
+# | . | | . |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::3/64 | | | | cafe::4/64 | |
+# | | 10.0.0.3/24 | | | | 10.0.0.4/24 | |
+# | +-------------+ | | +-------------+ |
+# | | | |
+# | hs-t200-3 netns | | hs-t200-4 netns |
+# | | | |
+# +-------------------+ +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +--------------------------------------------------+
+# |SID |Action |
+# +--------------------------------------------------+
+# |fc00:21:100::6046|apply SRv6 End.DT46 vrftable 100|
+# +--------------------------------------------------+
+# |fc00:21:200::6046|apply SRv6 End.DT46 vrftable 200|
+# +--------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::2 |apply seg6 encap segs fc00:12:100::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t100 |
+# +---------------------------------------------------+
+# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t100 |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::4 |apply seg6 encap segs fc00:12:200::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t200 |
+# +---------------------------------------------------+
+# |10.0.0.4 |apply seg6 encap segs fc00:12:200::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t200 |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +--------------------------------------------------+
+# |SID |Action |
+# +--------------------------------------------------+
+# |fc00:12:100::6046|apply SRv6 End.DT46 vrftable 100|
+# +--------------------------------------------------+
+# |fc00:12:200::6046|apply SRv6 End.DT46 vrftable 200|
+# +--------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::1 |apply seg6 encap segs fc00:21:100::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t100 |
+# +---------------------------------------------------+
+# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t100 |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::3 |apply seg6 encap segs fc00:21:200::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t200 |
+# +---------------------------------------------------+
+# |10.0.0.3 |apply seg6 encap segs fc00:21:200::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t200 |
+# +---------------------------------------------------+
+#
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ # destroy routers rt-* and hosts hs-*
+ for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+ ip netns del ${ns} || true
+ done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns add ${nsname}
+ ip link set veth-rt-${rt} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+ local hs=$1
+ local rt=$2
+ local tid=$3
+ local hsname=hs-t${tid}-${hs}
+ local rtname=rt-${rt}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns add ${hsname}
+
+ ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad
+ ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ # configure the VRF for the tenant X on the router which is directly
+ # connected to the source host.
+ ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+ ip -netns ${rtname} link set vrf-${tid} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ # enslave the veth-tX interface to the vrf-X in the access router
+ ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+ ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad
+ ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth}
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+
+ # disable the rp_filter otherwise the kernel gets confused about how
+ # to route decap ipv4 packets.
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0
+
+ ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ local hssrc_name=hs-t${tid}-${hssrc}
+ local hsdst_name=hs-t${tid}-${hsdst}
+ local rtsrc_name=rt-${rtsrc}
+ local rtdst_name=rt-${rtdst}
+ local rtveth=veth-t${tid}
+ local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6046
+
+ ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}
+
+ # set the encap route for encapsulating packets which arrive from the
+ # host hssrc and destined to the access router rtsrc.
+ ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+ via fd00::${rtdst} dev veth0
+
+ # set the decap route for decapsulating packets which arrive from
+ # the rtdst router and destined to the hsdst host.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+ encap seg6local action End.DT46 vrftable ${tid} dev vrf-${tid}
+
+ # all sids for VPNs start with a common locator which is fc00::/16.
+ # Routes for handling the SRv6 End.DT46 behavior instances are grouped
+ # together in the 'localsid' table.
+ #
+ # NOTE: added only once
+ if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+ grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+ ip -netns ${rtdst_name} -6 rule add \
+ to ${VPN_LOCATOR_SERVICE}::/16 \
+ lookup ${LOCALSID_TABLE_ID} prio 999
+ fi
+
+ # set default routes to unreachable for both ipv4 and ipv6
+ ip -netns ${rtsrc_name} -6 route add unreachable default metric 4278198272 \
+ vrf vrf-${tid}
+
+ ip -netns ${rtsrc_name} -4 route add unreachable default metric 4278198272 \
+ vrf vrf-${tid}
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_hs 1 1 100 #args: host router tenant
+ setup_hs 2 2 100
+
+ # setup two hosts for the tenant 200
+ # - host hs-3 is directly connected to the router rt-1;
+ # - host hs-4 is directly connected to the router rt-2.
+ setup_hs 3 1 200
+ setup_hs 4 2 200
+
+ # setup the IPv4/IPv6 L3 VPN which connects the host hs-t100-1 and host
+ # hs-t100-2 within the same tenant 100.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+
+ # setup the IPv4/IPv6 L3 VPN which connects the host hs-t200-3 and host
+ # hs-t200-4 within the same tenant 200.
+ setup_vpn_config 3 1 4 2 200
+ setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+ >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ check_rt_connectivity ${rtsrc} ${rtdst}
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+
+ check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+
+}
+
+check_and_log_hs_isolation()
+{
+ local hssrc=$1
+ local tidsrc=$2
+ local hsdst=$3
+ local tiddst=$4
+
+ check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tidsrc}
+ # NOTE: ping should fail
+ log_test $? 1 "IPv6 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+
+ check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tidsrc}
+ # NOTE: ping should fail
+ log_test $? 1 "IPv4 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc=$1
+ local tid=$2
+
+ check_hs_ipv6_connectivity ${hssrc} 254 ${tid}
+ log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+
+ check_hs_ipv4_connectivity ${hssrc} 254 ${tid}
+ log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+
+}
+
+router_tests()
+{
+ log_section "IPv6 routers connectivity test"
+
+ check_and_log_rt_connectivity 1 2
+ check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+ log_section "IPv4/IPv6 connectivity test among hosts and gateway"
+
+ check_and_log_hs2gw_connectivity 1 100
+ check_and_log_hs2gw_connectivity 2 100
+
+ check_and_log_hs2gw_connectivity 3 200
+ check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+
+ check_and_log_hs_connectivity 3 4 200
+ check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+ local i
+ local j
+ local k
+ local tmp
+ local l1="1 2"
+ local l2="3 4"
+ local t1=100
+ local t2=200
+
+ log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+ for k in 0 1; do
+ for i in ${l1}; do
+ for j in ${l2}; do
+ check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+ done
+ done
+
+ # let us test the reverse path
+ tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+ tmp=${t1}; t1=${t2}; t2=${tmp}
+ done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit 0
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit 0
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}