diff options
63 files changed, 2094 insertions, 679 deletions
diff --git a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt index dc7961b33076..f55aa280d34f 100644 --- a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt +++ b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt @@ -14,7 +14,10 @@ Required properties for all the ethernet interfaces: - "enet_csr": Ethernet control and status register address space - "ring_csr": Descriptor ring control and status register address space - "ring_cmd": Descriptor ring command register address space -- interrupts: Ethernet main interrupt +- interrupts: Two interrupt specifiers can be specified. + - First is the Rx interrupt. This irq is mandatory. + - Second is the Tx completion interrupt. + This is supported only on SGMII based 1GbE and 10GbE interfaces. - port-id: Port number (0 or 1) - clocks: Reference to the clock entry. - local-mac-address: MAC address assigned to this device diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index c1eb6911e539..e74f6e0a208c 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -638,7 +638,8 @@ <0x0 0x1f200000 0x0 0Xc300>, <0x0 0x1B000000 0x0 0X200>; reg-names = "enet_csr", "ring_csr", "ring_cmd"; - interrupts = <0x0 0xA0 0x4>; + interrupts = <0x0 0xA0 0x4>, + <0x0 0xA1 0x4>; dma-coherent; clocks = <&sge0clk 0>; local-mac-address = [00 00 00 00 00 00]; @@ -652,7 +653,8 @@ <0x0 0x1f200000 0x0 0Xc300>, <0x0 0x1B000000 0x0 0X8000>; reg-names = "enet_csr", "ring_csr", "ring_cmd"; - interrupts = <0x0 0xAC 0x4>; + interrupts = <0x0 0xAC 0x4>, + <0x0 0xAD 0x4>; port-id = <1>; dma-coherent; clocks = <&sge1clk 0>; @@ -667,7 +669,8 @@ <0x0 0x1f600000 0x0 0Xc300>, <0x0 0x18000000 0x0 0X200>; reg-names = "enet_csr", "ring_csr", "ring_cmd"; - interrupts = <0x0 0x60 0x4>; + interrupts = <0x0 0x60 0x4>, + <0x0 0x61 0x4>; dma-coherent; clocks = <&xge0clk 0>; /* mac address will be overwritten by the bootloader */ diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c026ce9cd7b6..7b4684ccdb3f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4038,6 +4038,7 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_fix_features = bond_fix_features, .ndo_bridge_setlink = ndo_dflt_netdev_switch_port_bridge_setlink, .ndo_bridge_dellink = ndo_dflt_netdev_switch_port_bridge_dellink, + .ndo_features_check = passthru_features_check, }; static const struct device_type bond_type = { diff --git a/drivers/net/dsa/mv88e6123_61_65.c b/drivers/net/dsa/mv88e6123_61_65.c index e9c736e1cef3..2d7e1ffe9fdc 100644 --- a/drivers/net/dsa/mv88e6123_61_65.c +++ b/drivers/net/dsa/mv88e6123_61_65.c @@ -222,28 +222,6 @@ static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p) val |= 0x000c; REG_WRITE(addr, 0x04, val); - /* Port Control 1: disable trunking. Also, if this is the - * CPU port, enable learn messages to be sent to this port. - */ - REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000); - - /* Port based VLAN map: give each port its own address - * database, allow the CPU port to talk to each of the 'real' - * ports, and allow each of the 'real' ports to only talk to - * the upstream port. - */ - val = (p & 0xf) << 12; - if (dsa_is_cpu_port(ds, p)) - val |= ds->phys_port_mask; - else - val |= 1 << dsa_upstream_port(ds); - REG_WRITE(addr, 0x06, val); - - /* Default VLAN ID and priority: don't set a default VLAN - * ID, and set the default packet priority to zero. - */ - REG_WRITE(addr, 0x07, 0x0000); - /* Port Control 2: don't force a good FCS, set the maximum * frame size to 10240 bytes, don't let the switch add or * strip 802.1q tags, don't discard tagged or untagged frames @@ -288,18 +266,17 @@ static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p) */ REG_WRITE(addr, 0x19, 0x7654); - return 0; + return mv88e6xxx_setup_port_common(ds, p); } static int mv88e6123_61_65_setup(struct dsa_switch *ds) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int i; int ret; - mutex_init(&ps->smi_mutex); - mutex_init(&ps->stats_mutex); - mutex_init(&ps->phy_mutex); + ret = mv88e6xxx_setup_common(ds); + if (ret < 0) + return ret; ret = mv88e6123_61_65_switch_reset(ds); if (ret < 0) diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c index 9808c860a797..18cfead83dc9 100644 --- a/drivers/net/dsa/mv88e6171.c +++ b/drivers/net/dsa/mv88e6171.c @@ -17,6 +17,10 @@ #include <net/dsa.h> #include "mv88e6xxx.h" +/* Switch product IDs */ +#define ID_6171 0x1710 +#define ID_6172 0x1720 + static char *mv88e6171_probe(struct device *host_dev, int sw_addr) { struct mii_bus *bus = dsa_host_dev_to_mii_bus(host_dev); @@ -27,9 +31,9 @@ static char *mv88e6171_probe(struct device *host_dev, int sw_addr) ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); if (ret >= 0) { - if ((ret & 0xfff0) == 0x1710) + if ((ret & 0xfff0) == ID_6171) return "Marvell 88E6171"; - if ((ret & 0xfff0) == 0x1720) + if ((ret & 0xfff0) == ID_6172) return "Marvell 88E6172"; } @@ -221,28 +225,6 @@ static int mv88e6171_setup_port(struct dsa_switch *ds, int p) val |= 0x000c; REG_WRITE(addr, 0x04, val); - /* Port Control 1: disable trunking. Also, if this is the - * CPU port, enable learn messages to be sent to this port. - */ - REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000); - - /* Port based VLAN map: give each port its own address - * database, allow the CPU port to talk to each of the 'real' - * ports, and allow each of the 'real' ports to only talk to - * the upstream port. - */ - val = (p & 0xf) << 12; - if (dsa_is_cpu_port(ds, p)) - val |= ds->phys_port_mask; - else - val |= 1 << dsa_upstream_port(ds); - REG_WRITE(addr, 0x06, val); - - /* Default VLAN ID and priority: don't set a default VLAN - * ID, and set the default packet priority to zero. - */ - REG_WRITE(addr, 0x07, 0x0000); - /* Port Control 2: don't force a good FCS, set the maximum * frame size to 10240 bytes, don't let the switch add or * strip 802.1q tags, don't discard tagged or untagged frames @@ -287,17 +269,17 @@ static int mv88e6171_setup_port(struct dsa_switch *ds, int p) */ REG_WRITE(addr, 0x19, 0x7654); - return 0; + return mv88e6xxx_setup_port_common(ds, p); } static int mv88e6171_setup(struct dsa_switch *ds) { - struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); int i; int ret; - mutex_init(&ps->smi_mutex); - mutex_init(&ps->stats_mutex); + ret = mv88e6xxx_setup_common(ds); + if (ret < 0) + return ret; ret = mv88e6171_switch_reset(ds); if (ret < 0) @@ -318,8 +300,6 @@ static int mv88e6171_setup(struct dsa_switch *ds) return ret; } - mutex_init(&ps->phy_mutex); - return 0; } @@ -410,6 +390,28 @@ static int mv88e6171_get_sset_count(struct dsa_switch *ds) return ARRAY_SIZE(mv88e6171_hw_stats); } +static int mv88e6171_get_eee(struct dsa_switch *ds, int port, + struct ethtool_eee *e) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + if (ps->id == ID_6172) + return mv88e6xxx_get_eee(ds, port, e); + + return -EOPNOTSUPP; +} + +static int mv88e6171_set_eee(struct dsa_switch *ds, int port, + struct phy_device *phydev, struct ethtool_eee *e) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + if (ps->id == ID_6172) + return mv88e6xxx_set_eee(ds, port, phydev, e); + + return -EOPNOTSUPP; +} + struct dsa_switch_driver mv88e6171_switch_driver = { .tag_protocol = DSA_TAG_PROTO_EDSA, .priv_size = sizeof(struct mv88e6xxx_priv_state), @@ -422,11 +424,19 @@ struct dsa_switch_driver mv88e6171_switch_driver = { .get_strings = mv88e6171_get_strings, .get_ethtool_stats = mv88e6171_get_ethtool_stats, .get_sset_count = mv88e6171_get_sset_count, + .set_eee = mv88e6171_set_eee, + .get_eee = mv88e6171_get_eee, #ifdef CONFIG_NET_DSA_HWMON .get_temp = mv88e6xxx_get_temp, #endif .get_regs_len = mv88e6xxx_get_regs_len, .get_regs = mv88e6xxx_get_regs, + .port_join_bridge = mv88e6xxx_join_bridge, + .port_leave_bridge = mv88e6xxx_leave_bridge, + .port_stp_update = mv88e6xxx_port_stp_update, + .fdb_add = mv88e6xxx_port_fdb_add, + .fdb_del = mv88e6xxx_port_fdb_del, + .fdb_getnext = mv88e6xxx_port_fdb_getnext, }; MODULE_ALIAS("platform:mv88e6171"); diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c index 7bc5998384c6..41fe3a6a72d1 100644 --- a/drivers/net/dsa/mv88e6352.c +++ b/drivers/net/dsa/mv88e6352.c @@ -215,28 +215,6 @@ static int mv88e6352_setup_port(struct dsa_switch *ds, int p) val |= 0x000c; REG_WRITE(addr, 0x04, val); - /* Port Control 1: disable trunking. Also, if this is the - * CPU port, enable learn messages to be sent to this port. - */ - REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000); - - /* Port based VLAN map: give each port its own address - * database, allow the CPU port to talk to each of the 'real' - * ports, and allow each of the 'real' ports to only talk to - * the upstream port. - */ - val = (p & 0xf) << 12; - if (dsa_is_cpu_port(ds, p)) - val |= ds->phys_port_mask; - else - val |= 1 << dsa_upstream_port(ds); - REG_WRITE(addr, 0x06, val); - - /* Default VLAN ID and priority: don't set a default VLAN - * ID, and set the default packet priority to zero. - */ - REG_WRITE(addr, 0x07, 0x0000); - /* Port Control 2: don't force a good FCS, set the maximum * frame size to 10240 bytes, don't let the switch add or * strip 802.1q tags, don't discard tagged or untagged frames @@ -281,7 +259,7 @@ static int mv88e6352_setup_port(struct dsa_switch *ds, int p) */ REG_WRITE(addr, 0x19, 0x7654); - return 0; + return mv88e6xxx_setup_port_common(ds, p); } #ifdef CONFIG_NET_DSA_HWMON @@ -385,12 +363,11 @@ static int mv88e6352_setup(struct dsa_switch *ds) int ret; int i; - mutex_init(&ps->smi_mutex); - mutex_init(&ps->stats_mutex); - mutex_init(&ps->phy_mutex); - mutex_init(&ps->eeprom_mutex); + ret = mv88e6xxx_setup_common(ds); + if (ret < 0) + return ret; - ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0; + mutex_init(&ps->eeprom_mutex); ret = mv88e6352_switch_reset(ds); if (ret < 0) @@ -729,6 +706,12 @@ struct dsa_switch_driver mv88e6352_switch_driver = { .set_eeprom = mv88e6352_set_eeprom, .get_regs_len = mv88e6xxx_get_regs_len, .get_regs = mv88e6xxx_get_regs, + .port_join_bridge = mv88e6xxx_join_bridge, + .port_leave_bridge = mv88e6xxx_leave_bridge, + .port_stp_update = mv88e6xxx_port_stp_update, + .fdb_add = mv88e6xxx_port_fdb_add, + .fdb_del = mv88e6xxx_port_fdb_del, + .fdb_getnext = mv88e6xxx_port_fdb_getnext, }; MODULE_ALIAS("platform:mv88e6352"); diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index c18ffc98aacc..13572cc24c6d 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -9,6 +9,8 @@ */ #include <linux/delay.h> +#include <linux/etherdevice.h> +#include <linux/if_bridge.h> #include <linux/jiffies.h> #include <linux/list.h> #include <linux/module.h> @@ -72,19 +74,16 @@ int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg) return ret & 0xffff; } -int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg) +/* Must be called with SMI mutex held */ +static int _mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); struct mii_bus *bus = dsa_host_dev_to_mii_bus(ds->master_dev); int ret; if (bus == NULL) return -EINVAL; - mutex_lock(&ps->smi_mutex); ret = __mv88e6xxx_reg_read(bus, ds->pd->sw_addr, addr, reg); - mutex_unlock(&ps->smi_mutex); - if (ret < 0) return ret; @@ -94,6 +93,18 @@ int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg) return ret; } +int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int ret; + + mutex_lock(&ps->smi_mutex); + ret = _mv88e6xxx_reg_read(ds, addr, reg); + mutex_unlock(&ps->smi_mutex); + + return ret; +} + int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, int reg, u16 val) { @@ -125,11 +136,11 @@ int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, return 0; } -int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) +/* Must be called with SMI mutex held */ +static int _mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, + u16 val) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); struct mii_bus *bus = dsa_host_dev_to_mii_bus(ds->master_dev); - int ret; if (bus == NULL) return -EINVAL; @@ -137,8 +148,16 @@ int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) dev_dbg(ds->master_dev, "-> addr: 0x%.2x reg: 0x%.2x val: 0x%.4x\n", addr, reg, val); + return __mv88e6xxx_reg_write(bus, ds->pd->sw_addr, addr, reg, val); +} + +int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int ret; + mutex_lock(&ps->smi_mutex); - ret = __mv88e6xxx_reg_write(bus, ds->pd->sw_addr, addr, reg, val); + ret = _mv88e6xxx_reg_write(ds, addr, reg, val); mutex_unlock(&ps->smi_mutex); return ret; @@ -627,6 +646,31 @@ int mv88e6xxx_eeprom_busy_wait(struct dsa_switch *ds) return mv88e6xxx_wait(ds, REG_GLOBAL2, 0x14, 0x8000); } +/* Must be called with SMI lock held */ +static int _mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, u16 mask) +{ + unsigned long timeout = jiffies + HZ / 10; + + while (time_before(jiffies, timeout)) { + int ret; + + ret = _mv88e6xxx_reg_read(ds, reg, offset); + if (ret < 0) + return ret; + if (!(ret & mask)) + return 0; + + usleep_range(1000, 2000); + } + return -ETIMEDOUT; +} + +/* Must be called with SMI lock held */ +static int _mv88e6xxx_atu_wait(struct dsa_switch *ds) +{ + return _mv88e6xxx_wait(ds, REG_GLOBAL, 0x0b, ATU_BUSY); +} + int mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int addr, int regnum) { int ret; @@ -700,6 +744,423 @@ int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, return 0; } +static int _mv88e6xxx_atu_cmd(struct dsa_switch *ds, int fid, u16 cmd) +{ + int ret; + + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x01, fid); + if (ret < 0) + return ret; + + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x0b, cmd); + if (ret < 0) + return ret; + + return _mv88e6xxx_atu_wait(ds); +} + +static int _mv88e6xxx_flush_fid(struct dsa_switch *ds, int fid) +{ + int ret; + + ret = _mv88e6xxx_atu_wait(ds); + if (ret < 0) + return ret; + + return _mv88e6xxx_atu_cmd(ds, fid, ATU_CMD_FLUSH_NONSTATIC_FID); +} + +static int mv88e6xxx_set_port_state(struct dsa_switch *ds, int port, u8 state) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int reg, ret; + u8 oldstate; + + mutex_lock(&ps->smi_mutex); + + reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), 0x04); + if (reg < 0) + goto abort; + + oldstate = reg & PSTATE_MASK; + if (oldstate != state) { + /* Flush forwarding database if we're moving a port + * from Learning or Forwarding state to Disabled or + * Blocking or Listening state. + */ + if (oldstate >= PSTATE_LEARNING && state <= PSTATE_BLOCKING) { + ret = _mv88e6xxx_flush_fid(ds, ps->fid[port]); + if (ret) + goto abort; + } + reg = (reg & ~PSTATE_MASK) | state; + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), 0x04, reg); + } + +abort: + mutex_unlock(&ps->smi_mutex); + return ret; +} + +/* Must be called with smi lock held */ +static int _mv88e6xxx_update_port_config(struct dsa_switch *ds, int port) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + u8 fid = ps->fid[port]; + u16 reg = fid << 12; + + if (dsa_is_cpu_port(ds, port)) + reg |= ds->phys_port_mask; + else + reg |= (ps->bridge_mask[fid] | + (1 << dsa_upstream_port(ds))) & ~(1 << port); + + return _mv88e6xxx_reg_write(ds, REG_PORT(port), 0x06, reg); +} + +/* Must be called with smi lock held */ +static int _mv88e6xxx_update_bridge_config(struct dsa_switch *ds, int fid) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int port; + u32 mask; + int ret; + + mask = ds->phys_port_mask; + while (mask) { + port = __ffs(mask); + mask &= ~(1 << port); + if (ps->fid[port] != fid) + continue; + + ret = _mv88e6xxx_update_port_config(ds, port); + if (ret) + return ret; + } + + return _mv88e6xxx_flush_fid(ds, fid); +} + +/* Bridge handling functions */ + +int mv88e6xxx_join_bridge(struct dsa_switch *ds, int port, u32 br_port_mask) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int ret = 0; + u32 nmask; + int fid; + + /* If the bridge group is not empty, join that group. + * Otherwise create a new group. + */ + fid = ps->fid[port]; + nmask = br_port_mask & ~(1 << port); + if (nmask) + fid = ps->fid[__ffs(nmask)]; + + nmask = ps->bridge_mask[fid] | (1 << port); + if (nmask != br_port_mask) { + netdev_err(ds->ports[port], + "join: Bridge port mask mismatch fid=%d mask=0x%x expected 0x%x\n", + fid, br_port_mask, nmask); + return -EINVAL; + } + + mutex_lock(&ps->smi_mutex); + + ps->bridge_mask[fid] = br_port_mask; + + if (fid != ps->fid[port]) { + ps->fid_mask |= 1 << ps->fid[port]; + ps->fid[port] = fid; + ret = _mv88e6xxx_update_bridge_config(ds, fid); + } + + mutex_unlock(&ps->smi_mutex); + + return ret; +} + +int mv88e6xxx_leave_bridge(struct dsa_switch *ds, int port, u32 br_port_mask) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + u8 fid, newfid; + int ret; + + fid = ps->fid[port]; + + if (ps->bridge_mask[fid] != br_port_mask) { + netdev_err(ds->ports[port], + "leave: Bridge port mask mismatch fid=%d mask=0x%x expected 0x%x\n", + fid, br_port_mask, ps->bridge_mask[fid]); + return -EINVAL; + } + + /* If the port was the last port of a bridge, we are done. + * Otherwise assign a new fid to the port, and fix up + * the bridge configuration. + */ + if (br_port_mask == (1 << port)) + return 0; + + mutex_lock(&ps->smi_mutex); + + newfid = __ffs(ps->fid_mask); + ps->fid[port] = newfid; + ps->fid_mask &= (1 << newfid); + ps->bridge_mask[fid] &= ~(1 << port); + ps->bridge_mask[newfid] = 1 << port; + + ret = _mv88e6xxx_update_bridge_config(ds, fid); + if (!ret) + ret = _mv88e6xxx_update_bridge_config(ds, newfid); + + mutex_unlock(&ps->smi_mutex); + + return ret; +} + +int mv88e6xxx_port_stp_update(struct dsa_switch *ds, int port, u8 state) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int stp_state; + + switch (state) { + case BR_STATE_DISABLED: + stp_state = PSTATE_DISABLED; + break; + case BR_STATE_BLOCKING: + case BR_STATE_LISTENING: + stp_state = PSTATE_BLOCKING; + break; + case BR_STATE_LEARNING: + stp_state = PSTATE_LEARNING; + break; + case BR_STATE_FORWARDING: + default: + stp_state = PSTATE_FORWARDING; + break; + } + + netdev_dbg(ds->ports[port], "port state %d [%d]\n", state, stp_state); + + /* mv88e6xxx_port_stp_update may be called with softirqs disabled, + * so we can not update the port state directly but need to schedule it. + */ + ps->port_state[port] = stp_state; + set_bit(port, &ps->port_state_update_mask); + schedule_work(&ps->bridge_work); + + return 0; +} + +static int __mv88e6xxx_write_addr(struct dsa_switch *ds, + const unsigned char *addr) +{ + int i, ret; + + for (i = 0; i < 3; i++) { + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x0d + i, + (addr[i * 2] << 8) | addr[i * 2 + 1]); + if (ret < 0) + return ret; + } + + return 0; +} + +static int __mv88e6xxx_read_addr(struct dsa_switch *ds, unsigned char *addr) +{ + int i, ret; + + for (i = 0; i < 3; i++) { + ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x0d + i); + if (ret < 0) + return ret; + addr[i * 2] = ret >> 8; + addr[i * 2 + 1] = ret & 0xff; + } + + return 0; +} + +static int __mv88e6xxx_port_fdb_cmd(struct dsa_switch *ds, int port, + const unsigned char *addr, int state) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + u8 fid = ps->fid[port]; + int ret; + + ret = _mv88e6xxx_atu_wait(ds); + if (ret < 0) + return ret; + + ret = __mv88e6xxx_write_addr(ds, addr); + if (ret < 0) + return ret; + + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x0c, + (0x10 << port) | state); + if (ret) + return ret; + + ret = _mv88e6xxx_atu_cmd(ds, fid, ATU_CMD_LOAD_FID); + + return ret; +} + +int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid) +{ + int state = is_multicast_ether_addr(addr) ? + FDB_STATE_MC_STATIC : FDB_STATE_STATIC; + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int ret; + + mutex_lock(&ps->smi_mutex); + ret = __mv88e6xxx_port_fdb_cmd(ds, port, addr, state); + mutex_unlock(&ps->smi_mutex); + + return ret; +} + +int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int ret; + + mutex_lock(&ps->smi_mutex); + ret = __mv88e6xxx_port_fdb_cmd(ds, port, addr, FDB_STATE_UNUSED); + mutex_unlock(&ps->smi_mutex); + + return ret; +} + +static int __mv88e6xxx_port_getnext(struct dsa_switch *ds, int port, + unsigned char *addr, bool *is_static) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + u8 fid = ps->fid[port]; + int ret, state; + + ret = _mv88e6xxx_atu_wait(ds); + if (ret < 0) + return ret; + + ret = __mv88e6xxx_write_addr(ds, addr); + if (ret < 0) + return ret; + + do { + ret = _mv88e6xxx_atu_cmd(ds, fid, ATU_CMD_GETNEXT_FID); + if (ret < 0) + return ret; + + ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x0c); + if (ret < 0) + return ret; + state = ret & FDB_STATE_MASK; + if (state == FDB_STATE_UNUSED) + return -ENOENT; + } while (!(((ret >> 4) & 0xff) & (1 << port))); + + ret = __mv88e6xxx_read_addr(ds, addr); + if (ret < 0) + return ret; + + *is_static = state == (is_multicast_ether_addr(addr) ? + FDB_STATE_MC_STATIC : FDB_STATE_STATIC); + + return 0; +} + +/* get next entry for port */ +int mv88e6xxx_port_fdb_getnext(struct dsa_switch *ds, int port, + unsigned char *addr, bool *is_static) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int ret; + + mutex_lock(&ps->smi_mutex); + ret = __mv88e6xxx_port_getnext(ds, port, addr, is_static); + mutex_unlock(&ps->smi_mutex); + + return ret; +} + +static void mv88e6xxx_bridge_work(struct work_struct *work) +{ + struct mv88e6xxx_priv_state *ps; + struct dsa_switch *ds; + int port; + + ps = container_of(work, struct mv88e6xxx_priv_state, bridge_work); + ds = ((struct dsa_switch *)ps) - 1; + + while (ps->port_state_update_mask) { + port = __ffs(ps->port_state_update_mask); + clear_bit(port, &ps->port_state_update_mask); + mv88e6xxx_set_port_state(ds, port, ps->port_state[port]); + } +} + +int mv88e6xxx_setup_port_common(struct dsa_switch *ds, int port) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int ret, fid; + + mutex_lock(&ps->smi_mutex); + + /* Port Control 1: disable trunking, disable sending + * learning messages to this port. + */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), 0x05, 0x0000); + if (ret) + goto abort; + + /* Port based VLAN map: give each port its own address + * database, allow the CPU port to talk to each of the 'real' + * ports, and allow each of the 'real' ports to only talk to + * the upstream port. + */ + fid = __ffs(ps->fid_mask); + ps->fid[port] = fid; + ps->fid_mask &= ~(1 << fid); + + if (!dsa_is_cpu_port(ds, port)) + ps->bridge_mask[fid] = 1 << port; + + ret = _mv88e6xxx_update_port_config(ds, port); + if (ret) + goto abort; + + /* Default VLAN ID and priority: don't set a default VLAN + * ID, and set the default packet priority to zero. + */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), 0x07, 0x0000); +abort: + mutex_unlock(&ps->smi_mutex); + return ret; +} + +int mv88e6xxx_setup_common(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + mutex_init(&ps->smi_mutex); + mutex_init(&ps->stats_mutex); + mutex_init(&ps->phy_mutex); + + ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0; + + ps->fid_mask = (1 << DSA_MAX_PORTS) - 1; + + INIT_WORK(&ps->bridge_work, mv88e6xxx_bridge_work); + + return 0; +} + static int __init mv88e6xxx_init(void) { #if IS_ENABLED(CONFIG_NET_DSA_MV88E6131) diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h index 5fd42ced9011..aaf239aba726 100644 --- a/drivers/net/dsa/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx.h @@ -15,6 +15,30 @@ #define REG_GLOBAL 0x1b #define REG_GLOBAL2 0x1c +/* ATU commands */ + +#define ATU_BUSY 0x8000 + +#define ATU_CMD_LOAD_FID (ATU_BUSY | 0x3000) +#define ATU_CMD_GETNEXT_FID (ATU_BUSY | 0x4000) +#define ATU_CMD_FLUSH_NONSTATIC_FID (ATU_BUSY | 0x6000) + +/* port states */ + +#define PSTATE_MASK 0x03 +#define PSTATE_DISABLED 0x00 +#define PSTATE_BLOCKING 0x01 +#define PSTATE_LEARNING 0x02 +#define PSTATE_FORWARDING 0x03 + +/* FDB states */ + +#define FDB_STATE_MASK 0x0f + +#define FDB_STATE_UNUSED 0x00 +#define FDB_STATE_MC_STATIC 0x07 /* static multicast */ +#define FDB_STATE_STATIC 0x0e /* static unicast */ + struct mv88e6xxx_priv_state { /* When using multi-chip addressing, this mutex protects * access to the indirect access registers. (In single-chip @@ -49,6 +73,17 @@ struct mv88e6xxx_priv_state { struct mutex eeprom_mutex; int id; /* switch product id */ + + /* hw bridging */ + + u32 fid_mask; + u8 fid[DSA_MAX_PORTS]; + u16 bridge_mask[DSA_MAX_PORTS]; + + unsigned long port_state_update_mask; + u8 port_state[DSA_MAX_PORTS]; + + struct work_struct bridge_work; }; struct mv88e6xxx_hw_stat { @@ -57,6 +92,8 @@ struct mv88e6xxx_hw_stat { int reg; }; +int mv88e6xxx_setup_port_common(struct dsa_switch *ds, int port); +int mv88e6xxx_setup_common(struct dsa_switch *ds); int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg); int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg); int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, @@ -91,6 +128,15 @@ int mv88e6xxx_phy_write_indirect(struct dsa_switch *ds, int addr, int regnum, int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e); int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, struct phy_device *phydev, struct ethtool_eee *e); +int mv88e6xxx_join_bridge(struct dsa_switch *ds, int port, u32 br_port_mask); +int mv88e6xxx_leave_bridge(struct dsa_switch *ds, int port, u32 br_port_mask); +int mv88e6xxx_port_stp_update(struct dsa_switch *ds, int port, u8 state); +int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid); +int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid); +int mv88e6xxx_port_fdb_getnext(struct dsa_switch *ds, int port, + unsigned char *addr, bool *is_static); extern struct dsa_switch_driver mv88e6131_switch_driver; extern struct dsa_switch_driver mv88e6123_61_65_switch_driver; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 6146a993a136..40d3530d7f30 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -428,13 +428,23 @@ static int xgene_enet_register_irq(struct net_device *ndev) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); struct device *dev = ndev_to_dev(ndev); + struct xgene_enet_desc_ring *ring; int ret; - ret = devm_request_irq(dev, pdata->rx_ring->irq, xgene_enet_rx_irq, - IRQF_SHARED, ndev->name, pdata->rx_ring); - if (ret) { - netdev_err(ndev, "rx%d interrupt request failed\n", - pdata->rx_ring->irq); + ring = pdata->rx_ring; + ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq, + IRQF_SHARED, ring->irq_name, ring); + if (ret) + netdev_err(ndev, "Failed to request irq %s\n", ring->irq_name); + + if (pdata->cq_cnt) { + ring = pdata->tx_ring->cp_ring; + ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq, + IRQF_SHARED, ring->irq_name, ring); + if (ret) { + netdev_err(ndev, "Failed to request irq %s\n", + ring->irq_name); + } } return ret; @@ -448,6 +458,37 @@ static void xgene_enet_free_irq(struct net_device *ndev) pdata = netdev_priv(ndev); dev = ndev_to_dev(ndev); devm_free_irq(dev, pdata->rx_ring->irq, pdata->rx_ring); + + if (pdata->cq_cnt) { + devm_free_irq(dev, pdata->tx_ring->cp_ring->irq, + pdata->tx_ring->cp_ring); + } +} + +static void xgene_enet_napi_enable(struct xgene_enet_pdata *pdata) +{ + struct napi_struct *napi; + + napi = &pdata->rx_ring->napi; + napi_enable(napi); + + if (pdata->cq_cnt) { + napi = &pdata->tx_ring->cp_ring->napi; + napi_enable(napi); + } +} + +static void xgene_enet_napi_disable(struct xgene_enet_pdata *pdata) +{ + struct napi_struct *napi; + + napi = &pdata->rx_ring->napi; + napi_disable(napi); + + if (pdata->cq_cnt) { + napi = &pdata->tx_ring->cp_ring->napi; + napi_disable(napi); + } } static int xgene_enet_open(struct net_device *ndev) @@ -462,7 +503,7 @@ static int xgene_enet_open(struct net_device *ndev) ret = xgene_enet_register_irq(ndev); if (ret) return ret; - napi_enable(&pdata->rx_ring->napi); + xgene_enet_napi_enable(pdata); if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) phy_start(pdata->phy_dev); @@ -486,7 +527,7 @@ static int xgene_enet_close(struct net_device *ndev) else cancel_delayed_work_sync(&pdata->link_work); - napi_disable(&pdata->rx_ring->napi); + xgene_enet_napi_disable(pdata); xgene_enet_free_irq(ndev); xgene_enet_process_ring(pdata->rx_ring, -1); @@ -580,6 +621,8 @@ static void xgene_enet_free_desc_rings(struct xgene_enet_pdata *pdata) if (ring) { if (ring->cp_ring && ring->cp_ring->cp_skb) devm_kfree(dev, ring->cp_ring->cp_skb); + if (ring->cp_ring && pdata->cq_cnt) + xgene_enet_free_desc_ring(ring->cp_ring); xgene_enet_free_desc_ring(ring); } @@ -673,6 +716,12 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev) rx_ring->nbufpool = NUM_BUFPOOL; rx_ring->buf_pool = buf_pool; rx_ring->irq = pdata->rx_irq; + if (!pdata->cq_cnt) { + snprintf(rx_ring->irq_name, IRQ_ID_SIZE, "%s-rx-txc", + ndev->name); + } else { + snprintf(rx_ring->irq_name, IRQ_ID_SIZE, "%s-rx", ndev->name); + } buf_pool->rx_skb = devm_kcalloc(dev, buf_pool->slots, sizeof(struct sk_buff *), GFP_KERNEL); if (!buf_pool->rx_skb) { @@ -694,7 +743,22 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev) } pdata->tx_ring = tx_ring; - cp_ring = pdata->rx_ring; + if (!pdata->cq_cnt) { + cp_ring = pdata->rx_ring; + } else { + /* allocate tx completion descriptor ring */ + ring_id = xgene_enet_get_ring_id(RING_OWNER_CPU, cpu_bufnum++); + cp_ring = xgene_enet_create_desc_ring(ndev, ring_num++, + RING_CFGSIZE_16KB, + ring_id); + if (!cp_ring) { + ret = -ENOMEM; + goto err; + } + cp_ring->irq = pdata->txc_irq; + snprintf(cp_ring->irq_name, IRQ_ID_SIZE, "%s-txc", ndev->name); + } + cp_ring->cp_skb = devm_kcalloc(dev, tx_ring->slots, sizeof(struct sk_buff *), GFP_KERNEL); if (!cp_ring->cp_skb) { @@ -853,14 +917,6 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) return -ENOMEM; } - ret = platform_get_irq(pdev, 0); - if (ret <= 0) { - dev_err(dev, "Unable to get ENET Rx IRQ\n"); - ret = ret ? : -ENXIO; - return ret; - } - pdata->rx_irq = ret; - ret = xgene_get_port_id(dev, pdata); if (ret) return ret; @@ -882,6 +938,24 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) return -ENODEV; } + ret = platform_get_irq(pdev, 0); + if (ret <= 0) { + dev_err(dev, "Unable to get ENET Rx IRQ\n"); + ret = ret ? : -ENXIO; + return ret; + } + pdata->rx_irq = ret; + + if (pdata->phy_mode != PHY_INTERFACE_MODE_RGMII) { + ret = platform_get_irq(pdev, 1); + if (ret <= 0) { + dev_err(dev, "Unable to get ENET Tx completion IRQ\n"); + ret = ret ? : -ENXIO; + return ret; + } + pdata->txc_irq = ret; + } + pdata->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(pdata->clk)) { /* Firmware may have set up the clock already. */ @@ -950,11 +1024,13 @@ static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata) pdata->mac_ops = &xgene_sgmac_ops; pdata->port_ops = &xgene_sgport_ops; pdata->rm = RM1; + pdata->cq_cnt = XGENE_MAX_TXC_RINGS; break; default: pdata->mac_ops = &xgene_xgmac_ops; pdata->port_ops = &xgene_xgport_ops; pdata->rm = RM0; + pdata->cq_cnt = XGENE_MAX_TXC_RINGS; break; } @@ -977,12 +1053,38 @@ static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata) } +static void xgene_enet_napi_add(struct xgene_enet_pdata *pdata) +{ + struct napi_struct *napi; + + napi = &pdata->rx_ring->napi; + netif_napi_add(pdata->ndev, napi, xgene_enet_napi, NAPI_POLL_WEIGHT); + + if (pdata->cq_cnt) { + napi = &pdata->tx_ring->cp_ring->napi; + netif_napi_add(pdata->ndev, napi, xgene_enet_napi, + NAPI_POLL_WEIGHT); + } +} + +static void xgene_enet_napi_del(struct xgene_enet_pdata *pdata) +{ + struct napi_struct *napi; + + napi = &pdata->rx_ring->napi; + netif_napi_del(napi); + + if (pdata->cq_cnt) { + napi = &pdata->tx_ring->cp_ring->napi; + netif_napi_del(napi); + } +} + static int xgene_enet_probe(struct platform_device *pdev) { struct net_device *ndev; struct xgene_enet_pdata *pdata; struct device *dev = &pdev->dev; - struct napi_struct *napi; struct xgene_mac_ops *mac_ops; int ret; @@ -1024,8 +1126,7 @@ static int xgene_enet_probe(struct platform_device *pdev) if (ret) goto err; - napi = &pdata->rx_ring->napi; - netif_napi_add(ndev, napi, xgene_enet_napi, NAPI_POLL_WEIGHT); + xgene_enet_napi_add(pdata); mac_ops = pdata->mac_ops; if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) ret = xgene_enet_mdio_config(pdata); @@ -1052,7 +1153,7 @@ static int xgene_enet_remove(struct platform_device *pdev) mac_ops->rx_disable(pdata); mac_ops->tx_disable(pdata); - netif_napi_del(&pdata->rx_ring->napi); + xgene_enet_napi_del(pdata); xgene_enet_mdio_remove(pdata); xgene_enet_delete_desc_rings(pdata); unregister_netdev(ndev); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index b93ed21a157f..8f3d232b09bc 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -51,6 +51,9 @@ #define START_BP_BUFNUM_1 0x2A #define START_RING_NUM_1 264 +#define IRQ_ID_SIZE 16 +#define XGENE_MAX_TXC_RINGS 1 + #define PHY_POLL_LINK_ON (10 * HZ) #define PHY_POLL_LINK_OFF (PHY_POLL_LINK_ON / 5) @@ -63,6 +66,7 @@ struct xgene_enet_desc_ring { u16 tail; u16 slots; u16 irq; + char irq_name[IRQ_ID_SIZE]; u32 size; u32 state[NUM_RING_CONFIG]; void __iomem *cmd_base; @@ -117,6 +121,8 @@ struct xgene_enet_pdata { u32 cp_qcnt_hi; u32 cp_qcnt_low; u32 rx_irq; + u32 txc_irq; + u8 cq_cnt; void __iomem *eth_csr_addr; void __iomem *eth_ring_if_addr; void __iomem *eth_diag_csr_addr; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 9677431c582a..039b0c1f480e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12557,6 +12557,7 @@ static netdev_features_t bnx2x_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { + features = vlan_features_check(skb, features); return vxlan_features_check(skb, features); } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index c38d5429e27a..31e14079e1d7 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -964,36 +964,58 @@ static void bcmgenet_free_cb(struct enet_cb *cb) dma_unmap_addr_set(cb, dma_addr, 0); } -static inline void bcmgenet_tx_ring16_int_disable(struct bcmgenet_priv *priv, - struct bcmgenet_tx_ring *ring) +static inline void bcmgenet_rx_ring16_int_disable(struct bcmgenet_rx_ring *ring) { - bcmgenet_intrl2_0_writel(priv, + bcmgenet_intrl2_0_writel(ring->priv, + UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_RXDMA_PDONE, + INTRL2_CPU_MASK_SET); +} + +static inline void bcmgenet_rx_ring16_int_enable(struct bcmgenet_rx_ring *ring) +{ + bcmgenet_intrl2_0_writel(ring->priv, + UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_RXDMA_PDONE, + INTRL2_CPU_MASK_CLEAR); +} + +static inline void bcmgenet_rx_ring_int_disable(struct bcmgenet_rx_ring *ring) +{ + bcmgenet_intrl2_1_writel(ring->priv, + 1 << (UMAC_IRQ1_RX_INTR_SHIFT + ring->index), + INTRL2_CPU_MASK_SET); +} + +static inline void bcmgenet_rx_ring_int_enable(struct bcmgenet_rx_ring *ring) +{ + bcmgenet_intrl2_1_writel(ring->priv, + 1 << (UMAC_IRQ1_RX_INTR_SHIFT + ring->index), + INTRL2_CPU_MASK_CLEAR); +} + +static inline void bcmgenet_tx_ring16_int_disable(struct bcmgenet_tx_ring *ring) +{ + bcmgenet_intrl2_0_writel(ring->priv, UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE, INTRL2_CPU_MASK_SET); } -static inline void bcmgenet_tx_ring16_int_enable(struct bcmgenet_priv *priv, - struct bcmgenet_tx_ring *ring) +static inline void bcmgenet_tx_ring16_int_enable(struct bcmgenet_tx_ring *ring) { - bcmgenet_intrl2_0_writel(priv, + bcmgenet_intrl2_0_writel(ring->priv, UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE, INTRL2_CPU_MASK_CLEAR); } -static inline void bcmgenet_tx_ring_int_enable(struct bcmgenet_priv *priv, - struct bcmgenet_tx_ring *ring) +static inline void bcmgenet_tx_ring_int_enable(struct bcmgenet_tx_ring *ring) { - bcmgenet_intrl2_1_writel(priv, (1 << ring->index), + bcmgenet_intrl2_1_writel(ring->priv, 1 << ring->index, INTRL2_CPU_MASK_CLEAR); - priv->int1_mask &= ~(1 << ring->index); } -static inline void bcmgenet_tx_ring_int_disable(struct bcmgenet_priv *priv, - struct bcmgenet_tx_ring *ring) +static inline void bcmgenet_tx_ring_int_disable(struct bcmgenet_tx_ring *ring) { - bcmgenet_intrl2_1_writel(priv, (1 << ring->index), + bcmgenet_intrl2_1_writel(ring->priv, 1 << ring->index, INTRL2_CPU_MASK_SET); - priv->int1_mask |= (1 << ring->index); } /* Unlocked version of the reclaim routine */ @@ -1085,7 +1107,7 @@ static int bcmgenet_tx_poll(struct napi_struct *napi, int budget) if (work_done == 0) { napi_complete(napi); - ring->int_enable(ring->priv, ring); + ring->int_enable(ring); return 0; } @@ -1396,11 +1418,10 @@ static struct sk_buff *bcmgenet_rx_refill(struct bcmgenet_priv *priv, /* bcmgenet_desc_rx - descriptor based rx process. * this could be called from bottom half, or from NAPI polling method. */ -static unsigned int bcmgenet_desc_rx(struct bcmgenet_priv *priv, - unsigned int index, +static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, unsigned int budget) { - struct bcmgenet_rx_ring *ring = &priv->rx_rings[index]; + struct bcmgenet_priv *priv = ring->priv; struct net_device *dev = priv->dev; struct enet_cb *cb; struct sk_buff *skb; @@ -1412,7 +1433,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_priv *priv, unsigned int discards; unsigned int chksum_ok = 0; - p_index = bcmgenet_rdma_ring_readl(priv, index, RDMA_PROD_INDEX); + p_index = bcmgenet_rdma_ring_readl(priv, ring->index, RDMA_PROD_INDEX); discards = (p_index >> DMA_P_INDEX_DISCARD_CNT_SHIFT) & DMA_P_INDEX_DISCARD_CNT_MASK; @@ -1425,7 +1446,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_priv *priv, /* Clear HW register when we reach 75% of maximum 0xFFFF */ if (ring->old_discards >= 0xC000) { ring->old_discards = 0; - bcmgenet_rdma_ring_writel(priv, index, 0, + bcmgenet_rdma_ring_writel(priv, ring->index, 0, RDMA_PROD_INDEX); } } @@ -1533,7 +1554,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_priv *priv, dev->stats.multicast++; /* Notify kernel */ - napi_gro_receive(&priv->napi, skb); + napi_gro_receive(&ring->napi, skb); netif_dbg(priv, rx_status, dev, "pushed up to kernel\n"); next: @@ -1544,12 +1565,29 @@ next: ring->read_ptr = ring->cb_ptr; ring->c_index = (ring->c_index + 1) & DMA_C_INDEX_MASK; - bcmgenet_rdma_ring_writel(priv, index, ring->c_index, RDMA_CONS_INDEX); + bcmgenet_rdma_ring_writel(priv, ring->index, ring->c_index, RDMA_CONS_INDEX); } return rxpktprocessed; } +/* Rx NAPI polling method */ +static int bcmgenet_rx_poll(struct napi_struct *napi, int budget) +{ + struct bcmgenet_rx_ring *ring = container_of(napi, + struct bcmgenet_rx_ring, napi); + unsigned int work_done; + + work_done = bcmgenet_desc_rx(ring, budget); + + if (work_done < budget) { + napi_complete(napi); + ring->int_enable(ring); + } + + return work_done; +} + /* Assign skb to RX DMA descriptor. */ static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv, struct bcmgenet_rx_ring *ring) @@ -1658,8 +1696,10 @@ static int init_umac(struct bcmgenet_priv *priv) { struct device *kdev = &priv->pdev->dev; int ret; - u32 reg, cpu_mask_clear; - int index; + u32 reg; + u32 int0_enable = 0; + u32 int1_enable = 0; + int i; dev_dbg(&priv->pdev->dev, "bcmgenet: init_umac\n"); @@ -1686,15 +1726,17 @@ static int init_umac(struct bcmgenet_priv *priv) bcmgenet_intr_disable(priv); - cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_TXDMA_BDONE; + /* Enable Rx default queue 16 interrupts */ + int0_enable |= (UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_RXDMA_PDONE); - dev_dbg(kdev, "%s:Enabling RXDMA_BDONE interrupt\n", __func__); + /* Enable Tx default queue 16 interrupts */ + int0_enable |= (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE); /* Monitor cable plug/unplugged event for internal PHY */ if (phy_is_internal(priv->phydev)) { - cpu_mask_clear |= (UMAC_IRQ_LINK_DOWN | UMAC_IRQ_LINK_UP); + int0_enable |= (UMAC_IRQ_LINK_DOWN | UMAC_IRQ_LINK_UP); } else if (priv->ext_phy) { - cpu_mask_clear |= (UMAC_IRQ_LINK_DOWN | UMAC_IRQ_LINK_UP); + int0_enable |= (UMAC_IRQ_LINK_DOWN | UMAC_IRQ_LINK_UP); } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) { reg = bcmgenet_bp_mc_get(priv); reg |= BIT(priv->hw_params->bp_in_en_shift); @@ -1709,13 +1751,18 @@ static int init_umac(struct bcmgenet_priv *priv) /* Enable MDIO interrupts on GENET v3+ */ if (priv->hw_params->flags & GENET_HAS_MDIO_INTR) - cpu_mask_clear |= UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR; + int0_enable |= (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR); + + /* Enable Rx priority queue interrupts */ + for (i = 0; i < priv->hw_params->rx_queues; ++i) + int1_enable |= (1 << (UMAC_IRQ1_RX_INTR_SHIFT + i)); - bcmgenet_intrl2_0_writel(priv, cpu_mask_clear, INTRL2_CPU_MASK_CLEAR); + /* Enable Tx priority queue interrupts */ + for (i = 0; i < priv->hw_params->tx_queues; ++i) + int1_enable |= (1 << i); - for (index = 0; index < priv->hw_params->tx_queues; index++) - bcmgenet_intrl2_1_writel(priv, (1 << index), - INTRL2_CPU_MASK_CLEAR); + bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR); + bcmgenet_intrl2_1_writel(priv, int1_enable, INTRL2_CPU_MASK_CLEAR); /* Enable rx/tx engine.*/ dev_dbg(kdev, "done init umac\n"); @@ -1734,7 +1781,6 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv, spin_lock_init(&ring->lock); ring->priv = priv; - netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); ring->index = index; if (index == DESC_INDEX) { ring->queue = 0; @@ -1778,17 +1824,6 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv, TDMA_WRITE_PTR); bcmgenet_tdma_ring_writel(priv, index, end_ptr * words_per_bd - 1, DMA_END_ADDR); - - napi_enable(&ring->napi); -} - -static void bcmgenet_fini_tx_ring(struct bcmgenet_priv *priv, - unsigned int index) -{ - struct bcmgenet_tx_ring *ring = &priv->tx_rings[index]; - - napi_disable(&ring->napi); - netif_napi_del(&ring->napi); } /* Initialize a RDMA ring */ @@ -1800,7 +1835,15 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv, u32 words_per_bd = WORDS_PER_BD(priv); int ret; + ring->priv = priv; ring->index = index; + if (index == DESC_INDEX) { + ring->int_enable = bcmgenet_rx_ring16_int_enable; + ring->int_disable = bcmgenet_rx_ring16_int_disable; + } else { + ring->int_enable = bcmgenet_rx_ring_int_enable; + ring->int_disable = bcmgenet_rx_ring_int_disable; + } ring->cbs = priv->rx_cbs + start_ptr; ring->size = size; ring->c_index = 0; @@ -1836,6 +1879,62 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv, return ret; } +static void bcmgenet_init_tx_napi(struct bcmgenet_priv *priv) +{ + unsigned int i; + struct bcmgenet_tx_ring *ring; + + for (i = 0; i < priv->hw_params->tx_queues; ++i) { + ring = &priv->tx_rings[i]; + netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); + } + + ring = &priv->tx_rings[DESC_INDEX]; + netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); +} + +static void bcmgenet_enable_tx_napi(struct bcmgenet_priv *priv) +{ + unsigned int i; + struct bcmgenet_tx_ring *ring; + + for (i = 0; i < priv->hw_params->tx_queues; ++i) { + ring = &priv->tx_rings[i]; + napi_enable(&ring->napi); + } + + ring = &priv->tx_rings[DESC_INDEX]; + napi_enable(&ring->napi); +} + +static void bcmgenet_disable_tx_napi(struct bcmgenet_priv *priv) +{ + unsigned int i; + struct bcmgenet_tx_ring *ring; + + for (i = 0; i < priv->hw_params->tx_queues; ++i) { + ring = &priv->tx_rings[i]; + napi_disable(&ring->napi); + } + + ring = &priv->tx_rings[DESC_INDEX]; + napi_disable(&ring->napi); +} + +static void bcmgenet_fini_tx_napi(struct bcmgenet_priv *priv) +{ + unsigned int i; + struct bcmgenet_tx_ring *ring; + + for (i = 0; i < priv->hw_params->tx_queues; ++i) { + ring = &priv->tx_rings[i]; + netif_napi_del(&ring->napi); + } + + ring = &priv->tx_rings[DESC_INDEX]; + netif_napi_del(&ring->napi); +} + /* Initialize Tx queues * * Queues 0-3 are priority-based, each one has 32 descriptors, @@ -1896,6 +1995,9 @@ static void bcmgenet_init_tx_queues(struct net_device *dev) bcmgenet_tdma_writel(priv, dma_priority[1], DMA_PRIORITY_1); bcmgenet_tdma_writel(priv, dma_priority[2], DMA_PRIORITY_2); + /* Initialize Tx NAPI */ + bcmgenet_init_tx_napi(priv); + /* Enable Tx queues */ bcmgenet_tdma_writel(priv, ring_cfg, DMA_RING_CFG); @@ -1905,6 +2007,62 @@ static void bcmgenet_init_tx_queues(struct net_device *dev) bcmgenet_tdma_writel(priv, dma_ctrl, DMA_CTRL); } +static void bcmgenet_init_rx_napi(struct bcmgenet_priv *priv) +{ + unsigned int i; + struct bcmgenet_rx_ring *ring; + + for (i = 0; i < priv->hw_params->rx_queues; ++i) { + ring = &priv->rx_rings[i]; + netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll, 64); + } + + ring = &priv->rx_rings[DESC_INDEX]; + netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll, 64); +} + +static void bcmgenet_enable_rx_napi(struct bcmgenet_priv *priv) +{ + unsigned int i; + struct bcmgenet_rx_ring *ring; + + for (i = 0; i < priv->hw_params->rx_queues; ++i) { + ring = &priv->rx_rings[i]; + napi_enable(&ring->napi); + } + + ring = &priv->rx_rings[DESC_INDEX]; + napi_enable(&ring->napi); +} + +static void bcmgenet_disable_rx_napi(struct bcmgenet_priv *priv) +{ + unsigned int i; + struct bcmgenet_rx_ring *ring; + + for (i = 0; i < priv->hw_params->rx_queues; ++i) { + ring = &priv->rx_rings[i]; + napi_disable(&ring->napi); + } + + ring = &priv->rx_rings[DESC_INDEX]; + napi_disable(&ring->napi); +} + +static void bcmgenet_fini_rx_napi(struct bcmgenet_priv *priv) +{ + unsigned int i; + struct bcmgenet_rx_ring *ring; + + for (i = 0; i < priv->hw_params->rx_queues; ++i) { + ring = &priv->rx_rings[i]; + netif_napi_del(&ring->napi); + } + + ring = &priv->rx_rings[DESC_INDEX]; + netif_napi_del(&ring->napi); +} + /* Initialize Rx queues * * Queues 0-15 are priority queues. Hardware Filtering Block (HFB) can be @@ -1954,6 +2112,9 @@ static int bcmgenet_init_rx_queues(struct net_device *dev) ring_cfg |= (1 << DESC_INDEX); dma_ctrl |= (1 << (DESC_INDEX + DMA_RING_BUF_EN_SHIFT)); + /* Initialize Rx NAPI */ + bcmgenet_init_rx_napi(priv); + /* Enable rings */ bcmgenet_rdma_writel(priv, ring_cfg, DMA_RING_CFG); @@ -2037,12 +2198,8 @@ static void __bcmgenet_fini_dma(struct bcmgenet_priv *priv) static void bcmgenet_fini_dma(struct bcmgenet_priv *priv) { - int i; - - bcmgenet_fini_tx_ring(priv, DESC_INDEX); - - for (i = 0; i < priv->hw_params->tx_queues; i++) - bcmgenet_fini_tx_ring(priv, i); + bcmgenet_fini_rx_napi(priv); + bcmgenet_fini_tx_napi(priv); __bcmgenet_fini_dma(priv); } @@ -2056,9 +2213,6 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) netif_dbg(priv, hw, priv->dev, "%s\n", __func__); - /* Init rDma */ - bcmgenet_rdma_writel(priv, DMA_MAX_BURST_LENGTH, DMA_SCB_BURST_SIZE); - /* Initialize common Rx ring structures */ priv->rx_bds = priv->base + priv->hw_params->rdma_offset; priv->num_rx_bds = TOTAL_DESC; @@ -2072,25 +2226,13 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) cb->bd_addr = priv->rx_bds + i * DMA_DESC_SIZE; } - /* Initialize Rx queues */ - ret = bcmgenet_init_rx_queues(priv->dev); - if (ret) { - netdev_err(priv->dev, "failed to initialize Rx queues\n"); - bcmgenet_free_rx_buffers(priv); - kfree(priv->rx_cbs); - return ret; - } - - /* Init tDma */ - bcmgenet_tdma_writel(priv, DMA_MAX_BURST_LENGTH, DMA_SCB_BURST_SIZE); - /* Initialize common TX ring structures */ priv->tx_bds = priv->base + priv->hw_params->tdma_offset; priv->num_tx_bds = TOTAL_DESC; priv->tx_cbs = kcalloc(priv->num_tx_bds, sizeof(struct enet_cb), GFP_KERNEL); if (!priv->tx_cbs) { - __bcmgenet_fini_dma(priv); + kfree(priv->rx_cbs); return -ENOMEM; } @@ -2099,28 +2241,26 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) cb->bd_addr = priv->tx_bds + i * DMA_DESC_SIZE; } - /* Initialize Tx queues */ - bcmgenet_init_tx_queues(priv->dev); - - return 0; -} + /* Init rDma */ + bcmgenet_rdma_writel(priv, DMA_MAX_BURST_LENGTH, DMA_SCB_BURST_SIZE); -/* NAPI polling method*/ -static int bcmgenet_poll(struct napi_struct *napi, int budget) -{ - struct bcmgenet_priv *priv = container_of(napi, - struct bcmgenet_priv, napi); - unsigned int work_done; + /* Initialize Rx queues */ + ret = bcmgenet_init_rx_queues(priv->dev); + if (ret) { + netdev_err(priv->dev, "failed to initialize Rx queues\n"); + bcmgenet_free_rx_buffers(priv); + kfree(priv->rx_cbs); + kfree(priv->tx_cbs); + return ret; + } - work_done = bcmgenet_desc_rx(priv, DESC_INDEX, budget); + /* Init tDma */ + bcmgenet_tdma_writel(priv, DMA_MAX_BURST_LENGTH, DMA_SCB_BURST_SIZE); - if (work_done < budget) { - napi_complete(napi); - bcmgenet_intrl2_0_writel(priv, UMAC_IRQ_RXDMA_BDONE, - INTRL2_CPU_MASK_CLEAR); - } + /* Initialize Tx queues */ + bcmgenet_init_tx_queues(priv->dev); - return work_done; + return 0; } /* Interrupt bottom half */ @@ -2147,50 +2287,66 @@ static void bcmgenet_irq_task(struct work_struct *work) } } -/* bcmgenet_isr1: interrupt handler for ring buffer. */ +/* bcmgenet_isr1: handle Rx and Tx priority queues */ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) { struct bcmgenet_priv *priv = dev_id; - struct bcmgenet_tx_ring *ring; + struct bcmgenet_rx_ring *rx_ring; + struct bcmgenet_tx_ring *tx_ring; unsigned int index; /* Save irq status for bottom-half processing. */ priv->irq1_stat = bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) & ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS); + /* clear interrupts */ bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR); netif_dbg(priv, intr, priv->dev, "%s: IRQ=0x%x\n", __func__, priv->irq1_stat); - /* Check the MBDONE interrupts. - * packet is done, reclaim descriptors - */ + /* Check Rx priority queue interrupts */ + for (index = 0; index < priv->hw_params->rx_queues; index++) { + if (!(priv->irq1_stat & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index))) + continue; + + rx_ring = &priv->rx_rings[index]; + + if (likely(napi_schedule_prep(&rx_ring->napi))) { + rx_ring->int_disable(rx_ring); + __napi_schedule(&rx_ring->napi); + } + } + + /* Check Tx priority queue interrupts */ for (index = 0; index < priv->hw_params->tx_queues; index++) { if (!(priv->irq1_stat & BIT(index))) continue; - ring = &priv->tx_rings[index]; + tx_ring = &priv->tx_rings[index]; - if (likely(napi_schedule_prep(&ring->napi))) { - ring->int_disable(priv, ring); - __napi_schedule(&ring->napi); + if (likely(napi_schedule_prep(&tx_ring->napi))) { + tx_ring->int_disable(tx_ring); + __napi_schedule(&tx_ring->napi); } } return IRQ_HANDLED; } -/* bcmgenet_isr0: Handle various interrupts. */ +/* bcmgenet_isr0: handle Rx and Tx default queues + other stuff */ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) { struct bcmgenet_priv *priv = dev_id; + struct bcmgenet_rx_ring *rx_ring; + struct bcmgenet_tx_ring *tx_ring; /* Save irq status for bottom-half processing. */ priv->irq0_stat = bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) & ~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS); + /* clear interrupts */ bcmgenet_intrl2_0_writel(priv, priv->irq0_stat, INTRL2_CPU_CLEAR); @@ -2198,25 +2354,23 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) "IRQ=0x%x\n", priv->irq0_stat); if (priv->irq0_stat & (UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_RXDMA_PDONE)) { - /* We use NAPI(software interrupt throttling, if - * Rx Descriptor throttling is not used. - * Disable interrupt, will be enabled in the poll method. - */ - if (likely(napi_schedule_prep(&priv->napi))) { - bcmgenet_intrl2_0_writel(priv, UMAC_IRQ_RXDMA_BDONE, - INTRL2_CPU_MASK_SET); - __napi_schedule(&priv->napi); + rx_ring = &priv->rx_rings[DESC_INDEX]; + + if (likely(napi_schedule_prep(&rx_ring->napi))) { + rx_ring->int_disable(rx_ring); + __napi_schedule(&rx_ring->napi); } } - if (priv->irq0_stat & - (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE)) { - struct bcmgenet_tx_ring *ring = &priv->tx_rings[DESC_INDEX]; - if (likely(napi_schedule_prep(&ring->napi))) { - ring->int_disable(priv, ring); - __napi_schedule(&ring->napi); + if (priv->irq0_stat & (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE)) { + tx_ring = &priv->tx_rings[DESC_INDEX]; + + if (likely(napi_schedule_prep(&tx_ring->napi))) { + tx_ring->int_disable(tx_ring); + __napi_schedule(&tx_ring->napi); } } + if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R | UMAC_IRQ_PHY_DET_F | UMAC_IRQ_LINK_UP | @@ -2463,7 +2617,8 @@ static void bcmgenet_netif_start(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); /* Start the network engine */ - napi_enable(&priv->napi); + bcmgenet_enable_rx_napi(priv); + bcmgenet_enable_tx_napi(priv); umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true); @@ -2568,10 +2723,10 @@ static void bcmgenet_netif_stop(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); netif_tx_stop_all_queues(dev); - napi_disable(&priv->napi); phy_stop(priv->phydev); - bcmgenet_intr_disable(priv); + bcmgenet_disable_rx_napi(priv); + bcmgenet_disable_tx_napi(priv); /* Wait for pending work items to complete. Since interrupts are * disabled no new work will be scheduled. @@ -2972,7 +3127,6 @@ static int bcmgenet_probe(struct platform_device *pdev) dev->watchdog_timeo = 2 * HZ; dev->ethtool_ops = &bcmgenet_ethtool_ops; dev->netdev_ops = &bcmgenet_netdev_ops; - netif_napi_add(dev, &priv->napi, bcmgenet_poll, 64); priv->msg_enable = netif_msg_init(-1, GENET_MSG_DEFAULT); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 7a59879d441f..a834da1dfe4c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -310,6 +310,11 @@ struct bcmgenet_mib_counters { #define UMAC_IRQ_MDIO_DONE (1 << 23) #define UMAC_IRQ_MDIO_ERROR (1 << 24) +/* INTRL2 instance 1 definitions */ +#define UMAC_IRQ1_TX_INTR_MASK 0xFFFF +#define UMAC_IRQ1_RX_INTR_MASK 0xFFFF +#define UMAC_IRQ1_RX_INTR_SHIFT 16 + /* Register block offsets */ #define GENET_SYS_OFF 0x0000 #define GENET_GR_BRIDGE_OFF 0x0040 @@ -535,14 +540,13 @@ struct bcmgenet_tx_ring { unsigned int prod_index; /* Tx ring producer index SW copy */ unsigned int cb_ptr; /* Tx ring initial CB ptr */ unsigned int end_ptr; /* Tx ring end CB ptr */ - void (*int_enable)(struct bcmgenet_priv *priv, - struct bcmgenet_tx_ring *); - void (*int_disable)(struct bcmgenet_priv *priv, - struct bcmgenet_tx_ring *); + void (*int_enable)(struct bcmgenet_tx_ring *); + void (*int_disable)(struct bcmgenet_tx_ring *); struct bcmgenet_priv *priv; }; struct bcmgenet_rx_ring { + struct napi_struct napi; /* Rx NAPI struct */ unsigned int index; /* Rx ring index */ struct enet_cb *cbs; /* Rx ring buffer control block */ unsigned int size; /* Rx ring size */ @@ -551,6 +555,9 @@ struct bcmgenet_rx_ring { unsigned int cb_ptr; /* Rx ring initial CB ptr */ unsigned int end_ptr; /* Rx ring end CB ptr */ unsigned int old_discards; + void (*int_enable)(struct bcmgenet_rx_ring *); + void (*int_disable)(struct bcmgenet_rx_ring *); + struct bcmgenet_priv *priv; }; /* device context */ @@ -558,11 +565,6 @@ struct bcmgenet_priv { void __iomem *base; enum bcmgenet_version version; struct net_device *dev; - u32 int0_mask; - u32 int1_mask; - - /* NAPI for descriptor based rx */ - struct napi_struct napi ____cacheline_aligned; /* transmit variables */ void __iomem *tx_bds; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c index 062d3c0b5818..6c8a62eefe51 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c @@ -46,17 +46,17 @@ bool cxgb_fcoe_sof_eof_supported(struct adapter *adap, struct sk_buff *skb) if ((sof != FC_SOF_I3) && (sof != FC_SOF_N3)) { dev_err(adap->pdev_dev, "Unsupported SOF 0x%x\n", sof); - return 0; + return false; } skb_copy_bits(skb, skb->len - 4, &eof, 1); if ((eof != FC_EOF_N) && (eof != FC_EOF_T)) { dev_err(adap->pdev_dev, "Unsupported EOF 0x%x\n", eof); - return 0; + return false; } - return 1; + return true; } /** diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index eb39673ed6a6..4b0494b9cc7c 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -30,11 +30,12 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/u64_stats_sync.h> +#include <linux/cpumask.h> #include "be_hw.h" #include "be_roce.h" -#define DRV_VER "10.4u" +#define DRV_VER "10.6.0.1" #define DRV_NAME "be2net" #define BE_NAME "Emulex BladeEngine2" #define BE3_NAME "Emulex BladeEngine3" @@ -183,6 +184,7 @@ struct be_eq_obj { u16 spurious_intr; struct napi_struct napi; struct be_adapter *adapter; + cpumask_var_t affinity_mask; #ifdef CONFIG_NET_RX_BUSY_POLL #define BE_EQ_IDLE 0 diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index d8df78b6554d..5ff7fba9b67c 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2342,6 +2342,7 @@ static void be_evt_queues_destroy(struct be_adapter *adapter) napi_hash_del(&eqo->napi); netif_napi_del(&eqo->napi); } + free_cpumask_var(eqo->affinity_mask); be_queue_free(adapter, &eqo->q); } } @@ -2357,6 +2358,11 @@ static int be_evt_queues_create(struct be_adapter *adapter) adapter->cfg_num_qs); for_all_evt_queues(adapter, eqo, i) { + if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev), + eqo->affinity_mask); + netif_napi_add(adapter->netdev, &eqo->napi, be_poll, BE_NAPI_WEIGHT); napi_hash_add(&eqo->napi); @@ -2448,8 +2454,9 @@ static void be_tx_queues_destroy(struct be_adapter *adapter) static int be_tx_qs_create(struct be_adapter *adapter) { - struct be_queue_info *cq, *eq; + struct be_queue_info *cq; struct be_tx_obj *txo; + struct be_eq_obj *eqo; int status, i; adapter->num_tx_qs = min(adapter->num_evt_qs, be_max_txqs(adapter)); @@ -2467,8 +2474,8 @@ static int be_tx_qs_create(struct be_adapter *adapter) /* If num_evt_qs is less than num_tx_qs, then more than * one txq share an eq */ - eq = &adapter->eq_obj[i % adapter->num_evt_qs].q; - status = be_cmd_cq_create(adapter, cq, eq, false, 3); + eqo = &adapter->eq_obj[i % adapter->num_evt_qs]; + status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3); if (status) return status; @@ -2480,6 +2487,9 @@ static int be_tx_qs_create(struct be_adapter *adapter) status = be_cmd_txq_create(adapter, txo); if (status) return status; + + netif_set_xps_queue(adapter->netdev, eqo->affinity_mask, + eqo->idx); } dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n", @@ -3028,6 +3038,8 @@ static int be_msix_register(struct be_adapter *adapter) status = request_irq(vec, be_msix, 0, eqo->desc, eqo); if (status) goto err_msix; + + irq_set_affinity_hint(vec, eqo->affinity_mask); } return 0; @@ -3072,7 +3084,7 @@ static void be_irq_unregister(struct be_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct be_eq_obj *eqo; - int i; + int i, vec; if (!adapter->isr_registered) return; @@ -3084,8 +3096,11 @@ static void be_irq_unregister(struct be_adapter *adapter) } /* MSIx */ - for_all_evt_queues(adapter, eqo, i) - free_irq(be_msix_vec_get(adapter, eqo), eqo); + for_all_evt_queues(adapter, eqo, i) { + vec = be_msix_vec_get(adapter, eqo); + irq_set_affinity_hint(vec, NULL); + free_irq(vec, eqo); + } done: adapter->isr_registered = false; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 0e07545ccc97..8457d0306e3a 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2093,6 +2093,7 @@ static const struct net_device_ops igb_netdev_ops = { #endif .ndo_fix_features = igb_fix_features, .ndo_set_features = igb_set_features, + .ndo_features_check = passthru_features_check, }; /** diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index a8339e98ad24..ebc93a101c93 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2373,6 +2373,7 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { + features = vlan_features_check(skb, features); return vxlan_features_check(skb, features); } #endif diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index a430a34a4434..367f3976df56 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -507,6 +507,7 @@ static netdev_features_t qlcnic_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { + features = vlan_features_check(skb, features); return vxlan_features_check(skb, features); } #endif diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 8678e39aba08..5a1a3e7c93e7 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2204,27 +2204,18 @@ static int try_toggle_control_gpio(struct device *dev, int value, unsigned int nsdelay) { struct gpio_desc *gpio = *desc; + enum gpiod_flags flags = value ? GPIOD_OUT_LOW : GPIOD_OUT_HIGH; int res; - gpio = devm_gpiod_get_index(dev, name, index); - if (IS_ERR(gpio)) { - if (PTR_ERR(gpio) == -ENOENT) { - *desc = NULL; - return 0; - } - + gpio = devm_gpiod_get_index_optional(dev, name, index, flags); + if (IS_ERR(gpio)) return PTR_ERR(gpio); + + if (gpio) { + if (nsdelay) + usleep_range(nsdelay, 2 * nsdelay); + gpiod_set_value_cansleep(gpio, value); } - res = gpiod_direction_output(gpio, !value); - if (res) { - dev_err(dev, "unable to toggle gpio %s: %i\n", name, res); - devm_gpiod_put(dev, gpio); - gpio = NULL; - return res; - } - if (nsdelay) - usleep_range(nsdelay, 2 * nsdelay); - gpiod_set_value_cansleep(gpio, value); *desc = gpio; return 0; diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 4815843a6019..384f057d6570 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -131,6 +131,7 @@ struct hv_netvsc_packet { struct hv_device *device; bool is_data_pkt; + bool xmit_more; /* from skb */ u16 vlan_tci; u16 q_idx; @@ -596,7 +597,16 @@ struct nvsp_message { #define VRSS_SEND_TAB_SIZE 16 -/* Per netvsc channel-specific */ +#define RNDIS_MAX_PKT_DEFAULT 8 +#define RNDIS_PKT_ALIGN_DEFAULT 8 + +struct multi_send_data { + spinlock_t lock; /* protect struct multi_send_data */ + struct hv_netvsc_packet *pkt; /* netvsc pkt pending */ + u32 count; /* counter of batched packets */ +}; + +/* Per netvsc device */ struct netvsc_device { struct hv_device *dev; @@ -647,6 +657,10 @@ struct netvsc_device { unsigned char *cb_buffer; /* The sub channel callback buffer */ unsigned char *sub_cb_buf; + + struct multi_send_data msd[NR_CPUS]; + u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ + u32 pkt_align; /* alignment bytes, e.g. 8 */ }; /* NdisInitialize message */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 208eb05446ba..b81bd37d3afb 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -37,6 +37,7 @@ static struct netvsc_device *alloc_net_device(struct hv_device *device) { struct netvsc_device *net_device; struct net_device *ndev = hv_get_drvdata(device); + int i; net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL); if (!net_device) @@ -53,6 +54,11 @@ static struct netvsc_device *alloc_net_device(struct hv_device *device) net_device->destroy = false; net_device->dev = device; net_device->ndev = ndev; + net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; + net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; + + for (i = 0; i < num_online_cpus(); i++) + spin_lock_init(&net_device->msd[i].lock); hv_set_drvdata(device, net_device); return net_device; @@ -687,12 +693,23 @@ static u32 netvsc_get_next_send_section(struct netvsc_device *net_device) static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, unsigned int section_index, + u32 pend_size, struct hv_netvsc_packet *packet) { char *start = net_device->send_buf; - char *dest = (start + (section_index * net_device->send_section_size)); + char *dest = start + (section_index * net_device->send_section_size) + + pend_size; int i; u32 msg_size = 0; + u32 padding = 0; + u32 remain = packet->total_data_buflen % net_device->pkt_align; + + /* Add padding */ + if (packet->is_data_pkt && packet->xmit_more && remain) { + padding = net_device->pkt_align - remain; + packet->rndis_msg->msg_len += padding; + packet->total_data_buflen += padding; + } for (i = 0; i < packet->page_buf_cnt; i++) { char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT); @@ -703,67 +720,48 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, msg_size += len; dest += len; } + + if (padding) { + memset(dest, 0, padding); + msg_size += padding; + } + return msg_size; } -int netvsc_send(struct hv_device *device, - struct hv_netvsc_packet *packet) +static inline int netvsc_send_pkt( + struct hv_netvsc_packet *packet, + struct netvsc_device *net_device) { - struct netvsc_device *net_device; - int ret = 0; - struct nvsp_message sendMessage; - struct net_device *ndev; - struct vmbus_channel *out_channel = NULL; - u64 req_id; - unsigned int section_index = NETVSC_INVALID_INDEX; - u32 msg_size = 0; - struct sk_buff *skb = NULL; + struct nvsp_message nvmsg; + struct vmbus_channel *out_channel = packet->channel; u16 q_idx = packet->q_idx; + struct net_device *ndev = net_device->ndev; + u64 req_id; + int ret; - - net_device = get_outbound_net_device(device); - if (!net_device) - return -ENODEV; - ndev = net_device->ndev; - - sendMessage.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT; + nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT; if (packet->is_data_pkt) { /* 0 is RMC_DATA; */ - sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 0; + nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 0; } else { /* 1 is RMC_CONTROL; */ - sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1; + nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 1; } - /* Attempt to send via sendbuf */ - if (packet->total_data_buflen < net_device->send_section_size) { - section_index = netvsc_get_next_send_section(net_device); - if (section_index != NETVSC_INVALID_INDEX) { - msg_size = netvsc_copy_to_send_buf(net_device, - section_index, - packet); - skb = (struct sk_buff *) - (unsigned long)packet->send_completion_tid; - packet->page_buf_cnt = 0; - } - } - packet->send_buf_index = section_index; - - - sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index = - section_index; - sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size; + nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_index = + packet->send_buf_index; + if (packet->send_buf_index == NETVSC_INVALID_INDEX) + nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0; + else + nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = + packet->total_data_buflen; if (packet->send_completion) req_id = (ulong)packet; else req_id = 0; - out_channel = net_device->chn_table[packet->q_idx]; - if (out_channel == NULL) - out_channel = device->channel; - packet->channel = out_channel; - if (out_channel->rescind) return -ENODEV; @@ -771,11 +769,12 @@ int netvsc_send(struct hv_device *device, ret = vmbus_sendpacket_pagebuffer(out_channel, packet->page_buf, packet->page_buf_cnt, - &sendMessage, + &nvmsg, sizeof(struct nvsp_message), req_id); } else { - ret = vmbus_sendpacket(out_channel, &sendMessage, + ret = vmbus_sendpacket( + out_channel, &nvmsg, sizeof(struct nvsp_message), req_id, VM_PKT_DATA_INBAND, @@ -809,6 +808,102 @@ int netvsc_send(struct hv_device *device, packet, ret); } + return ret; +} + +int netvsc_send(struct hv_device *device, + struct hv_netvsc_packet *packet) +{ + struct netvsc_device *net_device; + int ret = 0, m_ret = 0; + struct vmbus_channel *out_channel; + u16 q_idx = packet->q_idx; + u32 pktlen = packet->total_data_buflen, msd_len = 0; + unsigned int section_index = NETVSC_INVALID_INDEX; + struct sk_buff *skb = NULL; + unsigned long flag; + struct multi_send_data *msdp; + struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL; + + net_device = get_outbound_net_device(device); + if (!net_device) + return -ENODEV; + + out_channel = net_device->chn_table[q_idx]; + if (!out_channel) { + out_channel = device->channel; + q_idx = 0; + packet->q_idx = 0; + } + packet->channel = out_channel; + packet->send_buf_index = NETVSC_INVALID_INDEX; + + msdp = &net_device->msd[q_idx]; + + /* batch packets in send buffer if possible */ + spin_lock_irqsave(&msdp->lock, flag); + if (msdp->pkt) + msd_len = msdp->pkt->total_data_buflen; + + if (packet->is_data_pkt && msd_len > 0 && + msdp->count < net_device->max_pkt && + msd_len + pktlen + net_device->pkt_align < + net_device->send_section_size) { + section_index = msdp->pkt->send_buf_index; + + } else if (packet->is_data_pkt && pktlen + net_device->pkt_align < + net_device->send_section_size) { + section_index = netvsc_get_next_send_section(net_device); + if (section_index != NETVSC_INVALID_INDEX) { + msd_send = msdp->pkt; + msdp->pkt = NULL; + msdp->count = 0; + msd_len = 0; + } + } + + if (section_index != NETVSC_INVALID_INDEX) { + netvsc_copy_to_send_buf(net_device, + section_index, msd_len, + packet); + skb = (struct sk_buff *) + (unsigned long)packet->send_completion_tid; + + packet->page_buf_cnt = 0; + packet->send_buf_index = section_index; + packet->total_data_buflen += msd_len; + + kfree(msdp->pkt); + if (packet->xmit_more) { + msdp->pkt = packet; + msdp->count++; + } else { + cur_send = packet; + msdp->pkt = NULL; + msdp->count = 0; + } + } else { + msd_send = msdp->pkt; + msdp->pkt = NULL; + msdp->count = 0; + cur_send = packet; + } + + spin_unlock_irqrestore(&msdp->lock, flag); + + if (msd_send) { + m_ret = netvsc_send_pkt(msd_send, net_device); + + if (m_ret != 0) { + netvsc_free_send_slot(net_device, + msd_send->send_buf_index); + kfree(msd_send); + } + } + + if (cur_send) + ret = netvsc_send_pkt(cur_send, net_device); + if (ret != 0) { if (section_index != NETVSC_INVALID_INDEX) netvsc_free_send_slot(net_device, section_index); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index a06bd6614007..0c998186039e 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -413,6 +413,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) return NETDEV_TX_OK; } + packet->xmit_more = skb->xmit_more; + packet->vlan_tci = skb->vlan_tci; packet->q_idx = skb_get_queue_mapping(skb); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index ca81de04bc76..fdfab1feccfc 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -237,6 +237,7 @@ static int rndis_filter_send_request(struct rndis_device *dev, } packet->send_completion = NULL; + packet->xmit_more = false; ret = netvsc_send(dev->net_dev->dev, packet); return ret; @@ -855,6 +856,7 @@ static int rndis_filter_init_device(struct rndis_device *dev) u32 status; int ret; unsigned long t; + struct netvsc_device *nvdev = dev->net_dev; request = get_rndis_request(dev, RNDIS_MSG_INIT, RNDIS_MESSAGE_SIZE(struct rndis_initialize_request)); @@ -889,6 +891,8 @@ static int rndis_filter_init_device(struct rndis_device *dev) status = init_complete->status; if (status == RNDIS_STATUS_SUCCESS) { dev->state = RNDIS_DEV_INITIALIZED; + nvdev->max_pkt = init_complete->max_pkt_per_msg; + nvdev->pkt_align = 1 << init_complete->pkt_alignment_factor; ret = 0; } else { dev->state = RNDIS_DEV_UNINITIALIZED; @@ -1137,8 +1141,6 @@ int rndis_filter_device_add(struct hv_device *dev, net_device->num_chn = 1 + init_packet->msg.v5_msg.subchn_comp.num_subchannels; - vmbus_are_subchannels_present(dev->channel); - ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn); out: diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index a23319fc78ca..6928448f6b7f 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1979,6 +1979,7 @@ static const struct net_device_ops team_netdev_ops = { .ndo_change_carrier = team_change_carrier, .ndo_bridge_setlink = ndo_dflt_netdev_switch_port_bridge_setlink, .ndo_bridge_dellink = ndo_dflt_netdev_switch_port_bridge_dellink, + .ndo_features_check = passthru_features_check, }; /*********************** diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 4e84236b62ce..a829930dac15 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -749,9 +749,9 @@ static int virtnet_poll(struct napi_struct *napi, int budget) { struct receive_queue *rq = container_of(napi, struct receive_queue, napi); - unsigned int r, received = 0; + unsigned int r, received; - received += virtnet_receive(rq, budget - received); + received = virtnet_receive(rq, budget); /* Out of packets? */ if (received < budget) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 280a315de8d6..d5cda067115a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -59,6 +59,7 @@ enum bpf_arg_type { ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ + ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ }; diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b11b28a30b9e..920e4457ce6e 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -561,4 +561,71 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, skb->protocol = htons(ETH_P_802_2); } +/** + * skb_vlan_tagged - check if skb is vlan tagged. + * @skb: skbuff to query + * + * Returns true if the skb is tagged, regardless of whether it is hardware + * accelerated or not. + */ +static inline bool skb_vlan_tagged(const struct sk_buff *skb) +{ + if (!skb_vlan_tag_present(skb) && + likely(skb->protocol != htons(ETH_P_8021Q) && + skb->protocol != htons(ETH_P_8021AD))) + return false; + + return true; +} + +/** + * skb_vlan_tagged_multi - check if skb is vlan tagged with multiple headers. + * @skb: skbuff to query + * + * Returns true if the skb is tagged with multiple vlan headers, regardless + * of whether it is hardware accelerated or not. + */ +static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) +{ + __be16 protocol = skb->protocol; + + if (!skb_vlan_tag_present(skb)) { + struct vlan_ethhdr *veh; + + if (likely(protocol != htons(ETH_P_8021Q) && + protocol != htons(ETH_P_8021AD))) + return false; + + veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; + } + + if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) + return false; + + return true; +} + +/** + * vlan_features_check - drop unsafe features for skb with multiple tags. + * @skb: skbuff to query + * @features: features to be checked + * + * Returns features without unsafe ones if the skb has multiple tags. + */ +static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, + netdev_features_t features) +{ + if (skb_vlan_tagged_multi(skb)) + features = netdev_intersect_features(features, + NETIF_F_SG | + NETIF_F_HIGHDMA | + NETIF_F_FRAGLIST | + NETIF_F_GEN_CSUM | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); + + return features; +} + #endif /* !(_LINUX_IF_VLAN_H_) */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 08c4ab37189f..967bb4c8caf1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3657,6 +3657,9 @@ void netdev_change_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); +netdev_features_t passthru_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features); netdev_features_t netif_skb_features(struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 99f2e49a8a07..e23d242d1230 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -88,7 +88,7 @@ struct rhashtable_compare_arg { }; typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); -typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 seed); +typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed); typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, const void *obj); @@ -242,7 +242,9 @@ static inline unsigned int rht_head_hashfn( const char *ptr = rht_obj(ht, he); return likely(params.obj_hashfn) ? - rht_bucket_index(tbl, params.obj_hashfn(ptr, tbl->hash_rnd)) : + rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?: + ht->p.key_len, + tbl->hash_rnd)) : rht_key_hashfn(ht, tbl, ptr + params.key_offset, params); } diff --git a/include/net/dsa.h b/include/net/dsa.h index 47917e5e1e12..fbca63ba8f73 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -296,6 +296,12 @@ struct dsa_switch_driver { u32 br_port_mask); int (*port_stp_update)(struct dsa_switch *ds, int port, u8 state); + int (*fdb_add)(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid); + int (*fdb_del)(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid); + int (*fdb_getnext)(struct dsa_switch *ds, int port, + unsigned char *addr, bool *is_static); }; void register_switch_driver(struct dsa_switch_driver *type); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d756af559977..b8cd60dcb4e1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -138,19 +138,12 @@ struct nft_userdata { /** * struct nft_set_elem - generic representation of set elements * - * @cookie: implementation specific element cookie * @key: element key - * @data: element data (maps only) - * @flags: element flags (end of interval) - * - * The cookie can be used to store a handle to the element for subsequent - * removal. + * @priv: element private data and extensions */ struct nft_set_elem { - void *cookie; struct nft_data key; - struct nft_data data; - u32 flags; + void *priv; }; struct nft_set; @@ -202,11 +195,15 @@ struct nft_set_estimate { enum nft_set_class class; }; +struct nft_set_ext; + /** * struct nft_set_ops - nf_tables set operations * * @lookup: look up an element within the set * @insert: insert new element into set + * @activate: activate new element in the next generation + * @deactivate: deactivate element in the next generation * @remove: remove element from set * @walk: iterate over all set elemeennts * @privsize: function to return size of set private data @@ -214,16 +211,19 @@ struct nft_set_estimate { * @destroy: destroy private data of set instance * @list: nf_tables_set_ops list node * @owner: module reference + * @elemsize: element private size * @features: features supported by the implementation */ struct nft_set_ops { bool (*lookup)(const struct nft_set *set, const struct nft_data *key, - struct nft_data *data); - int (*get)(const struct nft_set *set, - struct nft_set_elem *elem); + const struct nft_set_ext **ext); int (*insert)(const struct nft_set *set, const struct nft_set_elem *elem); + void (*activate)(const struct nft_set *set, + const struct nft_set_elem *elem); + void * (*deactivate)(const struct nft_set *set, + const struct nft_set_elem *elem); void (*remove)(const struct nft_set *set, const struct nft_set_elem *elem); void (*walk)(const struct nft_ctx *ctx, @@ -241,6 +241,7 @@ struct nft_set_ops { struct list_head list; struct module *owner; + unsigned int elemsize; u32 features; }; @@ -259,6 +260,7 @@ void nft_unregister_set(struct nft_set_ops *ops); * @nelems: number of elements * @policy: set parameterization (see enum nft_set_policies) * @ops: set ops + * @pnet: network namespace * @flags: set flags * @klen: key length * @dlen: data length @@ -275,6 +277,7 @@ struct nft_set { u16 policy; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; + possible_net_t pnet; u16 flags; u8 klen; u8 dlen; @@ -311,6 +314,121 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); +/** + * enum nft_set_extensions - set extension type IDs + * + * @NFT_SET_EXT_KEY: element key + * @NFT_SET_EXT_DATA: mapping data + * @NFT_SET_EXT_FLAGS: element flags + * @NFT_SET_EXT_NUM: number of extension types + */ +enum nft_set_extensions { + NFT_SET_EXT_KEY, + NFT_SET_EXT_DATA, + NFT_SET_EXT_FLAGS, + NFT_SET_EXT_NUM +}; + +/** + * struct nft_set_ext_type - set extension type + * + * @len: fixed part length of the extension + * @align: alignment requirements of the extension + */ +struct nft_set_ext_type { + u8 len; + u8 align; +}; + +extern const struct nft_set_ext_type nft_set_ext_types[]; + +/** + * struct nft_set_ext_tmpl - set extension template + * + * @len: length of extension area + * @offset: offsets of individual extension types + */ +struct nft_set_ext_tmpl { + u16 len; + u8 offset[NFT_SET_EXT_NUM]; +}; + +/** + * struct nft_set_ext - set extensions + * + * @genmask: generation mask + * @offset: offsets of individual extension types + * @data: beginning of extension data + */ +struct nft_set_ext { + u8 genmask; + u8 offset[NFT_SET_EXT_NUM]; + char data[0]; +}; + +static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) +{ + memset(tmpl, 0, sizeof(*tmpl)); + tmpl->len = sizeof(struct nft_set_ext); +} + +static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, + unsigned int len) +{ + tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align); + BUG_ON(tmpl->len > U8_MAX); + tmpl->offset[id] = tmpl->len; + tmpl->len += nft_set_ext_types[id].len + len; +} + +static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) +{ + nft_set_ext_add_length(tmpl, id, 0); +} + +static inline void nft_set_ext_init(struct nft_set_ext *ext, + const struct nft_set_ext_tmpl *tmpl) +{ + memcpy(ext->offset, tmpl->offset, sizeof(ext->offset)); +} + +static inline bool __nft_set_ext_exists(const struct nft_set_ext *ext, u8 id) +{ + return !!ext->offset[id]; +} + +static inline bool nft_set_ext_exists(const struct nft_set_ext *ext, u8 id) +{ + return ext && __nft_set_ext_exists(ext, id); +} + +static inline void *nft_set_ext(const struct nft_set_ext *ext, u8 id) +{ + return (void *)ext + ext->offset[id]; +} + +static inline struct nft_data *nft_set_ext_key(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_KEY); +} + +static inline struct nft_data *nft_set_ext_data(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_DATA); +} + +static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_FLAGS); +} + +static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, + void *elem) +{ + return elem + set->ops->elemsize; +} + +void nft_set_elem_destroy(const struct nft_set *set, void *elem); /** * struct nft_expr_type - nf_tables expression type @@ -449,7 +567,6 @@ enum nft_chain_flags { * * @rules: list of rules in the chain * @list: used internally - * @net: net namespace that this chain belongs to * @table: table that this chain belongs to * @handle: chain handle * @use: number of jump references to this chain @@ -460,7 +577,6 @@ enum nft_chain_flags { struct nft_chain { struct list_head rules; struct list_head list; - struct net *net; struct nft_table *table; u64 handle; u32 use; @@ -512,6 +628,7 @@ struct nft_stats { * struct nft_base_chain - nf_tables base chain * * @ops: netfilter hook ops + * @pnet: net namespace that this chain belongs to * @type: chain type * @policy: default policy * @stats: per-cpu chain stats @@ -519,6 +636,7 @@ struct nft_stats { */ struct nft_base_chain { struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; + possible_net_t pnet; const struct nf_chain_type *type; u8 policy; struct nft_stats __percpu *stats; @@ -605,6 +723,50 @@ void nft_unregister_expr(struct nft_expr_type *); #define MODULE_ALIAS_NFT_SET() \ MODULE_ALIAS("nft-set") +/* + * The gencursor defines two generations, the currently active and the + * next one. Objects contain a bitmask of 2 bits specifying the generations + * they're active in. A set bit means they're inactive in the generation + * represented by that bit. + * + * New objects start out as inactive in the current and active in the + * next generation. When committing the ruleset the bitmask is cleared, + * meaning they're active in all generations. When removing an object, + * it is set inactive in the next generation. After committing the ruleset, + * the objects are removed. + */ +static inline unsigned int nft_gencursor_next(const struct net *net) +{ + return net->nft.gencursor + 1 == 1 ? 1 : 0; +} + +static inline u8 nft_genmask_next(const struct net *net) +{ + return 1 << nft_gencursor_next(net); +} + +static inline u8 nft_genmask_cur(const struct net *net) +{ + /* Use ACCESS_ONCE() to prevent refetching the value for atomicity */ + return 1 << ACCESS_ONCE(net->nft.gencursor); +} + +/* + * Set element transaction helpers + */ + +static inline bool nft_set_elem_active(const struct nft_set_ext *ext, + u8 genmask) +{ + return !(ext->genmask & genmask); +} + +static inline void nft_set_elem_change_active(const struct nft_set *set, + struct nft_set_ext *ext) +{ + ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet)); +} + /** * struct nft_trans - nf_tables object update in transaction * diff --git a/include/net/tcp.h b/include/net/tcp.h index 42690daa924e..963303fb96ae 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -529,8 +529,6 @@ int tcp_write_wakeup(struct sock *); void tcp_send_fin(struct sock *sk); void tcp_send_active_reset(struct sock *sk, gfp_t priority); int tcp_send_synack(struct sock *); -bool tcp_syn_flood_action(struct sock *sk, const struct sk_buff *skb, - const char *proto); void tcp_push_one(struct sock *, unsigned int mss_now); void tcp_send_ack(struct sock *sk); void tcp_send_delayed_ack(struct sock *sk); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 27dc4ec58840..74aab6e0d964 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -168,6 +168,7 @@ enum bpf_func_id { BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ + BPF_FUNC_skb_store_bytes, /* int skb_store_bytes(skb, offset, from, len) */ __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0e714f799ec0..630a7bac1e51 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -773,6 +773,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno, expected_type = CONST_IMM; } else if (arg_type == ARG_CONST_MAP_PTR) { expected_type = CONST_PTR_TO_MAP; + } else if (arg_type == ARG_PTR_TO_CTX) { + expected_type = PTR_TO_CTX; } else { verbose("unsupported arg_type %d\n", arg_type); return -EFAULT; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 4b7b7e672b93..4898442b837f 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -691,7 +691,7 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) * struct rhash_head node; * }; * - * u32 my_hash_fn(const void *data, u32 seed) + * u32 my_hash_fn(const void *data, u32 len, u32 seed) * { * struct test_obj *obj = data; * diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index f196552ec3c4..8b5ab9033b41 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -554,6 +554,7 @@ static int vlan_dev_init(struct net_device *dev) if (dev->features & NETIF_F_VLAN_FEATURES) netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n"); + dev->vlan_features = real_dev->vlan_features & ~NETIF_F_ALL_FCOE; /* ipv6 shared card related stuff */ dev->dev_id = real_dev->dev_id; diff --git a/net/core/dev.c b/net/core/dev.c index a0408d497dae..3a06003ecafd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2562,12 +2562,26 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, return features; } +netdev_features_t passthru_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + return features; +} +EXPORT_SYMBOL(passthru_features_check); + +static netdev_features_t dflt_features_check(const struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + return vlan_features_check(skb, features); +} + netdev_features_t netif_skb_features(struct sk_buff *skb) { struct net_device *dev = skb->dev; netdev_features_t features = dev->features; u16 gso_segs = skb_shinfo(skb)->gso_segs; - __be16 protocol = skb->protocol; if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs) features &= ~NETIF_F_GSO_MASK; @@ -2579,34 +2593,17 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) if (skb->encapsulation) features &= dev->hw_enc_features; - if (!skb_vlan_tag_present(skb)) { - if (unlikely(protocol == htons(ETH_P_8021Q) || - protocol == htons(ETH_P_8021AD))) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - protocol = veh->h_vlan_encapsulated_proto; - } else { - goto finalize; - } - } - - features = netdev_intersect_features(features, - dev->vlan_features | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); - - if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) + if (skb_vlan_tagged(skb)) features = netdev_intersect_features(features, - NETIF_F_SG | - NETIF_F_HIGHDMA | - NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | + dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); -finalize: if (dev->netdev_ops->ndo_features_check) features &= dev->netdev_ops->ndo_features_check(skb, dev, features); + else + features &= dflt_features_check(skb, dev, features); return harmonize_features(skb, features); } diff --git a/net/core/filter.c b/net/core/filter.c index 32f43c59908c..444a07e4f68d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1175,6 +1175,56 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) return 0; } +static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1; + unsigned int offset = (unsigned int) r2; + void *from = (void *) (long) r3; + unsigned int len = (unsigned int) r4; + char buf[16]; + void *ptr; + + /* bpf verifier guarantees that: + * 'from' pointer points to bpf program stack + * 'len' bytes of it were initialized + * 'len' > 0 + * 'skb' is a valid pointer to 'struct sk_buff' + * + * so check for invalid 'offset' and too large 'len' + */ + if (offset > 0xffff || len > sizeof(buf)) + return -EFAULT; + + if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len)) + return -EFAULT; + + ptr = skb_header_pointer(skb, offset, len, buf); + if (unlikely(!ptr)) + return -EFAULT; + + skb_postpull_rcsum(skb, ptr, len); + + memcpy(ptr, from, len); + + if (ptr == buf) + /* skb_store_bits cannot return -EFAULT here */ + skb_store_bits(skb, offset, ptr, len); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0)); + return 0; +} + +const struct bpf_func_proto bpf_skb_store_bytes_proto = { + .func = bpf_skb_store_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_STACK, + .arg4_type = ARG_CONST_STACK_SIZE, +}; + static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) { @@ -1194,6 +1244,17 @@ sk_filter_func_proto(enum bpf_func_id func_id) } } +static const struct bpf_func_proto * +tc_cls_act_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_skb_store_bytes: + return &bpf_skb_store_bytes_proto; + default: + return sk_filter_func_proto(func_id); + } +} + static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type) { @@ -1270,18 +1331,24 @@ static const struct bpf_verifier_ops sk_filter_ops = { .convert_ctx_access = sk_filter_convert_ctx_access, }; +static const struct bpf_verifier_ops tc_cls_act_ops = { + .get_func_proto = tc_cls_act_func_proto, + .is_valid_access = sk_filter_is_valid_access, + .convert_ctx_access = sk_filter_convert_ctx_access, +}; + static struct bpf_prog_type_list sk_filter_type __read_mostly = { .ops = &sk_filter_ops, .type = BPF_PROG_TYPE_SOCKET_FILTER, }; static struct bpf_prog_type_list sched_cls_type __read_mostly = { - .ops = &sk_filter_ops, + .ops = &tc_cls_act_ops, .type = BPF_PROG_TYPE_SCHED_CLS, }; static struct bpf_prog_type_list sched_act_type __read_mostly = { - .ops = &sk_filter_ops, + .ops = &tc_cls_act_ops, .type = BPF_PROG_TYPE_SCHED_ACT, }; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 39555f3f263b..3597724ec3d8 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -201,6 +201,105 @@ out: return 0; } +static int dsa_slave_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid, u16 nlm_flags) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret = -EOPNOTSUPP; + + if (ds->drv->fdb_add) + ret = ds->drv->fdb_add(ds, p->port, addr, vid); + + return ret; +} + +static int dsa_slave_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret = -EOPNOTSUPP; + + if (ds->drv->fdb_del) + ret = ds->drv->fdb_del(ds, p->port, addr, vid); + + return ret; +} + +static int dsa_slave_fill_info(struct net_device *dev, struct sk_buff *skb, + const unsigned char *addr, u16 vid, + bool is_static, + u32 portid, u32 seq, int type, + unsigned int flags) +{ + struct nlmsghdr *nlh; + struct ndmsg *ndm; + + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); + if (!nlh) + return -EMSGSIZE; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = AF_BRIDGE; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; + ndm->ndm_flags = NTF_EXT_LEARNED; + ndm->ndm_type = 0; + ndm->ndm_ifindex = dev->ifindex; + ndm->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE; + + if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) + goto nla_put_failure; + + if (vid && nla_put_u16(skb, NDA_VLAN, vid)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +/* Dump information about entries, in response to GETNEIGH */ +static int dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, + struct net_device *filter_dev, int idx) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + unsigned char addr[ETH_ALEN] = { 0 }; + int ret; + + if (!ds->drv->fdb_getnext) + return -EOPNOTSUPP; + + for (; ; idx++) { + bool is_static; + + ret = ds->drv->fdb_getnext(ds, p->port, addr, &is_static); + if (ret < 0) + break; + + if (idx < cb->args[0]) + continue; + + ret = dsa_slave_fill_info(dev, skb, addr, 0, + is_static, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, NLM_F_MULTI); + if (ret < 0) + break; + } + + return idx; +} + static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -572,6 +671,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, + .ndo_fdb_add = dsa_slave_fdb_add, + .ndo_fdb_del = dsa_slave_fdb_del, + .ndo_fdb_dump = dsa_slave_fdb_dump, .ndo_do_ioctl = dsa_slave_ioctl, }; diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index d059182c1466..e7ad950cf9ef 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -10,8 +10,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/kernel.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/skbuff.h> @@ -27,7 +29,7 @@ static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 5, + .level = LOGLEVEL_NOTICE, .logflags = NF_LOG_MASK, }, }, diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 75101980eeee..076aadda0473 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -5,8 +5,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/kernel.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/skbuff.h> @@ -26,7 +28,7 @@ static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 5, + .level = LOGLEVEL_NOTICE, .logflags = NF_LOG_MASK, }, }, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 023196f7ec37..18b80e8bc533 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5987,6 +5987,35 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, } EXPORT_SYMBOL(inet_reqsk_alloc); +/* + * Return true if a syncookie should be sent + */ +static bool tcp_syn_flood_action(struct sock *sk, + const struct sk_buff *skb, + const char *proto) +{ + const char *msg = "Dropping request"; + bool want_cookie = false; + struct listen_sock *lopt; + +#ifdef CONFIG_SYN_COOKIES + if (sysctl_tcp_syncookies) { + msg = "Sending cookies"; + want_cookie = true; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); + } else +#endif + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); + + lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; + if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) { + lopt->synflood_warned = 1; + pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", + proto, ntohs(tcp_hdr(skb)->dest), msg); + } + return want_cookie; +} + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e073517b2cc7..5aababa20a21 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -856,35 +856,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -/* - * Return true if a syncookie should be sent - */ -bool tcp_syn_flood_action(struct sock *sk, - const struct sk_buff *skb, - const char *proto) -{ - const char *msg = "Dropping request"; - bool want_cookie = false; - struct listen_sock *lopt; - -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) { - msg = "Sending cookies"; - want_cookie = true; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); - } else -#endif - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); - - lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; - if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) { - lopt->synflood_warned = 1; - pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", - proto, ntohs(tcp_hdr(skb)->dest), msg); - } - return want_cookie; -} -EXPORT_SYMBOL(tcp_syn_flood_action); #ifdef CONFIG_TCP_MD5SIG /* diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 27ca79682efb..273eb26cd6d4 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -299,19 +299,16 @@ static int __net_init fib6_rules_net_init(struct net *net) ops = fib_rules_register(&fib6_rules_ops_template, net); if (IS_ERR(ops)) return PTR_ERR(ops); - net->ipv6.fib6_rules_ops = ops; - - err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0, - RT6_TABLE_LOCAL, 0); + err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL, 0); if (err) goto out_fib6_rules_ops; - err = fib_default_rule_add(net->ipv6.fib6_rules_ops, - 0x7FFE, RT6_TABLE_MAIN, 0); + err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN, 0); if (err) goto out_fib6_rules_ops; + net->ipv6.fib6_rules_ops = ops; out: return err; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index bb00c6f2a885..83f59dc3cccc 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -9,7 +9,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> #include <linux/capability.h> #include <linux/in.h> #include <linux/skbuff.h> @@ -234,7 +237,7 @@ static struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 4, + .level = LOGLEVEL_WARNING, .logflags = NF_LOG_MASK, }, }, diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index ddf07e6f59d7..8dd869642f45 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -5,8 +5,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/kernel.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/skbuff.h> @@ -27,7 +29,7 @@ static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 5, + .level = LOGLEVEL_NOTICE, .logflags = NF_LOG_MASK, }, }, diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 971cd7526f4b..f70e34a68f70 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -522,7 +522,6 @@ config NFT_NAT typical Network Address Translation (NAT) packet transformations. config NFT_QUEUE - depends on NETFILTER_XTABLES depends on NETFILTER_NETLINK_QUEUE tristate "Netfilter nf_tables queue module" help diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 363a39a6c286..5604c2df05d1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -198,36 +198,31 @@ static int nft_delchain(struct nft_ctx *ctx) static inline bool nft_rule_is_active(struct net *net, const struct nft_rule *rule) { - return (rule->genmask & (1 << net->nft.gencursor)) == 0; -} - -static inline int gencursor_next(struct net *net) -{ - return net->nft.gencursor+1 == 1 ? 1 : 0; + return (rule->genmask & nft_genmask_cur(net)) == 0; } static inline int nft_rule_is_active_next(struct net *net, const struct nft_rule *rule) { - return (rule->genmask & (1 << gencursor_next(net))) == 0; + return (rule->genmask & nft_genmask_next(net)) == 0; } static inline void nft_rule_activate_next(struct net *net, struct nft_rule *rule) { /* Now inactive, will be active in the future */ - rule->genmask = (1 << net->nft.gencursor); + rule->genmask = nft_genmask_cur(net); } static inline void nft_rule_deactivate_next(struct net *net, struct nft_rule *rule) { - rule->genmask = (1 << gencursor_next(net)); + rule->genmask = nft_genmask_next(net); } static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) { - rule->genmask &= ~(1 << gencursor_next(net)); + rule->genmask &= ~nft_genmask_next(net); } static int @@ -1354,6 +1349,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, rcu_assign_pointer(basechain->stats, stats); } + write_pnet(&basechain->pnet, net); basechain->type = type; chain = &basechain->chain; @@ -1381,7 +1377,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&chain->rules); chain->handle = nf_tables_alloc_handle(table); - chain->net = net; chain->table = table; nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); @@ -2695,6 +2690,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, goto err2; INIT_LIST_HEAD(&set->bindings); + write_pnet(&set->pnet, net); set->ops = ops; set->ktype = ktype; set->klen = desc.klen; @@ -2771,10 +2767,11 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, const struct nft_set_elem *elem) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); enum nft_registers dreg; dreg = nft_type_to_reg(set->dtype); - return nft_validate_data_load(ctx, dreg, &elem->data, + return nft_validate_data_load(ctx, dreg, nft_set_ext_data(ext), set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE); } @@ -2827,6 +2824,22 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, nf_tables_set_destroy(ctx, set); } +const struct nft_set_ext_type nft_set_ext_types[] = { + [NFT_SET_EXT_KEY] = { + .len = sizeof(struct nft_data), + .align = __alignof__(struct nft_data), + }, + [NFT_SET_EXT_DATA] = { + .len = sizeof(struct nft_data), + .align = __alignof__(struct nft_data), + }, + [NFT_SET_EXT_FLAGS] = { + .len = sizeof(u8), + .align = __alignof__(u8), + }, +}; +EXPORT_SYMBOL_GPL(nft_set_ext_types); + /* * Set elements */ @@ -2873,6 +2886,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, const struct nft_set *set, const struct nft_set_elem *elem) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -2880,20 +2894,20 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nest == NULL) goto nla_put_failure; - if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE, - set->klen) < 0) + if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext), + NFT_DATA_VALUE, set->klen) < 0) goto nla_put_failure; - if (set->flags & NFT_SET_MAP && - !(elem->flags & NFT_SET_ELEM_INTERVAL_END) && - nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data, + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && + nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext), set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE, set->dlen) < 0) goto nla_put_failure; - if (elem->flags != 0) - if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags))) - goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && + nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, + htonl(*nft_set_ext_flags(ext)))) + goto nla_put_failure; nla_nest_end(skb, nest); return 0; @@ -3114,15 +3128,54 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, return trans; } +static void *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const struct nft_data *key, + const struct nft_data *data, + gfp_t gfp) +{ + struct nft_set_ext *ext; + void *elem; + + elem = kzalloc(set->ops->elemsize + tmpl->len, gfp); + if (elem == NULL) + return NULL; + + ext = nft_set_elem_ext(set, elem); + nft_set_ext_init(ext, tmpl); + + memcpy(nft_set_ext_key(ext), key, set->klen); + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + memcpy(nft_set_ext_data(ext), data, set->dlen); + + return elem; +} + +void nft_set_elem_destroy(const struct nft_set *set, void *elem) +{ + struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + + nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_uninit(nft_set_ext_data(ext), set->dtype); + + kfree(elem); +} +EXPORT_SYMBOL_GPL(nft_set_elem_destroy); + static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_data_desc d1, d2; + struct nft_set_ext_tmpl tmpl; + struct nft_set_ext *ext; struct nft_set_elem elem; struct nft_set_binding *binding; + struct nft_data data; enum nft_registers dreg; struct nft_trans *trans; + u32 flags; int err; if (set->size && set->nelems == set->size) @@ -3136,22 +3189,26 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_KEY] == NULL) return -EINVAL; - elem.flags = 0; + nft_set_ext_prepare(&tmpl); + + flags = 0; if (nla[NFTA_SET_ELEM_FLAGS] != NULL) { - elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); - if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END) + flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); + if (flags & ~NFT_SET_ELEM_INTERVAL_END) return -EINVAL; if (!(set->flags & NFT_SET_INTERVAL) && - elem.flags & NFT_SET_ELEM_INTERVAL_END) + flags & NFT_SET_ELEM_INTERVAL_END) return -EINVAL; + if (flags != 0) + nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); } if (set->flags & NFT_SET_MAP) { if (nla[NFTA_SET_ELEM_DATA] == NULL && - !(elem.flags & NFT_SET_ELEM_INTERVAL_END)) + !(flags & NFT_SET_ELEM_INTERVAL_END)) return -EINVAL; if (nla[NFTA_SET_ELEM_DATA] != NULL && - elem.flags & NFT_SET_ELEM_INTERVAL_END) + flags & NFT_SET_ELEM_INTERVAL_END) return -EINVAL; } else { if (nla[NFTA_SET_ELEM_DATA] != NULL) @@ -3165,12 +3222,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (d1.type != NFT_DATA_VALUE || d1.len != set->klen) goto err2; - err = -EEXIST; - if (set->ops->get(set, &elem) == 0) - goto err2; + nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY); if (nla[NFTA_SET_ELEM_DATA] != NULL) { - err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]); + err = nft_data_init(ctx, &data, &d2, nla[NFTA_SET_ELEM_DATA]); if (err < 0) goto err2; @@ -3187,29 +3242,43 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, }; err = nft_validate_data_load(&bind_ctx, dreg, - &elem.data, d2.type); + &data, d2.type); if (err < 0) goto err3; } + + nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA); } + err = -ENOMEM; + elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, GFP_KERNEL); + if (elem.priv == NULL) + goto err3; + + ext = nft_set_elem_ext(set, elem.priv); + if (flags) + *nft_set_ext_flags(ext) = flags; + trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) - goto err3; + goto err4; + ext->genmask = nft_genmask_cur(ctx->net); err = set->ops->insert(set, &elem); if (err < 0) - goto err4; + goto err5; nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; -err4: +err5: kfree(trans); +err4: + kfree(elem.priv); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) - nft_data_uninit(&elem.data, d2.type); + nft_data_uninit(&data, d2.type); err2: nft_data_uninit(&elem.key, d1.type); err1: @@ -3282,19 +3351,24 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) goto err2; - err = set->ops->get(set, &elem); - if (err < 0) - goto err2; - trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); if (trans == NULL) { err = -ENOMEM; goto err2; } + elem.priv = set->ops->deactivate(set, &elem); + if (elem.priv == NULL) { + err = -ENOENT; + goto err3; + } + nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; + +err3: + kfree(trans); err2: nft_data_uninit(&elem.key, desc.type); err1: @@ -3532,6 +3606,10 @@ static void nf_tables_commit_release(struct nft_trans *trans) case NFT_MSG_DELSET: nft_set_destroy(nft_trans_set(trans)); break; + case NFT_MSG_DELSETELEM: + nft_set_elem_destroy(nft_trans_elem_set(trans), + nft_trans_elem(trans).priv); + break; } kfree(trans); } @@ -3546,7 +3624,7 @@ static int nf_tables_commit(struct sk_buff *skb) while (++net->nft.base_seq == 0); /* A new generation has just started */ - net->nft.gencursor = gencursor_next(net); + net->nft.gencursor = nft_gencursor_next(net); /* Make sure all packets have left the previous generation before * purging old rules. @@ -3617,24 +3695,21 @@ static int nf_tables_commit(struct sk_buff *skb) NFT_MSG_DELSET, GFP_KERNEL); break; case NFT_MSG_NEWSETELEM: - nf_tables_setelem_notify(&trans->ctx, - nft_trans_elem_set(trans), - &nft_trans_elem(trans), + te = (struct nft_trans_elem *)trans->data; + + te->set->ops->activate(te->set, &te->elem); + nf_tables_setelem_notify(&trans->ctx, te->set, + &te->elem, NFT_MSG_NEWSETELEM, 0); nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; + nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_DELSETELEM, 0); - te->set->ops->get(te->set, &te->elem); - nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); - if (te->set->flags & NFT_SET_MAP && - !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_uninit(&te->elem.data, te->set->dtype); te->set->ops->remove(te->set, &te->elem); - nft_trans_destroy(trans); break; } } @@ -3666,6 +3741,10 @@ static void nf_tables_abort_release(struct nft_trans *trans) case NFT_MSG_NEWSET: nft_set_destroy(nft_trans_set(trans)); break; + case NFT_MSG_NEWSETELEM: + nft_set_elem_destroy(nft_trans_elem_set(trans), + nft_trans_elem(trans).priv); + break; } kfree(trans); } @@ -3736,16 +3815,15 @@ static int nf_tables_abort(struct sk_buff *skb) case NFT_MSG_NEWSETELEM: nft_trans_elem_set(trans)->nelems--; te = (struct nft_trans_elem *)trans->data; - te->set->ops->get(te->set, &te->elem); - nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); - if (te->set->flags & NFT_SET_MAP && - !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_uninit(&te->elem.data, te->set->dtype); + te->set->ops->remove(te->set, &te->elem); - nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: + te = (struct nft_trans_elem *)trans->data; + nft_trans_elem_set(trans)->nelems++; + te->set->ops->activate(te->set, &te->elem); + nft_trans_destroy(trans); break; } @@ -3820,13 +3898,18 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, const struct nft_set_elem *elem) { - if (elem->flags & NFT_SET_ELEM_INTERVAL_END) + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_data *data; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && + *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) return 0; - switch (elem->data.verdict) { + data = nft_set_ext_data(ext); + switch (data->verdict) { case NFT_JUMP: case NFT_GOTO: - return nf_tables_check_loops(ctx, elem->data.chain); + return nf_tables_check_loops(ctx, data->chain); default: return 0; } diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 77165bf023f3..ef4dfcbaf149 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -8,6 +8,7 @@ * Development of this code funded by Astaro AG (http://www.astaro.com/) */ +#include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/list.h> @@ -37,7 +38,7 @@ static struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 4, + .level = LOGLEVEL_WARNING, .logflags = NF_LOG_MASK, }, }, @@ -49,10 +50,10 @@ static void __nft_trace_packet(const struct nft_pktinfo *pkt, { struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, - pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", - chain->table->name, chain->name, comments[type], - rulenum); + nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, + pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", + chain->table->name, chain->name, comments[type], + rulenum); } static inline void nft_trace_packet(const struct nft_pktinfo *pkt, @@ -112,6 +113,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { const struct nft_chain *chain = ops->priv, *basechain = chain; + const struct net *net = read_pnet(&nft_base_chain(basechain)->pnet); const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_data data[NFT_REG_MAX + 1]; @@ -119,11 +121,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; struct nft_stats *stats; int rulenum; - /* - * Cache cursor to avoid problems in case that the cursor is updated - * while traversing the ruleset. - */ - unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor); + unsigned int gencursor = nft_genmask_cur(net); do_chain: rulenum = 0; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index f9ce2195fd63..c7e1a9d7d46f 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -23,104 +23,130 @@ /* We target a hash table size of 4, element hint is 75% of final size */ #define NFT_HASH_ELEMENT_HINT 3 +struct nft_hash { + struct rhashtable ht; +}; + struct nft_hash_elem { struct rhash_head node; - struct nft_data key; - struct nft_data data[]; + struct nft_set_ext ext; +}; + +struct nft_hash_cmp_arg { + const struct nft_set *set; + const struct nft_data *key; + u8 genmask; }; static const struct rhashtable_params nft_hash_params; -static bool nft_hash_lookup(const struct nft_set *set, - const struct nft_data *key, - struct nft_data *data) +static inline u32 nft_hash_key(const void *data, u32 len, u32 seed) { - struct rhashtable *priv = nft_set_priv(set); - const struct nft_hash_elem *he; - - he = rhashtable_lookup_fast(priv, key, nft_hash_params); - if (he && set->flags & NFT_SET_MAP) - nft_data_copy(data, he->data); + const struct nft_hash_cmp_arg *arg = data; - return !!he; + return jhash(arg->key, len, seed); } -static int nft_hash_insert(const struct nft_set *set, - const struct nft_set_elem *elem) +static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed) { - struct rhashtable *priv = nft_set_priv(set); - struct nft_hash_elem *he; - unsigned int size; - int err; + const struct nft_hash_elem *he = data; - if (elem->flags != 0) - return -EINVAL; + return jhash(nft_set_ext_key(&he->ext), len, seed); +} - size = sizeof(*he); - if (set->flags & NFT_SET_MAP) - size += sizeof(he->data[0]); +static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct nft_hash_cmp_arg *x = arg->key; + const struct nft_hash_elem *he = ptr; - he = kzalloc(size, GFP_KERNEL); - if (he == NULL) - return -ENOMEM; + if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) + return 1; + if (!nft_set_elem_active(&he->ext, x->genmask)) + return 1; + return 0; +} - nft_data_copy(&he->key, &elem->key); - if (set->flags & NFT_SET_MAP) - nft_data_copy(he->data, &elem->data); +static bool nft_hash_lookup(const struct nft_set *set, + const struct nft_data *key, + const struct nft_set_ext **ext) +{ + struct nft_hash *priv = nft_set_priv(set); + const struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_cur(read_pnet(&set->pnet)), + .set = set, + .key = key, + }; - err = rhashtable_insert_fast(priv, &he->node, nft_hash_params); - if (err) - kfree(he); + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + if (he != NULL) + *ext = &he->ext; - return err; + return !!he; } -static void nft_hash_elem_destroy(const struct nft_set *set, - struct nft_hash_elem *he) +static int nft_hash_insert(const struct nft_set *set, + const struct nft_set_elem *elem) { - nft_data_uninit(&he->key, NFT_DATA_VALUE); - if (set->flags & NFT_SET_MAP) - nft_data_uninit(he->data, set->dtype); - kfree(he); + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; + struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_next(read_pnet(&set->pnet)), + .set = set, + .key = &elem->key, + }; + + return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params); } -static void nft_hash_remove(const struct nft_set *set, - const struct nft_set_elem *elem) +static void nft_hash_activate(const struct nft_set *set, + const struct nft_set_elem *elem) { - struct rhashtable *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; - rhashtable_remove_fast(priv, elem->cookie, nft_hash_params); - synchronize_rcu(); - kfree(elem->cookie); + nft_set_elem_change_active(set, &he->ext); } -static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) +static void *nft_hash_deactivate(const struct nft_set *set, + const struct nft_set_elem *elem) { - struct rhashtable *priv = nft_set_priv(set); + struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_next(read_pnet(&set->pnet)), + .set = set, + .key = &elem->key, + }; - he = rhashtable_lookup_fast(priv, &elem->key, nft_hash_params); - if (!he) - return -ENOENT; + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + if (he != NULL) + nft_set_elem_change_active(set, &he->ext); - elem->cookie = he; - elem->flags = 0; - if (set->flags & NFT_SET_MAP) - nft_data_copy(&elem->data, he->data); + return he; +} - return 0; +static void nft_hash_remove(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; + + rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); } static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_iter *iter) { - struct rhashtable *priv = nft_set_priv(set); - const struct nft_hash_elem *he; + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; struct rhashtable_iter hti; struct nft_set_elem elem; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int err; - err = rhashtable_walk_init(priv, &hti); + err = rhashtable_walk_init(&priv->ht, &hti); iter->err = err; if (err) return; @@ -144,11 +170,10 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, if (iter->count < iter->skip) goto cont; + if (!nft_set_elem_active(&he->ext, genmask)) + goto cont; - memcpy(&elem.key, &he->key, sizeof(elem.key)); - if (set->flags & NFT_SET_MAP) - memcpy(&elem.data, he->data, sizeof(elem.data)); - elem.flags = 0; + elem.priv = he; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) @@ -165,37 +190,40 @@ out: static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) { - return sizeof(struct rhashtable); + return sizeof(struct nft_hash); } static const struct rhashtable_params nft_hash_params = { - .head_offset = offsetof(struct nft_hash_elem, node), - .key_offset = offsetof(struct nft_hash_elem, key), - .hashfn = jhash, - .automatic_shrinking = true, + .head_offset = offsetof(struct nft_hash_elem, node), + .hashfn = nft_hash_key, + .obj_hashfn = nft_hash_obj, + .obj_cmpfn = nft_hash_cmp, + .automatic_shrinking = true, }; static int nft_hash_init(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const tb[]) { - struct rhashtable *priv = nft_set_priv(set); + struct nft_hash *priv = nft_set_priv(set); struct rhashtable_params params = nft_hash_params; params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT; - params.key_len = set->klen; + params.key_len = set->klen; - return rhashtable_init(priv, ¶ms); + return rhashtable_init(&priv->ht, ¶ms); } -static void nft_free_element(void *ptr, void *arg) +static void nft_hash_elem_destroy(void *ptr, void *arg) { - nft_hash_elem_destroy((const struct nft_set *)arg, ptr); + nft_set_elem_destroy((const struct nft_set *)arg, ptr); } static void nft_hash_destroy(const struct nft_set *set) { - rhashtable_free_and_destroy(nft_set_priv(set), nft_free_element, + struct nft_hash *priv = nft_set_priv(set); + + rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy, (void *)set); } @@ -205,11 +233,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, unsigned int esize; esize = sizeof(struct nft_hash_elem); - if (features & NFT_SET_MAP) - esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]); - if (desc->size) { - est->size = sizeof(struct rhashtable) + + est->size = sizeof(struct nft_hash) + roundup_pow_of_two(desc->size * 4 / 3) * sizeof(struct nft_hash_elem *) + desc->size * esize; @@ -229,11 +254,13 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, static struct nft_set_ops nft_hash_ops __read_mostly = { .privsize = nft_hash_privsize, + .elemsize = offsetof(struct nft_hash_elem, ext), .estimate = nft_hash_estimate, .init = nft_hash_init, .destroy = nft_hash_destroy, - .get = nft_hash_get, .insert = nft_hash_insert, + .activate = nft_hash_activate, + .deactivate = nft_hash_deactivate, .remove = nft_hash_remove, .lookup = nft_hash_lookup, .walk = nft_hash_walk, diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index bde05f28cf14..e18af9db2f04 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -78,7 +78,7 @@ static int nft_log_init(const struct nft_ctx *ctx, li->u.log.level = ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL])); } else { - li->u.log.level = 4; + li->u.log.level = LOGLEVEL_WARNING; } if (tb[NFTA_LOG_FLAGS] != NULL) { li->u.log.logflags = diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 9615b8b9fb37..a5f30b8760ea 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -31,9 +31,13 @@ static void nft_lookup_eval(const struct nft_expr *expr, { const struct nft_lookup *priv = nft_expr_priv(expr); const struct nft_set *set = priv->set; + const struct nft_set_ext *ext; - if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg])) + if (set->ops->lookup(set, &data[priv->sreg], &ext)) { + if (set->flags & NFT_SET_MAP) + nft_data_copy(&data[priv->dreg], nft_set_ext_data(ext)); return; + } data[NFT_REG_VERDICT].verdict = NFT_BREAK; } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index abe68119a76c..5197874372ec 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -153,7 +153,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, } break; case NFT_META_CPU: - dest->data[0] = smp_processor_id(); + dest->data[0] = raw_smp_processor_id(); break; case NFT_META_IIFGROUP: if (in == NULL) diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index 2c75361077f7..42d0ca45fb9e 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -26,18 +26,18 @@ struct nft_rbtree { struct nft_rbtree_elem { struct rb_node node; - u16 flags; - struct nft_data key; - struct nft_data data[]; + struct nft_set_ext ext; }; + static bool nft_rbtree_lookup(const struct nft_set *set, const struct nft_data *key, - struct nft_data *data) + const struct nft_set_ext **ext) { const struct nft_rbtree *priv = nft_set_priv(set); const struct nft_rbtree_elem *rbe, *interval = NULL; const struct rb_node *parent; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int d; spin_lock_bh(&nft_rbtree_lock); @@ -45,7 +45,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set, while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(&rbe->key, key, set->klen); + d = nft_data_cmp(nft_set_ext_key(&rbe->ext), key, set->klen); if (d < 0) { parent = parent->rb_left; interval = rbe; @@ -53,12 +53,17 @@ static bool nft_rbtree_lookup(const struct nft_set *set, parent = parent->rb_right; else { found: - if (rbe->flags & NFT_SET_ELEM_INTERVAL_END) + if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; + continue; + } + if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && + *nft_set_ext_flags(&rbe->ext) & + NFT_SET_ELEM_INTERVAL_END) goto out; - if (set->flags & NFT_SET_MAP) - nft_data_copy(data, rbe->data); - spin_unlock_bh(&nft_rbtree_lock); + + *ext = &rbe->ext; return true; } } @@ -72,23 +77,13 @@ out: return false; } -static void nft_rbtree_elem_destroy(const struct nft_set *set, - struct nft_rbtree_elem *rbe) -{ - nft_data_uninit(&rbe->key, NFT_DATA_VALUE); - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_uninit(rbe->data, set->dtype); - - kfree(rbe); -} - static int __nft_rbtree_insert(const struct nft_set *set, struct nft_rbtree_elem *new) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; struct rb_node *parent, **p; + u8 genmask = nft_genmask_next(read_pnet(&set->pnet)); int d; parent = NULL; @@ -96,13 +91,18 @@ static int __nft_rbtree_insert(const struct nft_set *set, while (*p != NULL) { parent = *p; rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(&rbe->key, &new->key, set->klen); + d = nft_data_cmp(nft_set_ext_key(&rbe->ext), + nft_set_ext_key(&new->ext), + set->klen); if (d < 0) p = &parent->rb_left; else if (d > 0) p = &parent->rb_right; - else - return -EEXIST; + else { + if (nft_set_elem_active(&rbe->ext, genmask)) + return -EEXIST; + p = &parent->rb_left; + } } rb_link_node(&new->node, parent, p); rb_insert_color(&new->node, &priv->root); @@ -112,31 +112,13 @@ static int __nft_rbtree_insert(const struct nft_set *set, static int nft_rbtree_insert(const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_rbtree_elem *rbe; - unsigned int size; + struct nft_rbtree_elem *rbe = elem->priv; int err; - size = sizeof(*rbe); - if (set->flags & NFT_SET_MAP && - !(elem->flags & NFT_SET_ELEM_INTERVAL_END)) - size += sizeof(rbe->data[0]); - - rbe = kzalloc(size, GFP_KERNEL); - if (rbe == NULL) - return -ENOMEM; - - rbe->flags = elem->flags; - nft_data_copy(&rbe->key, &elem->key); - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_copy(rbe->data, &elem->data); - spin_lock_bh(&nft_rbtree_lock); err = __nft_rbtree_insert(set, rbe); - if (err < 0) - kfree(rbe); - spin_unlock_bh(&nft_rbtree_lock); + return err; } @@ -144,39 +126,49 @@ static void nft_rbtree_remove(const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_rbtree *priv = nft_set_priv(set); - struct nft_rbtree_elem *rbe = elem->cookie; + struct nft_rbtree_elem *rbe = elem->priv; spin_lock_bh(&nft_rbtree_lock); rb_erase(&rbe->node, &priv->root); spin_unlock_bh(&nft_rbtree_lock); - kfree(rbe); } -static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) +static void nft_rbtree_activate(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_rbtree_elem *rbe = elem->priv; + + nft_set_elem_change_active(set, &rbe->ext); +} + +static void *nft_rbtree_deactivate(const struct nft_set *set, + const struct nft_set_elem *elem) { const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; struct nft_rbtree_elem *rbe; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int d; while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(&rbe->key, &elem->key, set->klen); + d = nft_data_cmp(nft_set_ext_key(&rbe->ext), &elem->key, + set->klen); if (d < 0) parent = parent->rb_left; else if (d > 0) parent = parent->rb_right; else { - elem->cookie = rbe; - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_copy(&elem->data, rbe->data); - elem->flags = rbe->flags; - return 0; + if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; + continue; + } + nft_set_elem_change_active(set, &rbe->ext); + return rbe; } } - return -ENOENT; + return NULL; } static void nft_rbtree_walk(const struct nft_ctx *ctx, @@ -184,21 +176,21 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_set_iter *iter) { const struct nft_rbtree *priv = nft_set_priv(set); - const struct nft_rbtree_elem *rbe; + struct nft_rbtree_elem *rbe; struct nft_set_elem elem; struct rb_node *node; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); spin_lock_bh(&nft_rbtree_lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + rbe = rb_entry(node, struct nft_rbtree_elem, node); + if (iter->count < iter->skip) goto cont; + if (!nft_set_elem_active(&rbe->ext, genmask)) + goto cont; - rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_data_copy(&elem.key, &rbe->key); - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_copy(&elem.data, rbe->data); - elem.flags = rbe->flags; + elem.priv = rbe; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) { @@ -235,7 +227,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_rbtree_elem_destroy(set, rbe); + nft_set_elem_destroy(set, rbe); } } @@ -245,9 +237,6 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, unsigned int nsize; nsize = sizeof(struct nft_rbtree_elem); - if (features & NFT_SET_MAP) - nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]); - if (desc->size) est->size = sizeof(struct nft_rbtree) + desc->size * nsize; else @@ -260,12 +249,14 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, static struct nft_set_ops nft_rbtree_ops __read_mostly = { .privsize = nft_rbtree_privsize, + .elemsize = offsetof(struct nft_rbtree_elem, ext), .estimate = nft_rbtree_estimate, .init = nft_rbtree_init, .destroy = nft_rbtree_destroy, .insert = nft_rbtree_insert, .remove = nft_rbtree_remove, - .get = nft_rbtree_get, + .deactivate = nft_rbtree_deactivate, + .activate = nft_rbtree_activate, .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, .features = NFT_SET_INTERVAL | NFT_SET_MAP, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4caa809dbbe0..19909d0786a2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -3127,7 +3127,7 @@ static struct pernet_operations __net_initdata netlink_net_ops = { .exit = netlink_net_exit, }; -static inline u32 netlink_hash(const void *data, u32 seed) +static inline u32 netlink_hash(const void *data, u32 len, u32 seed) { const struct netlink_sock *nlk = data; struct netlink_compare_arg arg; diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 79355531c3e2..ae558dd7f8ee 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -62,21 +62,8 @@ static void tipc_bclink_lock(struct net *net) static void tipc_bclink_unlock(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_node *node = NULL; - if (likely(!tn->bclink->flags)) { - spin_unlock_bh(&tn->bclink->lock); - return; - } - - if (tn->bclink->flags & TIPC_BCLINK_RESET) { - tn->bclink->flags &= ~TIPC_BCLINK_RESET; - node = tipc_bclink_retransmit_to(net); - } spin_unlock_bh(&tn->bclink->lock); - - if (node) - tipc_link_reset_all(node); } void tipc_bclink_input(struct net *net) @@ -91,13 +78,6 @@ uint tipc_bclink_get_mtu(void) return MAX_PKT_DEFAULT_MCAST; } -void tipc_bclink_set_flags(struct net *net, unsigned int flags) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tn->bclink->flags |= flags; -} - static u32 bcbuf_acks(struct sk_buff *buf) { return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle; @@ -156,7 +136,6 @@ static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) seqno : node->bclink.last_sent; } - /** * tipc_bclink_retransmit_to - get most recent node to request retransmission * @@ -350,13 +329,12 @@ static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) return; tipc_node_lock(n_ptr); - if (n_ptr->bclink.recv_permitted && (n_ptr->bclink.last_in != n_ptr->bclink.last_sent) && (n_ptr->bclink.last_in == msg_bcgap_after(msg))) n_ptr->bclink.oos_state = 2; - tipc_node_unlock(n_ptr); + tipc_node_put(n_ptr); } /* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster @@ -476,17 +454,18 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) goto unlock; if (msg_destnode(msg) == tn->own_addr) { tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); - tipc_node_unlock(node); tipc_bclink_lock(net); bcl->stats.recv_nacks++; tn->bclink->retransmit_to = node; bclink_retransmit_pkt(tn, msg_bcgap_after(msg), msg_bcgap_to(msg)); tipc_bclink_unlock(net); + tipc_node_unlock(node); } else { tipc_node_unlock(node); bclink_peek_nack(net, msg); } + tipc_node_put(node); goto exit; } @@ -591,6 +570,7 @@ receive: unlock: tipc_node_unlock(node); + tipc_node_put(node); exit: kfree_skb(buf); } diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 43f397fbac55..4bdc12277d33 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -55,7 +55,6 @@ struct tipc_bcbearer_pair { struct tipc_bearer *secondary; }; -#define TIPC_BCLINK_RESET 1 #define BCBEARER MAX_BEARERS /** @@ -86,7 +85,6 @@ struct tipc_bcbearer { * @lock: spinlock governing access to structure * @link: (non-standard) broadcast link structure * @node: (non-standard) node structure representing b'cast link's peer node - * @flags: represent bclink states * @bcast_nodes: map of broadcast-capable nodes * @retransmit_to: node that most recently requested a retransmit * @@ -96,7 +94,6 @@ struct tipc_bclink { spinlock_t lock; struct tipc_link link; struct tipc_node node; - unsigned int flags; struct sk_buff_head arrvq; struct sk_buff_head inputq; struct tipc_node_map bcast_nodes; @@ -117,7 +114,6 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, int tipc_bclink_init(struct net *net); void tipc_bclink_stop(struct net *net); -void tipc_bclink_set_flags(struct net *tn, unsigned int flags); void tipc_bclink_add_node(struct net *net, u32 addr); void tipc_bclink_remove_node(struct net *net, u32 addr); struct tipc_node *tipc_bclink_retransmit_to(struct net *tn); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 169f3dd038b9..967e292f53c8 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -260,6 +260,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, } } tipc_node_unlock(node); + tipc_node_put(node); } /** diff --git a/net/tipc/link.c b/net/tipc/link.c index 1287161e9424..514466efc25c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -854,6 +854,7 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, if (link) rc = __tipc_link_xmit(net, link, list); tipc_node_unlock(node); + tipc_node_put(node); } if (link) return rc; @@ -980,7 +981,6 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, (unsigned long) TIPC_SKB_CB(buf)->handle); n_ptr = tipc_bclink_retransmit_to(net); - tipc_node_lock(n_ptr); tipc_addr_string_fill(addr_string, n_ptr->addr); pr_info("Broadcast link info for %s\n", addr_string); @@ -992,9 +992,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, n_ptr->bclink.oos_state, n_ptr->bclink.last_sent); - tipc_node_unlock(n_ptr); - - tipc_bclink_set_flags(net, TIPC_BCLINK_RESET); + n_ptr->action_flags |= TIPC_BCAST_RESET; l_ptr->stale_count = 0; } } @@ -1119,8 +1117,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) n_ptr = tipc_node_find(net, msg_prevnode(msg)); if (unlikely(!n_ptr)) goto discard; - tipc_node_lock(n_ptr); + tipc_node_lock(n_ptr); /* Locate unicast link endpoint that should handle message */ l_ptr = n_ptr->links[b_ptr->identity]; if (unlikely(!l_ptr)) @@ -1208,6 +1206,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) skb = NULL; unlock: tipc_node_unlock(n_ptr); + tipc_node_put(n_ptr); discard: if (unlikely(skb)) kfree_skb(skb); @@ -2239,7 +2238,6 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); - if (prev_node) { node = tipc_node_find(net, prev_node); if (!node) { @@ -2252,6 +2250,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->prev_seq = 1; goto out; } + tipc_node_put(node); list_for_each_entry_continue_rcu(node, &tn->node_list, list) { @@ -2259,6 +2258,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) err = __tipc_nl_add_node_links(net, &msg, node, &prev_link); tipc_node_unlock(node); + tipc_node_put(node); if (err) goto out; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 506aaa565da7..41e7b7e4dda0 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -244,6 +244,7 @@ static void tipc_publ_subscribe(struct net *net, struct publication *publ, tipc_node_lock(node); list_add_tail(&publ->nodesub_list, &node->publ_list); tipc_node_unlock(node); + tipc_node_put(node); } static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, @@ -258,6 +259,7 @@ static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, tipc_node_lock(node); list_del_init(&publ->nodesub_list); tipc_node_unlock(node); + tipc_node_put(node); } /** diff --git a/net/tipc/node.c b/net/tipc/node.c index 26d1de1bf34d..3e4f04897c03 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -42,6 +42,7 @@ static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); +static void tipc_node_delete(struct tipc_node *node); struct tipc_sock_conn { u32 port; @@ -67,6 +68,23 @@ static unsigned int tipc_hashfn(u32 addr) return addr & (NODE_HTABLE_SIZE - 1); } +static void tipc_node_kref_release(struct kref *kref) +{ + struct tipc_node *node = container_of(kref, struct tipc_node, kref); + + tipc_node_delete(node); +} + +void tipc_node_put(struct tipc_node *node) +{ + kref_put(&node->kref, tipc_node_kref_release); +} + +static void tipc_node_get(struct tipc_node *node) +{ + kref_get(&node->kref); +} + /* * tipc_node_find - locate specified node object, if it exists */ @@ -82,6 +100,7 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr) hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)], hash) { if (node->addr == addr) { + tipc_node_get(node); rcu_read_unlock(); return node; } @@ -106,6 +125,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) } n_ptr->addr = addr; n_ptr->net = net; + kref_init(&n_ptr->kref); spin_lock_init(&n_ptr->lock); INIT_HLIST_NODE(&n_ptr->hash); INIT_LIST_HEAD(&n_ptr->list); @@ -120,16 +140,17 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) list_add_tail_rcu(&n_ptr->list, &temp_node->list); n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; n_ptr->signature = INVALID_NODE_SIG; + tipc_node_get(n_ptr); exit: spin_unlock_bh(&tn->node_list_lock); return n_ptr; } -static void tipc_node_delete(struct tipc_net *tn, struct tipc_node *n_ptr) +static void tipc_node_delete(struct tipc_node *node) { - list_del_rcu(&n_ptr->list); - hlist_del_rcu(&n_ptr->hash); - kfree_rcu(n_ptr, rcu); + list_del_rcu(&node->list); + hlist_del_rcu(&node->hash); + kfree_rcu(node, rcu); } void tipc_node_stop(struct net *net) @@ -139,7 +160,7 @@ void tipc_node_stop(struct net *net) spin_lock_bh(&tn->node_list_lock); list_for_each_entry_safe(node, t_node, &tn->node_list, list) - tipc_node_delete(tn, node); + tipc_node_put(node); spin_unlock_bh(&tn->node_list_lock); } @@ -147,6 +168,7 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) { struct tipc_node *node; struct tipc_sock_conn *conn; + int err = 0; if (in_own_node(net, dnode)) return 0; @@ -157,8 +179,10 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) return -EHOSTUNREACH; } conn = kmalloc(sizeof(*conn), GFP_ATOMIC); - if (!conn) - return -EHOSTUNREACH; + if (!conn) { + err = -EHOSTUNREACH; + goto exit; + } conn->peer_node = dnode; conn->port = port; conn->peer_port = peer_port; @@ -166,7 +190,9 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) tipc_node_lock(node); list_add_tail(&conn->list, &node->conn_sks); tipc_node_unlock(node); - return 0; +exit: + tipc_node_put(node); + return err; } void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) @@ -189,6 +215,7 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) kfree(conn); } tipc_node_unlock(node); + tipc_node_put(node); } /** @@ -417,19 +444,25 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, char *linkname, size_t len) { struct tipc_link *link; + int err = -EINVAL; struct tipc_node *node = tipc_node_find(net, addr); - if ((bearer_id >= MAX_BEARERS) || !node) - return -EINVAL; + if (!node) + return err; + + if (bearer_id >= MAX_BEARERS) + goto exit; + tipc_node_lock(node); link = node->links[bearer_id]; if (link) { strncpy(linkname, link->name, len); - tipc_node_unlock(node); - return 0; + err = 0; } +exit: tipc_node_unlock(node); - return -EINVAL; + tipc_node_put(node); + return err; } void tipc_node_unlock(struct tipc_node *node) @@ -459,7 +492,7 @@ void tipc_node_unlock(struct tipc_node *node) TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP | TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT | - TIPC_NAMED_MSG_EVT); + TIPC_NAMED_MSG_EVT | TIPC_BCAST_RESET); spin_unlock_bh(&node->lock); @@ -488,6 +521,9 @@ void tipc_node_unlock(struct tipc_node *node) if (flags & TIPC_BCAST_MSG_EVT) tipc_bclink_input(net); + + if (flags & TIPC_BCAST_RESET) + tipc_link_reset_all(node); } /* Caller should hold node lock for the passed node */ @@ -542,17 +578,21 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); - - if (last_addr && !tipc_node_find(net, last_addr)) { - rcu_read_unlock(); - /* We never set seq or call nl_dump_check_consistent() this - * means that setting prev_seq here will cause the consistence - * check to fail in the netlink callback handler. Resulting in - * the NLMSG_DONE message having the NLM_F_DUMP_INTR flag set if - * the node state changed while we released the lock. - */ - cb->prev_seq = 1; - return -EPIPE; + if (last_addr) { + node = tipc_node_find(net, last_addr); + if (!node) { + rcu_read_unlock(); + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the NLMSG_DONE message having + * the NLM_F_DUMP_INTR flag set if the node state + * changed while we released the lock. + */ + cb->prev_seq = 1; + return -EPIPE; + } + tipc_node_put(node); } list_for_each_entry_rcu(node, &tn->node_list, list) { diff --git a/net/tipc/node.h b/net/tipc/node.h index e89ac04ec2c3..02d5c20dc551 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -64,7 +64,8 @@ enum { TIPC_NOTIFY_LINK_UP = (1 << 6), TIPC_NOTIFY_LINK_DOWN = (1 << 7), TIPC_NAMED_MSG_EVT = (1 << 8), - TIPC_BCAST_MSG_EVT = (1 << 9) + TIPC_BCAST_MSG_EVT = (1 << 9), + TIPC_BCAST_RESET = (1 << 10) }; /** @@ -93,6 +94,7 @@ struct tipc_node_bclink { /** * struct tipc_node - TIPC node structure * @addr: network address of node + * @ref: reference counter to node object * @lock: spinlock governing access to structure * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain @@ -114,6 +116,7 @@ struct tipc_node_bclink { */ struct tipc_node { u32 addr; + struct kref kref; spinlock_t lock; struct net *net; struct hlist_node hash; @@ -136,6 +139,7 @@ struct tipc_node { }; struct tipc_node *tipc_node_find(struct net *net, u32 addr); +void tipc_node_put(struct tipc_node *node); struct tipc_node *tipc_node_create(struct net *net, u32 addr); void tipc_node_stop(struct net *net); void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); @@ -170,10 +174,12 @@ static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector) node = tipc_node_find(net, addr); - if (likely(node)) + if (likely(node)) { mtu = node->act_mtus[selector & 1]; - else + tipc_node_put(node); + } else { mtu = MAX_MSG_SIZE; + } return mtu; } |