diff options
Diffstat (limited to 'drivers/net')
48 files changed, 5843 insertions, 969 deletions
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 1d71802396fb..25bd3fa744d9 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -216,6 +216,22 @@ int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val) return 0; } +int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, + u16 *val) +{ + int addr = chip->info->port_base_addr + port; + + return mv88e6xxx_read(chip, addr, reg, val); +} + +int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, + u16 val) +{ + int addr = chip->info->port_base_addr + port; + + return mv88e6xxx_write(chip, addr, reg, val); +} + static int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy, int reg, u16 *val) { @@ -585,23 +601,23 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { struct mv88e6xxx_chip *chip = ds->priv; - u32 reg; - int ret; + u16 reg; + int err; if (!phy_is_pseudo_fixed_link(phydev)) return; mutex_lock(&chip->reg_lock); - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_PCS_CTRL); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); + if (err) goto out; - reg = ret & ~(PORT_PCS_CTRL_LINK_UP | - PORT_PCS_CTRL_FORCE_LINK | - PORT_PCS_CTRL_DUPLEX_FULL | - PORT_PCS_CTRL_FORCE_DUPLEX | - PORT_PCS_CTRL_UNFORCED); + reg &= ~(PORT_PCS_CTRL_LINK_UP | + PORT_PCS_CTRL_FORCE_LINK | + PORT_PCS_CTRL_DUPLEX_FULL | + PORT_PCS_CTRL_FORCE_DUPLEX | + PORT_PCS_CTRL_UNFORCED); reg |= PORT_PCS_CTRL_FORCE_LINK; if (phydev->link) @@ -639,7 +655,7 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, reg |= (PORT_PCS_CTRL_RGMII_DELAY_RXCLK | PORT_PCS_CTRL_RGMII_DELAY_TXCLK); } - _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_PCS_CTRL, reg); + mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); out: mutex_unlock(&chip->reg_lock); @@ -799,22 +815,22 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip, { u32 low; u32 high = 0; - int ret; + int err; + u16 reg; u64 value; switch (s->type) { case PORT: - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), s->reg); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, s->reg, ®); + if (err) return UINT64_MAX; - low = ret; + low = reg; if (s->sizeof_stat == 4) { - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), - s->reg + 1); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, s->reg + 1, ®); + if (err) return UINT64_MAX; - high = ret; + high = reg; } break; case BANK0: @@ -893,6 +909,8 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, struct ethtool_regs *regs, void *_p) { struct mv88e6xxx_chip *chip = ds->priv; + int err; + u16 reg; u16 *p = _p; int i; @@ -903,11 +921,10 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, mutex_lock(&chip->reg_lock); for (i = 0; i < 32; i++) { - int ret; - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), i); - if (ret >= 0) - p[i] = ret; + err = mv88e6xxx_port_read(chip, port, i, ®); + if (!err) + p[i] = reg; } mutex_unlock(&chip->reg_lock); @@ -938,7 +955,7 @@ static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, e->eee_enabled = !!(reg & 0x0200); e->tx_lpi_enabled = !!(reg & 0x0100); - err = mv88e6xxx_read(chip, REG_PORT(port), PORT_STATUS, ®); + err = mv88e6xxx_port_read(chip, port, PORT_STATUS, ®); if (err) goto out; @@ -1106,12 +1123,13 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port, u8 state) { struct dsa_switch *ds = chip->ds; - int reg, ret = 0; + u16 reg; + int err; u8 oldstate; - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL); - if (reg < 0) - return reg; + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL, ®); + if (err) + return err; oldstate = reg & PORT_CONTROL_STATE_MASK; @@ -1124,23 +1142,22 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port, oldstate == PORT_CONTROL_STATE_FORWARDING) && (state == PORT_CONTROL_STATE_DISABLED || state == PORT_CONTROL_STATE_BLOCKING)) { - ret = _mv88e6xxx_atu_remove(chip, 0, port, false); - if (ret) - return ret; + err = _mv88e6xxx_atu_remove(chip, 0, port, false); + if (err) + return err; } reg = (reg & ~PORT_CONTROL_STATE_MASK) | state; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL, - reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); + if (err) + return err; netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n", mv88e6xxx_port_state_names[state], mv88e6xxx_port_state_names[oldstate]); } - return ret; + return err; } static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) @@ -1149,7 +1166,8 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) const u16 mask = (1 << chip->info->num_ports) - 1; struct dsa_switch *ds = chip->ds; u16 output_ports = 0; - int reg; + u16 reg; + int err; int i; /* allow CPU port or DSA link(s) to send frames to every port */ @@ -1170,14 +1188,14 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) /* prevent frames from going back out of the port they came in on */ output_ports &= ~BIT(port); - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_BASE_VLAN); - if (reg < 0) - return reg; + err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, ®); + if (err) + return err; reg &= ~mask; reg |= output_ports & mask; - return _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_BASE_VLAN, reg); + return mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); } static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, @@ -1218,23 +1236,22 @@ static int _mv88e6xxx_port_pvid(struct mv88e6xxx_chip *chip, int port, u16 *new, u16 *old) { struct dsa_switch *ds = chip->ds; - u16 pvid; - int ret; + u16 pvid, reg; + int err; - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_DEFAULT_VLAN); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, ®); + if (err) + return err; - pvid = ret & PORT_DEFAULT_VLAN_MASK; + pvid = reg & PORT_DEFAULT_VLAN_MASK; if (new) { - ret &= ~PORT_DEFAULT_VLAN_MASK; - ret |= *new & PORT_DEFAULT_VLAN_MASK; + reg &= ~PORT_DEFAULT_VLAN_MASK; + reg |= *new & PORT_DEFAULT_VLAN_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_DEFAULT_VLAN, ret); - if (ret < 0) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, reg); + if (err) + return err; netdev_dbg(ds->ports[port].netdev, "DefaultVID %d (was %d)\n", *new, pvid); @@ -1613,7 +1630,8 @@ static int _mv88e6xxx_port_fid(struct mv88e6xxx_chip *chip, int port, struct dsa_switch *ds = chip->ds; u16 upper_mask; u16 fid; - int ret; + u16 reg; + int err; if (mv88e6xxx_num_databases(chip) == 4096) upper_mask = 0xff; @@ -1623,37 +1641,35 @@ static int _mv88e6xxx_port_fid(struct mv88e6xxx_chip *chip, int port, return -EOPNOTSUPP; /* Port's default FID bits 3:0 are located in reg 0x06, offset 12 */ - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_BASE_VLAN); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, ®); + if (err) + return err; - fid = (ret & PORT_BASE_VLAN_FID_3_0_MASK) >> 12; + fid = (reg & PORT_BASE_VLAN_FID_3_0_MASK) >> 12; if (new) { - ret &= ~PORT_BASE_VLAN_FID_3_0_MASK; - ret |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK; + reg &= ~PORT_BASE_VLAN_FID_3_0_MASK; + reg |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_BASE_VLAN, - ret); - if (ret < 0) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); + if (err) + return err; } /* Port's default FID bits 11:4 are located in reg 0x05, offset 0 */ - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL_1); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, ®); + if (err) + return err; - fid |= (ret & upper_mask) << 4; + fid |= (reg & upper_mask) << 4; if (new) { - ret &= ~upper_mask; - ret |= (*new >> 4) & upper_mask; + reg &= ~upper_mask; + reg |= (*new >> 4) & upper_mask; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_1, - ret); - if (ret < 0) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, reg); + if (err) + return err; netdev_dbg(ds->ports[port].netdev, "FID %d (was %d)\n", *new, fid); @@ -1865,26 +1881,26 @@ static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, struct mv88e6xxx_chip *chip = ds->priv; u16 old, new = vlan_filtering ? PORT_CONTROL_2_8021Q_SECURE : PORT_CONTROL_2_8021Q_DISABLED; - int ret; + u16 reg; + int err; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU)) return -EOPNOTSUPP; mutex_lock(&chip->reg_lock); - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL_2); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, ®); + if (err) goto unlock; - old = ret & PORT_CONTROL_2_8021Q_MASK; + old = reg & PORT_CONTROL_2_8021Q_MASK; if (new != old) { - ret &= ~PORT_CONTROL_2_8021Q_MASK; - ret |= new & PORT_CONTROL_2_8021Q_MASK; + reg &= ~PORT_CONTROL_2_8021Q_MASK; + reg |= new & PORT_CONTROL_2_8021Q_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_2, - ret); - if (ret < 0) + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); + if (err) goto unlock; netdev_dbg(ds->ports[port].netdev, "802.1Q Mode %s (was %s)\n", @@ -1892,11 +1908,11 @@ static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, mv88e6xxx_port_8021q_mode_names[old]); } - ret = 0; + err = 0; unlock: mutex_unlock(&chip->reg_lock); - return ret; + return err; } static int @@ -2406,19 +2422,20 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) u16 is_reset = (ppu_active ? 0x8800 : 0xc800); struct gpio_desc *gpiod = chip->reset; unsigned long timeout; - int ret; + int err, ret; + u16 reg; int i; /* Set all ports to the disabled state. */ for (i = 0; i < chip->info->num_ports; i++) { - ret = _mv88e6xxx_reg_read(chip, REG_PORT(i), PORT_CONTROL); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, i, PORT_CONTROL, ®); + if (err) + return err; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(i), PORT_CONTROL, - ret & 0xfffc); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, i, PORT_CONTROL, + reg & 0xfffc); + if (err) + return err; } /* Wait for transmit queues to drain. */ @@ -2437,11 +2454,11 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) * through global registers 0x18 and 0x19. */ if (ppu_active) - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc000); + err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc000); else - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc400); - if (ret) - return ret; + err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc400); + if (err) + return err; /* Wait up to one second for reset to complete. */ timeout = jiffies + 1 * HZ; @@ -2455,11 +2472,11 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) usleep_range(1000, 2000); } if (time_after(jiffies, timeout)) - ret = -ETIMEDOUT; + err = -ETIMEDOUT; else - ret = 0; + err = 0; - return ret; + return err; } static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip) @@ -2480,21 +2497,10 @@ static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip) return err; } -static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, - int reg, u16 *val) -{ - int addr = chip->info->port_base_addr + port; - - if (port >= chip->info->num_ports) - return -EINVAL; - - return mv88e6xxx_read(chip, addr, reg, val); -} - static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) { struct dsa_switch *ds = chip->ds; - int ret; + int err; u16 reg; if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || @@ -2507,7 +2513,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * and all DSA ports to their maximum bandwidth and * full duplex. */ - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_PCS_CTRL); + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { reg &= ~PORT_PCS_CTRL_UNFORCED; reg |= PORT_PCS_CTRL_FORCE_LINK | @@ -2522,10 +2528,9 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) reg |= PORT_PCS_CTRL_UNFORCED; } - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_PCS_CTRL, reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); + if (err) + return err; } /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, @@ -2576,26 +2581,25 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) PORT_CONTROL_FORWARD_UNKNOWN_MC; } if (reg) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_CONTROL, reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); + if (err) + return err; } /* If this port is connected to a SerDes, make sure the SerDes is not * powered down. */ if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SERDES)) { - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_STATUS); - if (ret < 0) - return ret; - ret &= PORT_STATUS_CMODE_MASK; - if ((ret == PORT_STATUS_CMODE_100BASE_X) || - (ret == PORT_STATUS_CMODE_1000BASE_X) || - (ret == PORT_STATUS_CMODE_SGMII)) { - ret = mv88e6xxx_serdes_power_on(chip); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_STATUS, ®); + if (err) + return err; + reg &= PORT_STATUS_CMODE_MASK; + if ((reg == PORT_STATUS_CMODE_100BASE_X) || + (reg == PORT_STATUS_CMODE_1000BASE_X) || + (reg == PORT_STATUS_CMODE_SGMII)) { + err = mv88e6xxx_serdes_power_on(chip); + if (err < 0) + return err; } } @@ -2629,10 +2633,9 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) reg |= PORT_CONTROL_2_8021Q_DISABLED; if (reg) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_CONTROL_2, reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); + if (err) + return err; } /* Port Association Vector: when learning source addresses @@ -2645,16 +2648,14 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) if (dsa_is_cpu_port(ds, port)) reg = 0; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_ASSOC_VECTOR, - reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_ASSOC_VECTOR, reg); + if (err) + return err; /* Egress rate control 2: disable egress rate control. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_RATE_CONTROL_2, - 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL_2, 0x0000); + if (err) + return err; if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || @@ -2663,96 +2664,89 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * be paused for by the remote end or the period of * time that this port can pause the remote end. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_PAUSE_CTRL, 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_PAUSE_CTRL, 0x0000); + if (err) + return err; /* Port ATU control: disable limiting the number of * address database entries that this port is allowed * to use. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_ATU_CONTROL, 0x0000); + err = mv88e6xxx_port_write(chip, port, PORT_ATU_CONTROL, + 0x0000); /* Priority Override: disable DA, SA and VTU priority * override. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_PRI_OVERRIDE, 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_PRI_OVERRIDE, + 0x0000); + if (err) + return err; /* Port Ethertype: use the Ethertype DSA Ethertype * value. */ if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_ETH_TYPE, ETH_P_EDSA); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_ETH_TYPE, + ETH_P_EDSA); + if (err) + return err; } /* Tag Remap: use an identity 802.1p prio -> switch * prio mapping. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_TAG_REGMAP_0123, 0x3210); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_TAG_REGMAP_0123, + 0x3210); + if (err) + return err; /* Tag Remap 2: use an identity 802.1p prio -> switch * prio mapping. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_TAG_REGMAP_4567, 0x7654); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_TAG_REGMAP_4567, + 0x7654); + if (err) + return err; } /* Rate Control: disable ingress rate limiting. */ if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || mv88e6xxx_6320_family(chip)) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_RATE_CONTROL, 0x0001); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL, + 0x0001); + if (err) + return err; } else if (mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip)) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_RATE_CONTROL, 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL, + 0x0000); + if (err) + return err; } /* Port Control 1: disable trunking, disable sending * learning messages to this port. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_1, - 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, 0x0000); + if (err) + return err; /* Port based VLAN map: give each port the same default address * database, and allow bidirectional communication between the * CPU and DSA port(s), and the other ports. */ - ret = _mv88e6xxx_port_fid_set(chip, port, 0); - if (ret) - return ret; + err = _mv88e6xxx_port_fid_set(chip, port, 0); + if (err) + return err; - ret = _mv88e6xxx_port_based_vlan_map(chip, port); - if (ret) - return ret; + err = _mv88e6xxx_port_based_vlan_map(chip, port); + if (err) + return err; /* Default VLAN ID and priority: don't set a default VLAN * ID, and set the default packet priority to zero. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_DEFAULT_VLAN, - 0x0000); - if (ret) - return ret; - - return 0; + return mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, 0x0000); } static int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 52f3f5231f51..827988397fd8 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -37,7 +37,6 @@ #define ADDR_SERDES 0x0f #define SERDES_PAGE_FIBER 0x01 -#define REG_PORT(p) (0x10 + (p)) #define PORT_STATUS 0x00 #define PORT_STATUS_PAUSE_EN BIT(15) #define PORT_STATUS_MY_PAUSE BIT(14) @@ -453,36 +452,36 @@ enum mv88e6xxx_cap { }; /* Bitmask of capabilities */ -#define MV88E6XXX_FLAG_EDSA BIT(MV88E6XXX_CAP_EDSA) -#define MV88E6XXX_FLAG_EEE BIT(MV88E6XXX_CAP_EEE) - -#define MV88E6XXX_FLAG_SMI_CMD BIT(MV88E6XXX_CAP_SMI_CMD) -#define MV88E6XXX_FLAG_SMI_DATA BIT(MV88E6XXX_CAP_SMI_DATA) - -#define MV88E6XXX_FLAG_PHY_PAGE BIT(MV88E6XXX_CAP_PHY_PAGE) - -#define MV88E6XXX_FLAG_SERDES BIT(MV88E6XXX_CAP_SERDES) - -#define MV88E6XXX_FLAG_GLOBAL2 BIT(MV88E6XXX_CAP_GLOBAL2) -#define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT(MV88E6XXX_CAP_G2_MGMT_EN_2X) -#define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT(MV88E6XXX_CAP_G2_MGMT_EN_0X) -#define MV88E6XXX_FLAG_G2_IRL_CMD BIT(MV88E6XXX_CAP_G2_IRL_CMD) -#define MV88E6XXX_FLAG_G2_IRL_DATA BIT(MV88E6XXX_CAP_G2_IRL_DATA) -#define MV88E6XXX_FLAG_G2_PVT_ADDR BIT(MV88E6XXX_CAP_G2_PVT_ADDR) -#define MV88E6XXX_FLAG_G2_PVT_DATA BIT(MV88E6XXX_CAP_G2_PVT_DATA) -#define MV88E6XXX_FLAG_G2_SWITCH_MAC BIT(MV88E6XXX_CAP_G2_SWITCH_MAC) -#define MV88E6XXX_FLAG_G2_POT BIT(MV88E6XXX_CAP_G2_POT) -#define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT(MV88E6XXX_CAP_G2_EEPROM_CMD) -#define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT(MV88E6XXX_CAP_G2_EEPROM_DATA) -#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD BIT(MV88E6XXX_CAP_G2_SMI_PHY_CMD) -#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA BIT(MV88E6XXX_CAP_G2_SMI_PHY_DATA) - -#define MV88E6XXX_FLAG_PPU BIT(MV88E6XXX_CAP_PPU) -#define MV88E6XXX_FLAG_PPU_ACTIVE BIT(MV88E6XXX_CAP_PPU_ACTIVE) -#define MV88E6XXX_FLAG_STU BIT(MV88E6XXX_CAP_STU) -#define MV88E6XXX_FLAG_TEMP BIT(MV88E6XXX_CAP_TEMP) -#define MV88E6XXX_FLAG_TEMP_LIMIT BIT(MV88E6XXX_CAP_TEMP_LIMIT) -#define MV88E6XXX_FLAG_VTU BIT(MV88E6XXX_CAP_VTU) +#define MV88E6XXX_FLAG_EDSA BIT_ULL(MV88E6XXX_CAP_EDSA) +#define MV88E6XXX_FLAG_EEE BIT_ULL(MV88E6XXX_CAP_EEE) + +#define MV88E6XXX_FLAG_SMI_CMD BIT_ULL(MV88E6XXX_CAP_SMI_CMD) +#define MV88E6XXX_FLAG_SMI_DATA BIT_ULL(MV88E6XXX_CAP_SMI_DATA) + +#define MV88E6XXX_FLAG_PHY_PAGE BIT_ULL(MV88E6XXX_CAP_PHY_PAGE) + +#define MV88E6XXX_FLAG_SERDES BIT_ULL(MV88E6XXX_CAP_SERDES) + +#define MV88E6XXX_FLAG_GLOBAL2 BIT_ULL(MV88E6XXX_CAP_GLOBAL2) +#define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_2X) +#define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X) +#define MV88E6XXX_FLAG_G2_IRL_CMD BIT_ULL(MV88E6XXX_CAP_G2_IRL_CMD) +#define MV88E6XXX_FLAG_G2_IRL_DATA BIT_ULL(MV88E6XXX_CAP_G2_IRL_DATA) +#define MV88E6XXX_FLAG_G2_PVT_ADDR BIT_ULL(MV88E6XXX_CAP_G2_PVT_ADDR) +#define MV88E6XXX_FLAG_G2_PVT_DATA BIT_ULL(MV88E6XXX_CAP_G2_PVT_DATA) +#define MV88E6XXX_FLAG_G2_SWITCH_MAC BIT_ULL(MV88E6XXX_CAP_G2_SWITCH_MAC) +#define MV88E6XXX_FLAG_G2_POT BIT_ULL(MV88E6XXX_CAP_G2_POT) +#define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_CMD) +#define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_DATA) +#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD BIT_ULL(MV88E6XXX_CAP_G2_SMI_PHY_CMD) +#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA BIT_ULL(MV88E6XXX_CAP_G2_SMI_PHY_DATA) + +#define MV88E6XXX_FLAG_PPU BIT_ULL(MV88E6XXX_CAP_PPU) +#define MV88E6XXX_FLAG_PPU_ACTIVE BIT_ULL(MV88E6XXX_CAP_PPU_ACTIVE) +#define MV88E6XXX_FLAG_STU BIT_ULL(MV88E6XXX_CAP_STU) +#define MV88E6XXX_FLAG_TEMP BIT_ULL(MV88E6XXX_CAP_TEMP) +#define MV88E6XXX_FLAG_TEMP_LIMIT BIT_ULL(MV88E6XXX_CAP_TEMP_LIMIT) +#define MV88E6XXX_FLAG_VTU BIT_ULL(MV88E6XXX_CAP_VTU) /* EEPROM Programming via Global2 with 16-bit data */ #define MV88E6XXX_FLAGS_EEPROM16 \ @@ -615,7 +614,7 @@ struct mv88e6xxx_info { unsigned int num_ports; unsigned int port_base_addr; unsigned int age_time_coeff; - unsigned long flags; + unsigned long long flags; }; struct mv88e6xxx_atu_entry { diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 4788a89520dd..b3df70d07ff6 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -256,7 +256,7 @@ static struct regmap_access_table qca8k_readable_table = { .n_yes_ranges = ARRAY_SIZE(qca8k_readable_ranges), }; -struct regmap_config qca8k_regmap_config = { +static struct regmap_config qca8k_regmap_config = { .reg_bits = 16, .val_bits = 32, .reg_stride = 4, @@ -1032,19 +1032,7 @@ static struct mdio_driver qca8kmdio_driver = { }, }; -static int __init -qca8kmdio_driver_register(void) -{ - return mdio_driver_register(&qca8kmdio_driver); -} -module_init(qca8kmdio_driver_register); - -static void __exit -qca8kmdio_driver_unregister(void) -{ - mdio_driver_unregister(&qca8kmdio_driver); -} -module_exit(qca8kmdio_driver_unregister); +mdio_module_driver(qca8kmdio_driver); MODULE_AUTHOR("Mathieu Olivari, John Crispin <john@phrozen.org>"); MODULE_DESCRIPTION("Driver for QCA8K ethernet switch family"); diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index 246129650967..c6b71f656992 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o -cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o +cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 1f9867db3b78..ea0d1f188802 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -851,6 +851,9 @@ struct adapter { spinlock_t stats_lock; spinlock_t win0_lock ____cacheline_aligned_in_smp; + + /* TC u32 offload */ + struct cxgb4_tc_u32_table *tc_u32; }; /* Support for "sched-class" command to allow a TX Scheduling Class to be @@ -1025,6 +1028,32 @@ enum { VLAN_REWRITE }; +/* Host shadow copy of ingress filter entry. This is in host native format + * and doesn't match the ordering or bit order, etc. of the hardware of the + * firmware command. The use of bit-field structure elements is purely to + * remind ourselves of the field size limitations and save memory in the case + * where the filter table is large. + */ +struct filter_entry { + /* Administrative fields for filter. */ + u32 valid:1; /* filter allocated and valid */ + u32 locked:1; /* filter is administratively locked */ + + u32 pending:1; /* filter action is pending firmware reply */ + u32 smtidx:8; /* Source MAC Table index for smac */ + struct filter_ctx *ctx; /* Caller's completion hook */ + struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ + struct net_device *dev; /* Associated net device */ + u32 tid; /* This will store the actual tid */ + + /* The filter itself. Most of this is a straight copy of information + * provided by the extended ioctl(). Some fields are translated to + * internal forms -- for instance the Ingress Queue ID passed in from + * the ioctl() is translated into the Absolute Ingress Queue ID. + */ + struct ch_filter_specification fs; +}; + static inline int is_offload(const struct adapter *adap) { return adap->params.offload; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 52be9a4ef97a..20455d082cb8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2748,12 +2748,6 @@ static void add_debugfs_mem(struct adapter *adap, const char *name, size_mb << 20); } -static int blocked_fl_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - static ssize_t blocked_fl_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { @@ -2797,7 +2791,7 @@ static ssize_t blocked_fl_write(struct file *filp, const char __user *ubuf, static const struct file_operations blocked_fl_fops = { .owner = THIS_MODULE, - .open = blocked_fl_open, + .open = simple_open, .read = blocked_fl_read, .write = blocked_fl_write, .llseek = generic_file_llseek, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c new file mode 100644 index 000000000000..2a616171cb68 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -0,0 +1,721 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "cxgb4.h" +#include "t4_regs.h" +#include "l2t.h" +#include "t4fw_api.h" +#include "cxgb4_filter.h" + +static inline bool is_field_set(u32 val, u32 mask) +{ + return val || mask; +} + +static inline bool unsupported(u32 conf, u32 conf_mask, u32 val, u32 mask) +{ + return !(conf & conf_mask) && is_field_set(val, mask); +} + +/* Validate filter spec against configuration done on the card. */ +static int validate_filter(struct net_device *dev, + struct ch_filter_specification *fs) +{ + struct adapter *adapter = netdev2adap(dev); + u32 fconf, iconf; + + /* Check for unconfigured fields being used. */ + fconf = adapter->params.tp.vlan_pri_map; + iconf = adapter->params.tp.ingress_config; + + if (unsupported(fconf, FCOE_F, fs->val.fcoe, fs->mask.fcoe) || + unsupported(fconf, PORT_F, fs->val.iport, fs->mask.iport) || + unsupported(fconf, TOS_F, fs->val.tos, fs->mask.tos) || + unsupported(fconf, ETHERTYPE_F, fs->val.ethtype, + fs->mask.ethtype) || + unsupported(fconf, MACMATCH_F, fs->val.macidx, fs->mask.macidx) || + unsupported(fconf, MPSHITTYPE_F, fs->val.matchtype, + fs->mask.matchtype) || + unsupported(fconf, FRAGMENTATION_F, fs->val.frag, fs->mask.frag) || + unsupported(fconf, PROTOCOL_F, fs->val.proto, fs->mask.proto) || + unsupported(fconf, VNIC_ID_F, fs->val.pfvf_vld, + fs->mask.pfvf_vld) || + unsupported(fconf, VNIC_ID_F, fs->val.ovlan_vld, + fs->mask.ovlan_vld) || + unsupported(fconf, VLAN_F, fs->val.ivlan_vld, fs->mask.ivlan_vld)) + return -EOPNOTSUPP; + + /* T4 inconveniently uses the same FT_VNIC_ID_W bits for both the Outer + * VLAN Tag and PF/VF/VFvld fields based on VNIC_F being set + * in TP_INGRESS_CONFIG. Hense the somewhat crazy checks + * below. Additionally, since the T4 firmware interface also + * carries that overlap, we need to translate any PF/VF + * specification into that internal format below. + */ + if (is_field_set(fs->val.pfvf_vld, fs->mask.pfvf_vld) && + is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld)) + return -EOPNOTSUPP; + if (unsupported(iconf, VNIC_F, fs->val.pfvf_vld, fs->mask.pfvf_vld) || + (is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld) && + (iconf & VNIC_F))) + return -EOPNOTSUPP; + if (fs->val.pf > 0x7 || fs->val.vf > 0x7f) + return -ERANGE; + fs->mask.pf &= 0x7; + fs->mask.vf &= 0x7f; + + /* If the user is requesting that the filter action loop + * matching packets back out one of our ports, make sure that + * the egress port is in range. + */ + if (fs->action == FILTER_SWITCH && + fs->eport >= adapter->params.nports) + return -ERANGE; + + /* Don't allow various trivially obvious bogus out-of-range values... */ + if (fs->val.iport >= adapter->params.nports) + return -ERANGE; + + /* T4 doesn't support removing VLAN Tags for loop back filters. */ + if (is_t4(adapter->params.chip) && + fs->action == FILTER_SWITCH && + (fs->newvlan == VLAN_REMOVE || + fs->newvlan == VLAN_REWRITE)) + return -EOPNOTSUPP; + + return 0; +} + +static unsigned int get_filter_steerq(struct net_device *dev, + struct ch_filter_specification *fs) +{ + struct adapter *adapter = netdev2adap(dev); + unsigned int iq; + + /* If the user has requested steering matching Ingress Packets + * to a specific Queue Set, we need to make sure it's in range + * for the port and map that into the Absolute Queue ID of the + * Queue Set's Response Queue. + */ + if (!fs->dirsteer) { + if (fs->iq) + return -EINVAL; + iq = 0; + } else { + struct port_info *pi = netdev_priv(dev); + + /* If the iq id is greater than the number of qsets, + * then assume it is an absolute qid. + */ + if (fs->iq < pi->nqsets) + iq = adapter->sge.ethrxq[pi->first_qset + + fs->iq].rspq.abs_id; + else + iq = fs->iq; + } + + return iq; +} + +static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + + if (test_bit(fidx, t->ftid_bmap)) { + spin_unlock_bh(&t->ftid_lock); + return -EBUSY; + } + + if (family == PF_INET) + __set_bit(fidx, t->ftid_bmap); + else + bitmap_allocate_region(t->ftid_bmap, fidx, 2); + + spin_unlock_bh(&t->ftid_lock); + return 0; +} + +static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + if (family == PF_INET) + __clear_bit(fidx, t->ftid_bmap); + else + bitmap_release_region(t->ftid_bmap, fidx, 2); + spin_unlock_bh(&t->ftid_lock); +} + +/* Delete the filter at a specified index. */ +static int del_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct fw_filter_wr *fwr; + struct sk_buff *skb; + unsigned int len; + + len = sizeof(*fwr); + + skb = alloc_skb(len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + fwr = (struct fw_filter_wr *)__skb_put(skb, len); + t4_mk_filtdelwr(f->tid, fwr, adapter->sge.fw_evtq.abs_id); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + t4_mgmt_tx(adapter, skb); + return 0; +} + +/* Send a Work Request to write the filter at a specified index. We construct + * a Firmware Filter Work Request to have the work done and put the indicated + * filter into "pending" mode which will prevent any further actions against + * it till we get a reply from the firmware on the completion status of the + * request. + */ +int set_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct fw_filter_wr *fwr; + struct sk_buff *skb; + + skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); + if (!skb) + return -ENOMEM; + + /* If the new filter requires loopback Destination MAC and/or VLAN + * rewriting then we need to allocate a Layer 2 Table (L2T) entry for + * the filter. + */ + if (f->fs.newdmac || f->fs.newvlan) { + /* allocate L2T entry for new filter */ + f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan, + f->fs.eport, f->fs.dmac); + if (!f->l2t) { + kfree_skb(skb); + return -ENOMEM; + } + } + + fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); + memset(fwr, 0, sizeof(*fwr)); + + /* It would be nice to put most of the following in t4_hw.c but most + * of the work is translating the cxgbtool ch_filter_specification + * into the Work Request and the definition of that structure is + * currently in cxgbtool.h which isn't appropriate to pull into the + * common code. We may eventually try to come up with a more neutral + * filter specification structure but for now it's easiest to simply + * put this fairly direct code in line ... + */ + fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); + fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr) / 16)); + fwr->tid_to_iq = + htonl(FW_FILTER_WR_TID_V(f->tid) | + FW_FILTER_WR_RQTYPE_V(f->fs.type) | + FW_FILTER_WR_NOREPLY_V(0) | + FW_FILTER_WR_IQ_V(f->fs.iq)); + fwr->del_filter_to_l2tix = + htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) | + FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) | + FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) | + FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) | + FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | + FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | + FW_FILTER_WR_DMAC_V(f->fs.newdmac) | + FW_FILTER_WR_SMAC_V(f->fs.newsmac) | + FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || + f->fs.newvlan == VLAN_REWRITE) | + FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || + f->fs.newvlan == VLAN_REWRITE) | + FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) | + FW_FILTER_WR_TXCHAN_V(f->fs.eport) | + FW_FILTER_WR_PRIO_V(f->fs.prio) | + FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0)); + fwr->ethtype = htons(f->fs.val.ethtype); + fwr->ethtypem = htons(f->fs.mask.ethtype); + fwr->frag_to_ovlan_vldm = + (FW_FILTER_WR_FRAG_V(f->fs.val.frag) | + FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) | + FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) | + FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | + FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | + FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); + fwr->smac_sel = 0; + fwr->rx_chan_rx_rpl_iq = + htons(FW_FILTER_WR_RX_CHAN_V(0) | + FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); + fwr->maci_to_matchtypem = + htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) | + FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) | + FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) | + FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) | + FW_FILTER_WR_PORT_V(f->fs.val.iport) | + FW_FILTER_WR_PORTM_V(f->fs.mask.iport) | + FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) | + FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype)); + fwr->ptcl = f->fs.val.proto; + fwr->ptclm = f->fs.mask.proto; + fwr->ttyp = f->fs.val.tos; + fwr->ttypm = f->fs.mask.tos; + fwr->ivlan = htons(f->fs.val.ivlan); + fwr->ivlanm = htons(f->fs.mask.ivlan); + fwr->ovlan = htons(f->fs.val.ovlan); + fwr->ovlanm = htons(f->fs.mask.ovlan); + memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip)); + memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm)); + memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip)); + memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm)); + fwr->lp = htons(f->fs.val.lport); + fwr->lpm = htons(f->fs.mask.lport); + fwr->fp = htons(f->fs.val.fport); + fwr->fpm = htons(f->fs.mask.fport); + if (f->fs.newsmac) + memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); + t4_ofld_send(adapter, skb); + return 0; +} + +/* Return an error number if the indicated filter isn't writable ... */ +int writable_filter(struct filter_entry *f) +{ + if (f->locked) + return -EPERM; + if (f->pending) + return -EBUSY; + + return 0; +} + +/* Delete the filter at the specified index (if valid). The checks for all + * the common problems with doing this like the filter being locked, currently + * pending in another operation, etc. + */ +int delete_filter(struct adapter *adapter, unsigned int fidx) +{ + struct filter_entry *f; + int ret; + + if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) + return -EINVAL; + + f = &adapter->tids.ftid_tab[fidx]; + ret = writable_filter(f); + if (ret) + return ret; + if (f->valid) + return del_filter_wr(adapter, fidx); + + return 0; +} + +/* Clear a filter and release any of its resources that we own. This also + * clears the filter's "pending" status. + */ +void clear_filter(struct adapter *adap, struct filter_entry *f) +{ + /* If the new or old filter have loopback rewriteing rules then we'll + * need to free any existing Layer Two Table (L2T) entries of the old + * filter rule. The firmware will handle freeing up any Source MAC + * Table (SMT) entries used for rewriting Source MAC Addresses in + * loopback rules. + */ + if (f->l2t) + cxgb4_l2t_release(f->l2t); + + /* The zeroing of the filter rule below clears the filter valid, + * pending, locked flags, l2t pointer, etc. so it's all we need for + * this operation. + */ + memset(f, 0, sizeof(*f)); +} + +void clear_all_filters(struct adapter *adapter) +{ + unsigned int i; + + if (adapter->tids.ftid_tab) { + struct filter_entry *f = &adapter->tids.ftid_tab[0]; + unsigned int max_ftid = adapter->tids.nftids + + adapter->tids.nsftids; + + for (i = 0; i < max_ftid; i++, f++) + if (f->valid || f->pending) + clear_filter(adapter, f); + } +} + +/* Fill up default masks for set match fields. */ +static void fill_default_mask(struct ch_filter_specification *fs) +{ + unsigned int lip = 0, lip_mask = 0; + unsigned int fip = 0, fip_mask = 0; + unsigned int i; + + if (fs->val.iport && !fs->mask.iport) + fs->mask.iport |= ~0; + if (fs->val.fcoe && !fs->mask.fcoe) + fs->mask.fcoe |= ~0; + if (fs->val.matchtype && !fs->mask.matchtype) + fs->mask.matchtype |= ~0; + if (fs->val.macidx && !fs->mask.macidx) + fs->mask.macidx |= ~0; + if (fs->val.ethtype && !fs->mask.ethtype) + fs->mask.ethtype |= ~0; + if (fs->val.ivlan && !fs->mask.ivlan) + fs->mask.ivlan |= ~0; + if (fs->val.ovlan && !fs->mask.ovlan) + fs->mask.ovlan |= ~0; + if (fs->val.frag && !fs->mask.frag) + fs->mask.frag |= ~0; + if (fs->val.tos && !fs->mask.tos) + fs->mask.tos |= ~0; + if (fs->val.proto && !fs->mask.proto) + fs->mask.proto |= ~0; + + for (i = 0; i < ARRAY_SIZE(fs->val.lip); i++) { + lip |= fs->val.lip[i]; + lip_mask |= fs->mask.lip[i]; + fip |= fs->val.fip[i]; + fip_mask |= fs->mask.fip[i]; + } + + if (lip && !lip_mask) + memset(fs->mask.lip, ~0, sizeof(fs->mask.lip)); + + if (fip && !fip_mask) + memset(fs->mask.fip, ~0, sizeof(fs->mask.lip)); + + if (fs->val.lport && !fs->mask.lport) + fs->mask.lport = ~0; + if (fs->val.fport && !fs->mask.fport) + fs->mask.fport = ~0; +} + +/* Check a Chelsio Filter Request for validity, convert it into our internal + * format and send it to the hardware. Return 0 on success, an error number + * otherwise. We attach any provided filter operation context to the internal + * filter specification in order to facilitate signaling completion of the + * operation. + */ +int __cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs, + struct filter_ctx *ctx) +{ + struct adapter *adapter = netdev2adap(dev); + unsigned int max_fidx, fidx, iq; + struct filter_entry *f; + u32 iconf; + int ret; + + max_fidx = adapter->tids.nftids; + if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && + filter_id >= max_fidx) + return -E2BIG; + + fill_default_mask(fs); + + ret = validate_filter(dev, fs); + if (ret) + return ret; + + iq = get_filter_steerq(dev, fs); + if (iq < 0) + return iq; + + /* IPv6 filters occupy four slots and must be aligned on + * four-slot boundaries. IPv4 filters only occupy a single + * slot and have no alignment requirements but writing a new + * IPv4 filter into the middle of an existing IPv6 filter + * requires clearing the old IPv6 filter and hence we prevent + * insertion. + */ + if (fs->type == 0) { /* IPv4 */ + /* If our IPv4 filter isn't being written to a + * multiple of four filter index and there's an IPv6 + * filter at the multiple of 4 base slot, then we + * prevent insertion. + */ + fidx = filter_id & ~0x3; + if (fidx != filter_id && + adapter->tids.ftid_tab[fidx].fs.type) { + f = &adapter->tids.ftid_tab[fidx]; + if (f->valid) { + dev_err(adapter->pdev_dev, + "Invalid location. IPv6 requires 4 slots and is occupying slots %u to %u\n", + fidx, fidx + 3); + return -EINVAL; + } + } + } else { /* IPv6 */ + /* Ensure that the IPv6 filter is aligned on a + * multiple of 4 boundary. + */ + if (filter_id & 0x3) { + dev_err(adapter->pdev_dev, + "Invalid location. IPv6 must be aligned on a 4-slot boundary\n"); + return -EINVAL; + } + + /* Check all except the base overlapping IPv4 filter slots. */ + for (fidx = filter_id + 1; fidx < filter_id + 4; fidx++) { + f = &adapter->tids.ftid_tab[fidx]; + if (f->valid) { + dev_err(adapter->pdev_dev, + "Invalid location. IPv6 requires 4 slots and an IPv4 filter exists at %u\n", + fidx); + return -EINVAL; + } + } + } + + /* Check to make sure that provided filter index is not + * already in use by someone else + */ + f = &adapter->tids.ftid_tab[filter_id]; + if (f->valid) + return -EBUSY; + + fidx = filter_id + adapter->tids.ftid_base; + ret = cxgb4_set_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + if (ret) + return ret; + + /* Check to make sure the filter requested is writable ... */ + ret = writable_filter(f); + if (ret) { + /* Clear the bits we have set above */ + cxgb4_clear_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + return ret; + } + + /* Clear out any old resources being used by the filter before + * we start constructing the new filter. + */ + if (f->valid) + clear_filter(adapter, f); + + /* Convert the filter specification into our internal format. + * We copy the PF/VF specification into the Outer VLAN field + * here so the rest of the code -- including the interface to + * the firmware -- doesn't have to constantly do these checks. + */ + f->fs = *fs; + f->fs.iq = iq; + f->dev = dev; + + iconf = adapter->params.tp.ingress_config; + if (iconf & VNIC_F) { + f->fs.val.ovlan = (fs->val.pf << 13) | fs->val.vf; + f->fs.mask.ovlan = (fs->mask.pf << 13) | fs->mask.vf; + f->fs.val.ovlan_vld = fs->val.pfvf_vld; + f->fs.mask.ovlan_vld = fs->mask.pfvf_vld; + } + + /* Attempt to set the filter. If we don't succeed, we clear + * it and return the failure. + */ + f->ctx = ctx; + f->tid = fidx; /* Save the actual tid */ + ret = set_filter_wr(adapter, filter_id); + if (ret) { + cxgb4_clear_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + clear_filter(adapter, f); + } + + return ret; +} + +/* Check a delete filter request for validity and send it to the hardware. + * Return 0 on success, an error number otherwise. We attach any provided + * filter operation context to the internal filter specification in order to + * facilitate signaling completion of the operation. + */ +int __cxgb4_del_filter(struct net_device *dev, int filter_id, + struct filter_ctx *ctx) +{ + struct adapter *adapter = netdev2adap(dev); + struct filter_entry *f; + unsigned int max_fidx; + int ret; + + max_fidx = adapter->tids.nftids; + if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && + filter_id >= max_fidx) + return -E2BIG; + + f = &adapter->tids.ftid_tab[filter_id]; + ret = writable_filter(f); + if (ret) + return ret; + + if (f->valid) { + f->ctx = ctx; + cxgb4_clear_ftid(&adapter->tids, filter_id, + f->fs.type ? PF_INET6 : PF_INET); + return del_filter_wr(adapter, filter_id); + } + + /* If the caller has passed in a Completion Context then we need to + * mark it as a successful completion so they don't stall waiting + * for it. + */ + if (ctx) { + ctx->result = 0; + complete(&ctx->completion); + } + return ret; +} + +int cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs) +{ + struct filter_ctx ctx; + int ret; + + init_completion(&ctx.completion); + + ret = __cxgb4_set_filter(dev, filter_id, fs, &ctx); + if (ret) + goto out; + + /* Wait for reply */ + ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ); + if (!ret) + return -ETIMEDOUT; + + ret = ctx.result; +out: + return ret; +} + +int cxgb4_del_filter(struct net_device *dev, int filter_id) +{ + struct filter_ctx ctx; + int ret; + + init_completion(&ctx.completion); + + ret = __cxgb4_del_filter(dev, filter_id, &ctx); + if (ret) + goto out; + + /* Wait for reply */ + ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ); + if (!ret) + return -ETIMEDOUT; + + ret = ctx.result; +out: + return ret; +} + +/* Handle a filter write/deletion reply. */ +void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) +{ + unsigned int tid = GET_TID(rpl); + struct filter_entry *f = NULL; + unsigned int max_fidx; + int idx; + + max_fidx = adap->tids.nftids + adap->tids.nsftids; + /* Get the corresponding filter entry for this tid */ + if (adap->tids.ftid_tab) { + /* Check this in normal filter region */ + idx = tid - adap->tids.ftid_base; + if (idx >= max_fidx) + return; + f = &adap->tids.ftid_tab[idx]; + if (f->tid != tid) + return; + } + + /* We found the filter entry for this tid */ + if (f) { + unsigned int ret = TCB_COOKIE_G(rpl->cookie); + struct filter_ctx *ctx; + + /* Pull off any filter operation context attached to the + * filter. + */ + ctx = f->ctx; + f->ctx = NULL; + + if (ret == FW_FILTER_WR_FLT_DELETED) { + /* Clear the filter when we get confirmation from the + * hardware that the filter has been deleted. + */ + clear_filter(adap, f); + if (ctx) + ctx->result = 0; + } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { + dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", + idx); + clear_filter(adap, f); + if (ctx) + ctx->result = -ENOMEM; + } else if (ret == FW_FILTER_WR_FLT_ADDED) { + f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; + f->pending = 0; /* asynchronous setup completed */ + f->valid = 1; + if (ctx) { + ctx->result = 0; + ctx->tid = idx; + } + } else { + /* Something went wrong. Issue a warning about the + * problem and clear everything out. + */ + dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", + idx, ret); + clear_filter(adap, f); + if (ctx) + ctx->result = -EINVAL; + } + if (ctx) + complete(&ctx->completion); + } +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h new file mode 100644 index 000000000000..23742cb1c69f --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -0,0 +1,48 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_FILTER_H +#define __CXGB4_FILTER_H + +#include "t4_msg.h" + +void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl); +void clear_filter(struct adapter *adap, struct filter_entry *f); + +int set_filter_wr(struct adapter *adapter, int fidx); +int delete_filter(struct adapter *adapter, unsigned int fidx); + +int writable_filter(struct filter_entry *f); +void clear_all_filters(struct adapter *adapter); +#endif /* __CXGB4_FILTER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index d1ebb84c073e..1be4d235a576 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -67,6 +67,7 @@ #include <linux/crash_dump.h> #include "cxgb4.h" +#include "cxgb4_filter.h" #include "t4_regs.h" #include "t4_values.h" #include "t4_msg.h" @@ -77,6 +78,7 @@ #include "clip_tbl.h" #include "l2t.h" #include "sched.h" +#include "cxgb4_tc_u32.h" char cxgb4_driver_name[] = KBUILD_MODNAME; @@ -87,30 +89,6 @@ char cxgb4_driver_name[] = KBUILD_MODNAME; const char cxgb4_driver_version[] = DRV_VERSION; #define DRV_DESC "Chelsio T4/T5/T6 Network Driver" -/* Host shadow copy of ingress filter entry. This is in host native format - * and doesn't match the ordering or bit order, etc. of the hardware of the - * firmware command. The use of bit-field structure elements is purely to - * remind ourselves of the field size limitations and save memory in the case - * where the filter table is large. - */ -struct filter_entry { - /* Administrative fields for filter. - */ - u32 valid:1; /* filter allocated and valid */ - u32 locked:1; /* filter is administratively locked */ - - u32 pending:1; /* filter action is pending firmware reply */ - u32 smtidx:8; /* Source MAC Table index for smac */ - struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ - - /* The filter itself. Most of this is a straight copy of information - * provided by the extended ioctl(). Some fields are translated to - * internal forms -- for instance the Ingress Queue ID passed in from - * the ioctl() is translated into the Absolute Ingress Queue ID. - */ - struct ch_filter_specification fs; -}; - #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\ NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) @@ -527,66 +505,6 @@ static void dcb_rpl(struct adapter *adap, const struct fw_port_cmd *pcmd) } #endif /* CONFIG_CHELSIO_T4_DCB */ -/* Clear a filter and release any of its resources that we own. This also - * clears the filter's "pending" status. - */ -static void clear_filter(struct adapter *adap, struct filter_entry *f) -{ - /* If the new or old filter have loopback rewriteing rules then we'll - * need to free any existing Layer Two Table (L2T) entries of the old - * filter rule. The firmware will handle freeing up any Source MAC - * Table (SMT) entries used for rewriting Source MAC Addresses in - * loopback rules. - */ - if (f->l2t) - cxgb4_l2t_release(f->l2t); - - /* The zeroing of the filter rule below clears the filter valid, - * pending, locked flags, l2t pointer, etc. so it's all we need for - * this operation. - */ - memset(f, 0, sizeof(*f)); -} - -/* Handle a filter write/deletion reply. - */ -static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) -{ - unsigned int idx = GET_TID(rpl); - unsigned int nidx = idx - adap->tids.ftid_base; - unsigned int ret; - struct filter_entry *f; - - if (idx >= adap->tids.ftid_base && nidx < - (adap->tids.nftids + adap->tids.nsftids)) { - idx = nidx; - ret = TCB_COOKIE_G(rpl->cookie); - f = &adap->tids.ftid_tab[idx]; - - if (ret == FW_FILTER_WR_FLT_DELETED) { - /* Clear the filter when we get confirmation from the - * hardware that the filter has been deleted. - */ - clear_filter(adap, f); - } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { - dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", - idx); - clear_filter(adap, f); - } else if (ret == FW_FILTER_WR_FLT_ADDED) { - f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; - f->pending = 0; /* asynchronous setup completed */ - f->valid = 1; - } else { - /* Something went wrong. Issue a warning about the - * problem and clear everything out. - */ - dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", - idx, ret); - clear_filter(adap, f); - } - } -} - /* Response queue handler for the FW event queue. */ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, @@ -1026,151 +944,6 @@ void t4_free_mem(void *addr) kvfree(addr); } -/* Send a Work Request to write the filter at a specified index. We construct - * a Firmware Filter Work Request to have the work done and put the indicated - * filter into "pending" mode which will prevent any further actions against - * it till we get a reply from the firmware on the completion status of the - * request. - */ -static int set_filter_wr(struct adapter *adapter, int fidx) -{ - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; - struct sk_buff *skb; - struct fw_filter_wr *fwr; - unsigned int ftid; - - skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); - if (!skb) - return -ENOMEM; - - /* If the new filter requires loopback Destination MAC and/or VLAN - * rewriting then we need to allocate a Layer 2 Table (L2T) entry for - * the filter. - */ - if (f->fs.newdmac || f->fs.newvlan) { - /* allocate L2T entry for new filter */ - f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan, - f->fs.eport, f->fs.dmac); - if (f->l2t == NULL) { - kfree_skb(skb); - return -ENOMEM; - } - } - - ftid = adapter->tids.ftid_base + fidx; - - fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); - memset(fwr, 0, sizeof(*fwr)); - - /* It would be nice to put most of the following in t4_hw.c but most - * of the work is translating the cxgbtool ch_filter_specification - * into the Work Request and the definition of that structure is - * currently in cxgbtool.h which isn't appropriate to pull into the - * common code. We may eventually try to come up with a more neutral - * filter specification structure but for now it's easiest to simply - * put this fairly direct code in line ... - */ - fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); - fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16)); - fwr->tid_to_iq = - htonl(FW_FILTER_WR_TID_V(ftid) | - FW_FILTER_WR_RQTYPE_V(f->fs.type) | - FW_FILTER_WR_NOREPLY_V(0) | - FW_FILTER_WR_IQ_V(f->fs.iq)); - fwr->del_filter_to_l2tix = - htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) | - FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) | - FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) | - FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) | - FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | - FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | - FW_FILTER_WR_DMAC_V(f->fs.newdmac) | - FW_FILTER_WR_SMAC_V(f->fs.newsmac) | - FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || - f->fs.newvlan == VLAN_REWRITE) | - FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || - f->fs.newvlan == VLAN_REWRITE) | - FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) | - FW_FILTER_WR_TXCHAN_V(f->fs.eport) | - FW_FILTER_WR_PRIO_V(f->fs.prio) | - FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0)); - fwr->ethtype = htons(f->fs.val.ethtype); - fwr->ethtypem = htons(f->fs.mask.ethtype); - fwr->frag_to_ovlan_vldm = - (FW_FILTER_WR_FRAG_V(f->fs.val.frag) | - FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) | - FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) | - FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | - FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | - FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); - fwr->smac_sel = 0; - fwr->rx_chan_rx_rpl_iq = - htons(FW_FILTER_WR_RX_CHAN_V(0) | - FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); - fwr->maci_to_matchtypem = - htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) | - FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) | - FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) | - FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) | - FW_FILTER_WR_PORT_V(f->fs.val.iport) | - FW_FILTER_WR_PORTM_V(f->fs.mask.iport) | - FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) | - FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype)); - fwr->ptcl = f->fs.val.proto; - fwr->ptclm = f->fs.mask.proto; - fwr->ttyp = f->fs.val.tos; - fwr->ttypm = f->fs.mask.tos; - fwr->ivlan = htons(f->fs.val.ivlan); - fwr->ivlanm = htons(f->fs.mask.ivlan); - fwr->ovlan = htons(f->fs.val.ovlan); - fwr->ovlanm = htons(f->fs.mask.ovlan); - memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip)); - memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm)); - memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip)); - memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm)); - fwr->lp = htons(f->fs.val.lport); - fwr->lpm = htons(f->fs.mask.lport); - fwr->fp = htons(f->fs.val.fport); - fwr->fpm = htons(f->fs.mask.fport); - if (f->fs.newsmac) - memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); - - /* Mark the filter as "pending" and ship off the Filter Work Request. - * When we get the Work Request Reply we'll clear the pending status. - */ - f->pending = 1; - set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); - t4_ofld_send(adapter, skb); - return 0; -} - -/* Delete the filter at a specified index. - */ -static int del_filter_wr(struct adapter *adapter, int fidx) -{ - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; - struct sk_buff *skb; - struct fw_filter_wr *fwr; - unsigned int len, ftid; - - len = sizeof(*fwr); - ftid = adapter->tids.ftid_base + fidx; - - skb = alloc_skb(len, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - fwr = (struct fw_filter_wr *)__skb_put(skb, len); - t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); - - /* Mark the filter as "pending" and ship off the Filter Work Request. - * When we get the Work Request Reply we'll clear the pending status. - */ - f->pending = 1; - t4_mgmt_tx(adapter, skb); - return 0; -} - static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { @@ -1552,19 +1325,22 @@ EXPORT_SYMBOL(cxgb4_remove_tid); */ static int tid_init(struct tid_info *t) { - size_t size; - unsigned int stid_bmap_size; - unsigned int natids = t->natids; struct adapter *adap = container_of(t, struct adapter, tids); + unsigned int max_ftids = t->nftids + t->nsftids; + unsigned int natids = t->natids; + unsigned int stid_bmap_size; + unsigned int ftid_bmap_size; + size_t size; stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); + ftid_bmap_size = BITS_TO_LONGS(t->nftids); size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) + t->nstids * sizeof(*t->stid_tab) + t->nsftids * sizeof(*t->stid_tab) + stid_bmap_size * sizeof(long) + - t->nftids * sizeof(*t->ftid_tab) + - t->nsftids * sizeof(*t->ftid_tab); + max_ftids * sizeof(*t->ftid_tab) + + ftid_bmap_size * sizeof(long); t->tid_tab = t4_alloc_mem(size); if (!t->tid_tab) @@ -1574,8 +1350,10 @@ static int tid_init(struct tid_info *t) t->stid_tab = (struct serv_entry *)&t->atid_tab[natids]; t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids]; t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; + t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids]; spin_lock_init(&t->stid_lock); spin_lock_init(&t->atid_lock); + spin_lock_init(&t->ftid_lock); t->stids_in_use = 0; t->sftids_in_use = 0; @@ -1590,12 +1368,16 @@ static int tid_init(struct tid_info *t) t->atid_tab[natids - 1].next = &t->atid_tab[natids]; t->afree = t->atid_tab; } - bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); - /* Reserve stid 0 for T4/T5 adapters */ - if (!t->stid_base && - (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)) - __set_bit(0, t->stid_bmap); + if (is_offload(adap)) { + bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); + /* Reserve stid 0 for T4/T5 adapters */ + if (!t->stid_base && + CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5) + __set_bit(0, t->stid_bmap); + } + + bitmap_zero(t->ftid_bmap, t->nftids); return 0; } @@ -2514,40 +2296,6 @@ static int cxgb_close(struct net_device *dev) return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false); } -/* Return an error number if the indicated filter isn't writable ... - */ -static int writable_filter(struct filter_entry *f) -{ - if (f->locked) - return -EPERM; - if (f->pending) - return -EBUSY; - - return 0; -} - -/* Delete the filter at the specified index (if valid). The checks for all - * the common problems with doing this like the filter being locked, currently - * pending in another operation, etc. - */ -static int delete_filter(struct adapter *adapter, unsigned int fidx) -{ - struct filter_entry *f; - int ret; - - if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) - return -EINVAL; - - f = &adapter->tids.ftid_tab[fidx]; - ret = writable_filter(f); - if (ret) - return ret; - if (f->valid) - return del_filter_wr(adapter, fidx); - - return 0; -} - int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, __be32 sip, __be16 sport, __be16 vlan, unsigned int queue, unsigned char port, unsigned char mask) @@ -2964,6 +2712,35 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } +int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); + + if (!(adap->flags & FULL_INIT_DONE)) { + dev_err(adap->pdev_dev, + "Failed to setup tc on port %d. Link Down?\n", + pi->port_id); + return -EINVAL; + } + + if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) && + tc->type == TC_SETUP_CLSU32) { + switch (tc->cls_u32->command) { + case TC_CLSU32_NEW_KNODE: + case TC_CLSU32_REPLACE_KNODE: + return cxgb4_config_knode(dev, proto, tc->cls_u32); + case TC_CLSU32_DELETE_KNODE: + return cxgb4_delete_knode(dev, proto, tc->cls_u32); + default: + return -EOPNOTSUPP; + } + } + + return -EOPNOTSUPP; +} + static const struct net_device_ops cxgb4_netdev_ops = { .ndo_open = cxgb_open, .ndo_stop = cxgb_close, @@ -2987,6 +2764,7 @@ static const struct net_device_ops cxgb4_netdev_ops = { .ndo_busy_poll = cxgb_busy_poll, #endif .ndo_set_tx_maxrate = cxgb_set_tx_maxrate, + .ndo_setup_tc = cxgb_setup_tc, }; #ifdef CONFIG_PCI_IOV @@ -4659,6 +4437,7 @@ static void free_some_resources(struct adapter *adapter) t4_free_mem(adapter->l2t); t4_cleanup_sched(adapter); t4_free_mem(adapter->tids.tid_tab); + cxgb4_cleanup_tc_u32(adapter); kfree(adapter->sge.egr_map); kfree(adapter->sge.ingr_map); kfree(adapter->sge.starving_fl); @@ -5003,7 +4782,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_RXHASH | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_TC; if (highdma) netdev->hw_features |= NETIF_F_HIGHDMA; netdev->features |= netdev->hw_features; @@ -5087,10 +4867,16 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) i); } - if (is_offload(adapter) && tid_init(&adapter->tids) < 0) { + if (tid_init(&adapter->tids) < 0) { dev_warn(&pdev->dev, "could not allocate TID table, " "continuing\n"); adapter->params.offload = 0; + } else { + adapter->tc_u32 = cxgb4_init_tc_u32(adapter, + CXGB4_MAX_LINK_HANDLE); + if (!adapter->tc_u32) + dev_warn(&pdev->dev, + "could not offload tc u32, continuing\n"); } if (is_offload(adapter)) { @@ -5274,13 +5060,7 @@ static void remove_one(struct pci_dev *pdev) /* If we allocated filters, free up state associated with any * valid filters ... */ - if (adapter->tids.ftid_tab) { - struct filter_entry *f = &adapter->tids.ftid_tab[0]; - for (i = 0; i < (adapter->tids.nftids + - adapter->tids.nsftids); i++, f++) - if (f->valid) - clear_filter(adapter, f); - } + clear_all_filters(adapter); if (adapter->flags & FULL_INIT_DONE) cxgb_down(adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c new file mode 100644 index 000000000000..49d2debb334e --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -0,0 +1,483 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> + +#include "cxgb4.h" +#include "cxgb4_tc_u32_parse.h" +#include "cxgb4_tc_u32.h" + +/* Fill ch_filter_specification with parsed match value/mask pair. */ +static int fill_match_fields(struct adapter *adap, + struct ch_filter_specification *fs, + struct tc_cls_u32_offload *cls, + const struct cxgb4_match_field *entry, + bool next_header) +{ + unsigned int i, j; + u32 val, mask; + int off, err; + bool found; + + for (i = 0; i < cls->knode.sel->nkeys; i++) { + off = cls->knode.sel->keys[i].off; + val = cls->knode.sel->keys[i].val; + mask = cls->knode.sel->keys[i].mask; + + if (next_header) { + /* For next headers, parse only keys with offmask */ + if (!cls->knode.sel->keys[i].offmask) + continue; + } else { + /* For the remaining, parse only keys without offmask */ + if (cls->knode.sel->keys[i].offmask) + continue; + } + + found = false; + + for (j = 0; entry[j].val; j++) { + if (off == entry[j].off) { + found = true; + err = entry[j].val(fs, val, mask); + if (err) + return err; + break; + } + } + + if (!found) + return -EINVAL; + } + + return 0; +} + +/* Fill ch_filter_specification with parsed action. */ +static int fill_action_fields(struct adapter *adap, + struct ch_filter_specification *fs, + struct tc_cls_u32_offload *cls) +{ + unsigned int num_actions = 0; + const struct tc_action *a; + struct tcf_exts *exts; + LIST_HEAD(actions); + + exts = cls->knode.exts; + if (tc_no_actions(exts)) + return -EINVAL; + + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { + /* Don't allow more than one action per rule. */ + if (num_actions) + return -EINVAL; + + /* Drop in hardware. */ + if (is_tcf_gact_shot(a)) { + fs->action = FILTER_DROP; + num_actions++; + continue; + } + + /* Re-direct to specified port in hardware. */ + if (is_tcf_mirred_redirect(a)) { + struct net_device *n_dev; + unsigned int i, index; + bool found = false; + + index = tcf_mirred_ifindex(a); + for_each_port(adap, i) { + n_dev = adap->port[i]; + if (index == n_dev->ifindex) { + fs->action = FILTER_SWITCH; + fs->eport = i; + found = true; + break; + } + } + + /* Interface doesn't belong to any port of + * the underlying hardware. + */ + if (!found) + return -EINVAL; + + num_actions++; + continue; + } + + /* Un-supported action. */ + return -EINVAL; + } + + return 0; +} + +int cxgb4_config_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls) +{ + const struct cxgb4_match_field *start, *link_start = NULL; + struct adapter *adapter = netdev2adap(dev); + struct ch_filter_specification fs; + struct cxgb4_tc_u32_table *t; + struct cxgb4_link *link; + unsigned int filter_id; + u32 uhtid, link_uhtid; + bool is_ipv6 = false; + int ret; + + if (!can_tc_u32_offload(dev)) + return -EOPNOTSUPP; + + if (protocol != htons(ETH_P_IP) && protocol != htons(ETH_P_IPV6)) + return -EOPNOTSUPP; + + /* Fetch the location to insert the filter. */ + filter_id = cls->knode.handle & 0xFFFFF; + + if (filter_id > adapter->tids.nftids) { + dev_err(adapter->pdev_dev, + "Location %d out of range for insertion. Max: %d\n", + filter_id, adapter->tids.nftids); + return -ERANGE; + } + + t = adapter->tc_u32; + uhtid = TC_U32_USERHTID(cls->knode.handle); + link_uhtid = TC_U32_USERHTID(cls->knode.link_handle); + + /* Ensure that uhtid is either root u32 (i.e. 0x800) + * or a a valid linked bucket. + */ + if (uhtid != 0x800 && uhtid >= t->size) + return -EINVAL; + + /* Ensure link handle uhtid is sane, if specified. */ + if (link_uhtid >= t->size) + return -EINVAL; + + memset(&fs, 0, sizeof(fs)); + + if (protocol == htons(ETH_P_IPV6)) { + start = cxgb4_ipv6_fields; + is_ipv6 = true; + } else { + start = cxgb4_ipv4_fields; + is_ipv6 = false; + } + + if (uhtid != 0x800) { + /* Link must exist from root node before insertion. */ + if (!t->table[uhtid - 1].link_handle) + return -EINVAL; + + /* Link must have a valid supported next header. */ + link_start = t->table[uhtid - 1].match_field; + if (!link_start) + return -EINVAL; + } + + /* Parse links and record them for subsequent jumps to valid + * next headers. + */ + if (link_uhtid) { + const struct cxgb4_next_header *next; + bool found = false; + unsigned int i, j; + u32 val, mask; + int off; + + if (t->table[link_uhtid - 1].link_handle) { + dev_err(adapter->pdev_dev, + "Link handle exists for: 0x%x\n", + link_uhtid); + return -EINVAL; + } + + next = is_ipv6 ? cxgb4_ipv6_jumps : cxgb4_ipv4_jumps; + + /* Try to find matches that allow jumps to next header. */ + for (i = 0; next[i].jump; i++) { + if (next[i].offoff != cls->knode.sel->offoff || + next[i].shift != cls->knode.sel->offshift || + next[i].mask != cls->knode.sel->offmask || + next[i].offset != cls->knode.sel->off) + continue; + + /* Found a possible candidate. Find a key that + * matches the corresponding offset, value, and + * mask to jump to next header. + */ + for (j = 0; j < cls->knode.sel->nkeys; j++) { + off = cls->knode.sel->keys[j].off; + val = cls->knode.sel->keys[j].val; + mask = cls->knode.sel->keys[j].mask; + + if (next[i].match_off == off && + next[i].match_val == val && + next[i].match_mask == mask) { + found = true; + break; + } + } + + if (!found) + continue; /* Try next candidate. */ + + /* Candidate to jump to next header found. + * Translate all keys to internal specification + * and store them in jump table. This spec is copied + * later to set the actual filters. + */ + ret = fill_match_fields(adapter, &fs, cls, + start, false); + if (ret) + goto out; + + link = &t->table[link_uhtid - 1]; + link->match_field = next[i].jump; + link->link_handle = cls->knode.handle; + memcpy(&link->fs, &fs, sizeof(fs)); + break; + } + + /* No candidate found to jump to next header. */ + if (!found) + return -EINVAL; + + return 0; + } + + /* Fill ch_filter_specification match fields to be shipped to hardware. + * Copy the linked spec (if any) first. And then update the spec as + * needed. + */ + if (uhtid != 0x800 && t->table[uhtid - 1].link_handle) { + /* Copy linked ch_filter_specification */ + memcpy(&fs, &t->table[uhtid - 1].fs, sizeof(fs)); + ret = fill_match_fields(adapter, &fs, cls, + link_start, true); + if (ret) + goto out; + } + + ret = fill_match_fields(adapter, &fs, cls, start, false); + if (ret) + goto out; + + /* Fill ch_filter_specification action fields to be shipped to + * hardware. + */ + ret = fill_action_fields(adapter, &fs, cls); + if (ret) + goto out; + + /* The filter spec has been completely built from the info + * provided from u32. We now set some default fields in the + * spec for sanity. + */ + + /* Match only packets coming from the ingress port where this + * filter will be created. + */ + fs.val.iport = netdev2pinfo(dev)->port_id; + fs.mask.iport = ~0; + + /* Enable filter hit counts. */ + fs.hitcnts = 1; + + /* Set type of filter - IPv6 or IPv4 */ + fs.type = is_ipv6 ? 1 : 0; + + /* Set the filter */ + ret = cxgb4_set_filter(dev, filter_id, &fs); + if (ret) + goto out; + + /* If this is a linked bucket, then set the corresponding + * entry in the bitmap to mark it as belonging to this linked + * bucket. + */ + if (uhtid != 0x800 && t->table[uhtid - 1].link_handle) + set_bit(filter_id, t->table[uhtid - 1].tid_map); + +out: + return ret; +} + +int cxgb4_delete_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls) +{ + struct adapter *adapter = netdev2adap(dev); + unsigned int filter_id, max_tids, i, j; + struct cxgb4_link *link = NULL; + struct cxgb4_tc_u32_table *t; + u32 handle, uhtid; + int ret; + + if (!can_tc_u32_offload(dev)) + return -EOPNOTSUPP; + + /* Fetch the location to delete the filter. */ + filter_id = cls->knode.handle & 0xFFFFF; + + if (filter_id > adapter->tids.nftids) { + dev_err(adapter->pdev_dev, + "Location %d out of range for deletion. Max: %d\n", + filter_id, adapter->tids.nftids); + return -ERANGE; + } + + t = adapter->tc_u32; + handle = cls->knode.handle; + uhtid = TC_U32_USERHTID(cls->knode.handle); + + /* Ensure that uhtid is either root u32 (i.e. 0x800) + * or a a valid linked bucket. + */ + if (uhtid != 0x800 && uhtid >= t->size) + return -EINVAL; + + /* Delete the specified filter */ + if (uhtid != 0x800) { + link = &t->table[uhtid - 1]; + if (!link->link_handle) + return -EINVAL; + + if (!test_bit(filter_id, link->tid_map)) + return -EINVAL; + } + + ret = cxgb4_del_filter(dev, filter_id); + if (ret) + goto out; + + if (link) + clear_bit(filter_id, link->tid_map); + + /* If a link is being deleted, then delete all filters + * associated with the link. + */ + max_tids = adapter->tids.nftids; + for (i = 0; i < t->size; i++) { + link = &t->table[i]; + + if (link->link_handle == handle) { + for (j = 0; j < max_tids; j++) { + if (!test_bit(j, link->tid_map)) + continue; + + ret = __cxgb4_del_filter(dev, j, NULL); + if (ret) + goto out; + + clear_bit(j, link->tid_map); + } + + /* Clear the link state */ + link->match_field = NULL; + link->link_handle = 0; + memset(&link->fs, 0, sizeof(link->fs)); + break; + } + } + +out: + return ret; +} + +void cxgb4_cleanup_tc_u32(struct adapter *adap) +{ + struct cxgb4_tc_u32_table *t; + unsigned int i; + + if (!adap->tc_u32) + return; + + /* Free up all allocated memory. */ + t = adap->tc_u32; + for (i = 0; i < t->size; i++) { + struct cxgb4_link *link = &t->table[i]; + + t4_free_mem(link->tid_map); + } + t4_free_mem(adap->tc_u32); +} + +struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap, + unsigned int size) +{ + struct cxgb4_tc_u32_table *t; + unsigned int i; + + if (!size) + return NULL; + + t = t4_alloc_mem(sizeof(*t) + + (size * sizeof(struct cxgb4_link))); + if (!t) + return NULL; + + t->size = size; + + for (i = 0; i < t->size; i++) { + struct cxgb4_link *link = &t->table[i]; + unsigned int bmap_size; + unsigned int max_tids; + + max_tids = adap->tids.nftids; + bmap_size = BITS_TO_LONGS(max_tids); + link->tid_map = t4_alloc_mem(sizeof(unsigned long) * bmap_size); + if (!link->tid_map) + goto out_no_mem; + bitmap_zero(link->tid_map, max_tids); + } + + return t; + +out_no_mem: + for (i = 0; i < t->size; i++) { + struct cxgb4_link *link = &t->table[i]; + + if (link->tid_map) + t4_free_mem(link->tid_map); + } + + if (t) + t4_free_mem(t); + + return NULL; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h new file mode 100644 index 000000000000..6bdc885eff22 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h @@ -0,0 +1,57 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_TC_U32_H +#define __CXGB4_TC_U32_H + +#include <net/pkt_cls.h> + +#define CXGB4_MAX_LINK_HANDLE 32 + +static inline bool can_tc_u32_offload(struct net_device *dev) +{ + struct adapter *adap = netdev2adap(dev); + + return (dev->features & NETIF_F_HW_TC) && adap->tc_u32 ? true : false; +} + +int cxgb4_config_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls); +int cxgb4_delete_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls); + +void cxgb4_cleanup_tc_u32(struct adapter *adapter); +struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap, + unsigned int size); +#endif /* __CXGB4_TC_U32_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h new file mode 100644 index 000000000000..a4b99edcc339 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h @@ -0,0 +1,294 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_TC_U32_PARSE_H +#define __CXGB4_TC_U32_PARSE_H + +struct cxgb4_match_field { + int off; /* Offset from the beginning of the header to match */ + /* Fill the value/mask pair in the spec if matched */ + int (*val)(struct ch_filter_specification *f, u32 val, u32 mask); +}; + +/* IPv4 match fields */ +static inline int cxgb4_fill_ipv4_tos(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.tos = (ntohl(val) >> 16) & 0x000000FF; + f->mask.tos = (ntohl(mask) >> 16) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv4_frag(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + u32 mask_val; + u8 frag_val; + + frag_val = (ntohl(val) >> 13) & 0x00000007; + mask_val = ntohl(mask) & 0x0000FFFF; + + if (frag_val == 0x1 && mask_val != 0x3FFF) { /* MF set */ + f->val.frag = 1; + f->mask.frag = 1; + } else if (frag_val == 0x2 && mask_val != 0x3FFF) { /* DF set */ + f->val.frag = 0; + f->mask.frag = 1; + } else { + return -EINVAL; + } + + return 0; +} + +static inline int cxgb4_fill_ipv4_proto(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.proto = (ntohl(val) >> 16) & 0x000000FF; + f->mask.proto = (ntohl(mask) >> 16) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv4_src_ip(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[0], &val, sizeof(u32)); + memcpy(&f->mask.fip[0], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv4_dst_ip(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[0], &val, sizeof(u32)); + memcpy(&f->mask.lip[0], &mask, sizeof(u32)); + + return 0; +} + +static const struct cxgb4_match_field cxgb4_ipv4_fields[] = { + { .off = 0, .val = cxgb4_fill_ipv4_tos }, + { .off = 4, .val = cxgb4_fill_ipv4_frag }, + { .off = 8, .val = cxgb4_fill_ipv4_proto }, + { .off = 12, .val = cxgb4_fill_ipv4_src_ip }, + { .off = 16, .val = cxgb4_fill_ipv4_dst_ip }, + { .val = NULL } +}; + +/* IPv6 match fields */ +static inline int cxgb4_fill_ipv6_tos(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.tos = (ntohl(val) >> 20) & 0x000000FF; + f->mask.tos = (ntohl(mask) >> 20) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv6_proto(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.proto = (ntohl(val) >> 8) & 0x000000FF; + f->mask.proto = (ntohl(mask) >> 8) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip0(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[0], &val, sizeof(u32)); + memcpy(&f->mask.fip[0], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip1(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[4], &val, sizeof(u32)); + memcpy(&f->mask.fip[4], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip2(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[8], &val, sizeof(u32)); + memcpy(&f->mask.fip[8], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip3(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[12], &val, sizeof(u32)); + memcpy(&f->mask.fip[12], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip0(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[0], &val, sizeof(u32)); + memcpy(&f->mask.lip[0], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip1(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[4], &val, sizeof(u32)); + memcpy(&f->mask.lip[4], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip2(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[8], &val, sizeof(u32)); + memcpy(&f->mask.lip[8], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip3(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[12], &val, sizeof(u32)); + memcpy(&f->mask.lip[12], &mask, sizeof(u32)); + + return 0; +} + +static const struct cxgb4_match_field cxgb4_ipv6_fields[] = { + { .off = 0, .val = cxgb4_fill_ipv6_tos }, + { .off = 4, .val = cxgb4_fill_ipv6_proto }, + { .off = 8, .val = cxgb4_fill_ipv6_src_ip0 }, + { .off = 12, .val = cxgb4_fill_ipv6_src_ip1 }, + { .off = 16, .val = cxgb4_fill_ipv6_src_ip2 }, + { .off = 20, .val = cxgb4_fill_ipv6_src_ip3 }, + { .off = 24, .val = cxgb4_fill_ipv6_dst_ip0 }, + { .off = 28, .val = cxgb4_fill_ipv6_dst_ip1 }, + { .off = 32, .val = cxgb4_fill_ipv6_dst_ip2 }, + { .off = 36, .val = cxgb4_fill_ipv6_dst_ip3 }, + { .val = NULL } +}; + +/* TCP/UDP match */ +static inline int cxgb4_fill_l4_ports(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.fport = ntohl(val) >> 16; + f->mask.fport = ntohl(mask) >> 16; + f->val.lport = ntohl(val) & 0x0000FFFF; + f->mask.lport = ntohl(mask) & 0x0000FFFF; + + return 0; +}; + +static const struct cxgb4_match_field cxgb4_tcp_fields[] = { + { .off = 0, .val = cxgb4_fill_l4_ports }, + { .val = NULL } +}; + +static const struct cxgb4_match_field cxgb4_udp_fields[] = { + { .off = 0, .val = cxgb4_fill_l4_ports }, + { .val = NULL } +}; + +struct cxgb4_next_header { + unsigned int offset; /* Offset to next header */ + /* offset, shift, and mask added to offset above + * to get to next header. Useful when using a header + * field's value to jump to next header such as IHL field + * in IPv4 header. + */ + unsigned int offoff; + u32 shift; + u32 mask; + /* match criteria to make this jump */ + unsigned int match_off; + u32 match_val; + u32 match_mask; + /* location of jump to make */ + const struct cxgb4_match_field *jump; +}; + +/* Accept a rule with a jump to transport layer header based on IHL field in + * IPv4 header. + */ +static const struct cxgb4_next_header cxgb4_ipv4_jumps[] = { + { .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF, + .match_off = 8, .match_val = 0x600, .match_mask = 0xFF00, + .jump = cxgb4_tcp_fields }, + { .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF, + .match_off = 8, .match_val = 0x1100, .match_mask = 0xFF00, + .jump = cxgb4_udp_fields }, + { .jump = NULL } +}; + +/* Accept a rule with a jump directly past the 40 Bytes of IPv6 fixed header + * to get to transport layer header. + */ +static const struct cxgb4_next_header cxgb4_ipv6_jumps[] = { + { .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0, + .match_off = 4, .match_val = 0x60000, .match_mask = 0xFF0000, + .jump = cxgb4_tcp_fields }, + { .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0, + .match_off = 4, .match_val = 0x110000, .match_mask = 0xFF0000, + .jump = cxgb4_udp_fields }, + { .jump = NULL } +}; + +struct cxgb4_link { + const struct cxgb4_match_field *match_field; /* Next header */ + struct ch_filter_specification fs; /* Match spec associated with link */ + u32 link_handle; /* Knode handle associated with the link */ + unsigned long *tid_map; /* Bitmap for filter tids */ +}; + +struct cxgb4_tc_u32_table { + unsigned int size; /* number of entries in table */ + struct cxgb4_link table[0]; /* Jump table */ +}; +#endif /* __CXGB4_TC_U32_PARSE_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index b3544f6b88ca..47bd14f602db 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -1,7 +1,7 @@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -106,6 +106,7 @@ struct tid_info { unsigned int atid_base; struct filter_entry *ftid_tab; + unsigned long *ftid_bmap; unsigned int nftids; unsigned int ftid_base; unsigned int aftid_base; @@ -126,6 +127,8 @@ struct tid_info { atomic_t tids_in_use; /* TIDs in the HASH */ atomic_t hash_tids_in_use; + /* lock for setting/clearing filter bitmap */ + spinlock_t ftid_lock; }; static inline void *lookup_tid(const struct tid_info *t, unsigned int tid) @@ -185,6 +188,27 @@ int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, unsigned int queue, bool ipv6); +/* Filter operation context to allow callers of cxgb4_set_filter() and + * cxgb4_del_filter() to wait for an asynchronous completion. + */ +struct filter_ctx { + struct completion completion; /* completion rendezvous */ + void *closure; /* caller's opaque information */ + int result; /* result of operation */ + u32 tid; /* to store tid */ +}; + +struct ch_filter_specification; + +int __cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs, + struct filter_ctx *ctx); +int __cxgb4_del_filter(struct net_device *dev, int filter_id, + struct filter_ctx *ctx); +int cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs); +int cxgb4_del_filter(struct net_device *dev, int filter_id); + static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) { skb_set_queue_mapping(skb, (queue << 1) | prio); diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 36361f8bf894..90f9c5481290 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -60,6 +60,8 @@ struct ftgmac100 { struct ftgmac100_descs *descs; dma_addr_t descs_dma_addr; + struct page *rx_pages[RX_QUEUE_ENTRIES]; + unsigned int rx_pointer; unsigned int tx_clean_pointer; unsigned int tx_pointer; @@ -77,6 +79,9 @@ struct ftgmac100 { int int_mask_all; bool use_ncsi; bool enabled; + + u32 rxdes0_edorr_mask; + u32 txdes0_edotr_mask; }; static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv, @@ -257,10 +262,11 @@ static bool ftgmac100_rxdes_packet_ready(struct ftgmac100_rxdes *rxdes) return rxdes->rxdes0 & cpu_to_le32(FTGMAC100_RXDES0_RXPKT_RDY); } -static void ftgmac100_rxdes_set_dma_own(struct ftgmac100_rxdes *rxdes) +static void ftgmac100_rxdes_set_dma_own(const struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) { /* clear status bits */ - rxdes->rxdes0 &= cpu_to_le32(FTGMAC100_RXDES0_EDORR); + rxdes->rxdes0 &= cpu_to_le32(priv->rxdes0_edorr_mask); } static bool ftgmac100_rxdes_rx_error(struct ftgmac100_rxdes *rxdes) @@ -298,9 +304,10 @@ static bool ftgmac100_rxdes_multicast(struct ftgmac100_rxdes *rxdes) return rxdes->rxdes0 & cpu_to_le32(FTGMAC100_RXDES0_MULTICAST); } -static void ftgmac100_rxdes_set_end_of_ring(struct ftgmac100_rxdes *rxdes) +static void ftgmac100_rxdes_set_end_of_ring(const struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) { - rxdes->rxdes0 |= cpu_to_le32(FTGMAC100_RXDES0_EDORR); + rxdes->rxdes0 |= cpu_to_le32(priv->rxdes0_edorr_mask); } static void ftgmac100_rxdes_set_dma_addr(struct ftgmac100_rxdes *rxdes, @@ -341,18 +348,27 @@ static bool ftgmac100_rxdes_ipcs_err(struct ftgmac100_rxdes *rxdes) return rxdes->rxdes1 & cpu_to_le32(FTGMAC100_RXDES1_IP_CHKSUM_ERR); } +static inline struct page **ftgmac100_rxdes_page_slot(struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) +{ + return &priv->rx_pages[rxdes - priv->descs->rxdes]; +} + /* * rxdes2 is not used by hardware. We use it to keep track of page. * Since hardware does not touch it, we can skip cpu_to_le32()/le32_to_cpu(). */ -static void ftgmac100_rxdes_set_page(struct ftgmac100_rxdes *rxdes, struct page *page) +static void ftgmac100_rxdes_set_page(struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes, + struct page *page) { - rxdes->rxdes2 = (unsigned int)page; + *ftgmac100_rxdes_page_slot(priv, rxdes) = page; } -static struct page *ftgmac100_rxdes_get_page(struct ftgmac100_rxdes *rxdes) +static struct page *ftgmac100_rxdes_get_page(struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) { - return (struct page *)rxdes->rxdes2; + return *ftgmac100_rxdes_page_slot(priv, rxdes); } /****************************************************************************** @@ -382,7 +398,7 @@ ftgmac100_rx_locate_first_segment(struct ftgmac100 *priv) if (ftgmac100_rxdes_first_segment(rxdes)) return rxdes; - ftgmac100_rxdes_set_dma_own(rxdes); + ftgmac100_rxdes_set_dma_own(priv, rxdes); ftgmac100_rx_pointer_advance(priv); rxdes = ftgmac100_current_rxdes(priv); } @@ -453,7 +469,7 @@ static void ftgmac100_rx_drop_packet(struct ftgmac100 *priv) if (ftgmac100_rxdes_last_segment(rxdes)) done = true; - ftgmac100_rxdes_set_dma_own(rxdes); + ftgmac100_rxdes_set_dma_own(priv, rxdes); ftgmac100_rx_pointer_advance(priv); rxdes = ftgmac100_current_rxdes(priv); } while (!done && ftgmac100_rxdes_packet_ready(rxdes)); @@ -501,7 +517,7 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed) do { dma_addr_t map = ftgmac100_rxdes_get_dma_addr(rxdes); - struct page *page = ftgmac100_rxdes_get_page(rxdes); + struct page *page = ftgmac100_rxdes_get_page(priv, rxdes); unsigned int size; dma_unmap_page(priv->dev, map, RX_BUF_SIZE, DMA_FROM_DEVICE); @@ -545,10 +561,11 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed) /****************************************************************************** * internal functions (transmit descriptor) *****************************************************************************/ -static void ftgmac100_txdes_reset(struct ftgmac100_txdes *txdes) +static void ftgmac100_txdes_reset(const struct ftgmac100 *priv, + struct ftgmac100_txdes *txdes) { /* clear all except end of ring bit */ - txdes->txdes0 &= cpu_to_le32(FTGMAC100_TXDES0_EDOTR); + txdes->txdes0 &= cpu_to_le32(priv->txdes0_edotr_mask); txdes->txdes1 = 0; txdes->txdes2 = 0; txdes->txdes3 = 0; @@ -569,9 +586,10 @@ static void ftgmac100_txdes_set_dma_own(struct ftgmac100_txdes *txdes) txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_TXDMA_OWN); } -static void ftgmac100_txdes_set_end_of_ring(struct ftgmac100_txdes *txdes) +static void ftgmac100_txdes_set_end_of_ring(const struct ftgmac100 *priv, + struct ftgmac100_txdes *txdes) { - txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_EDOTR); + txdes->txdes0 |= cpu_to_le32(priv->txdes0_edotr_mask); } static void ftgmac100_txdes_set_first_segment(struct ftgmac100_txdes *txdes) @@ -690,7 +708,7 @@ static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv) dev_kfree_skb(skb); - ftgmac100_txdes_reset(txdes); + ftgmac100_txdes_reset(priv, txdes); ftgmac100_tx_clean_pointer_advance(priv); @@ -779,9 +797,9 @@ static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv, return -ENOMEM; } - ftgmac100_rxdes_set_page(rxdes, page); + ftgmac100_rxdes_set_page(priv, rxdes, page); ftgmac100_rxdes_set_dma_addr(rxdes, map); - ftgmac100_rxdes_set_dma_own(rxdes); + ftgmac100_rxdes_set_dma_own(priv, rxdes); return 0; } @@ -791,7 +809,7 @@ static void ftgmac100_free_buffers(struct ftgmac100 *priv) for (i = 0; i < RX_QUEUE_ENTRIES; i++) { struct ftgmac100_rxdes *rxdes = &priv->descs->rxdes[i]; - struct page *page = ftgmac100_rxdes_get_page(rxdes); + struct page *page = ftgmac100_rxdes_get_page(priv, rxdes); dma_addr_t map = ftgmac100_rxdes_get_dma_addr(rxdes); if (!page) @@ -828,7 +846,8 @@ static int ftgmac100_alloc_buffers(struct ftgmac100 *priv) return -ENOMEM; /* initialize RX ring */ - ftgmac100_rxdes_set_end_of_ring(&priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]); + ftgmac100_rxdes_set_end_of_ring(priv, + &priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]); for (i = 0; i < RX_QUEUE_ENTRIES; i++) { struct ftgmac100_rxdes *rxdes = &priv->descs->rxdes[i]; @@ -838,7 +857,8 @@ static int ftgmac100_alloc_buffers(struct ftgmac100 *priv) } /* initialize TX ring */ - ftgmac100_txdes_set_end_of_ring(&priv->descs->txdes[TX_QUEUE_ENTRIES - 1]); + ftgmac100_txdes_set_end_of_ring(priv, + &priv->descs->txdes[TX_QUEUE_ENTRIES - 1]); return 0; err: @@ -1055,14 +1075,12 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget) } if (status & priv->int_mask_all & (FTGMAC100_INT_NO_RXBUF | - FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR | - FTGMAC100_INT_PHYSTS_CHG)) { + FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR)) { if (net_ratelimit()) - netdev_info(netdev, "[ISR] = 0x%x: %s%s%s%s\n", status, + netdev_info(netdev, "[ISR] = 0x%x: %s%s%s\n", status, status & FTGMAC100_INT_NO_RXBUF ? "NO_RXBUF " : "", status & FTGMAC100_INT_RPKT_LOST ? "RPKT_LOST " : "", - status & FTGMAC100_INT_AHB_ERR ? "AHB_ERR " : "", - status & FTGMAC100_INT_PHYSTS_CHG ? "PHYSTS_CHG" : ""); + status & FTGMAC100_INT_AHB_ERR ? "AHB_ERR " : ""); if (status & FTGMAC100_INT_NO_RXBUF) { /* RX buffer unavailable */ @@ -1092,6 +1110,7 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget) static int ftgmac100_open(struct net_device *netdev) { struct ftgmac100 *priv = netdev_priv(netdev); + unsigned int status; int err; err = ftgmac100_alloc_buffers(priv); @@ -1117,6 +1136,11 @@ static int ftgmac100_open(struct net_device *netdev) ftgmac100_init_hw(priv); ftgmac100_start_hw(priv, priv->use_ncsi ? 100 : 10); + + /* Clear stale interrupts */ + status = ioread32(priv->base + FTGMAC100_OFFSET_ISR); + iowrite32(status, priv->base + FTGMAC100_OFFSET_ISR); + if (netdev->phydev) phy_start(netdev->phydev); else if (priv->use_ncsi) @@ -1226,12 +1250,21 @@ static int ftgmac100_setup_mdio(struct net_device *netdev) struct ftgmac100 *priv = netdev_priv(netdev); struct platform_device *pdev = to_platform_device(priv->dev); int i, err = 0; + u32 reg; /* initialize mdio bus */ priv->mii_bus = mdiobus_alloc(); if (!priv->mii_bus) return -EIO; + if (of_machine_is_compatible("aspeed,ast2400") || + of_machine_is_compatible("aspeed,ast2500")) { + /* This driver supports the old MDIO interface */ + reg = ioread32(priv->base + FTGMAC100_OFFSET_REVR); + reg &= ~FTGMAC100_REVR_NEW_MDIO_INTERFACE; + iowrite32(reg, priv->base + FTGMAC100_OFFSET_REVR); + }; + priv->mii_bus->name = "ftgmac100_mdio"; snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%d", pdev->name, pdev->id); @@ -1355,9 +1388,18 @@ static int ftgmac100_probe(struct platform_device *pdev) FTGMAC100_INT_XPKT_ETH | FTGMAC100_INT_XPKT_LOST | FTGMAC100_INT_AHB_ERR | - FTGMAC100_INT_PHYSTS_CHG | FTGMAC100_INT_RPKT_BUF | FTGMAC100_INT_NO_RXBUF); + + if (of_machine_is_compatible("aspeed,ast2400") || + of_machine_is_compatible("aspeed,ast2500")) { + priv->rxdes0_edorr_mask = BIT(30); + priv->txdes0_edotr_mask = BIT(30); + } else { + priv->rxdes0_edorr_mask = BIT(15); + priv->txdes0_edotr_mask = BIT(15); + } + if (pdev->dev.of_node && of_get_property(pdev->dev.of_node, "use-ncsi", NULL)) { if (!IS_ENABLED(CONFIG_NET_NCSI)) { @@ -1367,7 +1409,6 @@ static int ftgmac100_probe(struct platform_device *pdev) dev_info(&pdev->dev, "Using NCSI interface\n"); priv->use_ncsi = true; - priv->int_mask_all &= ~FTGMAC100_INT_PHYSTS_CHG; priv->ndev = ncsi_register_dev(netdev, ftgmac100_ncsi_handler); if (!priv->ndev) goto err_ncsi_dev; diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h index 13408d448b05..a7ce0ac8858a 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.h +++ b/drivers/net/ethernet/faraday/ftgmac100.h @@ -134,6 +134,11 @@ #define FTGMAC100_DMAFIFOS_TXDMA_REQ (1 << 31) /* + * Feature Register + */ +#define FTGMAC100_REVR_NEW_MDIO_INTERFACE BIT(31) + +/* * Receive buffer size register */ #define FTGMAC100_RBSR_SIZE(x) ((x) & 0x3fff) @@ -152,6 +157,7 @@ #define FTGMAC100_MACCR_FULLDUP (1 << 8) #define FTGMAC100_MACCR_GIGA_MODE (1 << 9) #define FTGMAC100_MACCR_CRC_APD (1 << 10) +#define FTGMAC100_MACCR_PHY_LINK_LEVEL (1 << 11) #define FTGMAC100_MACCR_RX_RUNT (1 << 12) #define FTGMAC100_MACCR_JUMBO_LF (1 << 13) #define FTGMAC100_MACCR_RX_ALL (1 << 14) @@ -189,7 +195,6 @@ struct ftgmac100_txdes { } __attribute__ ((aligned(16))); #define FTGMAC100_TXDES0_TXBUF_SIZE(x) ((x) & 0x3fff) -#define FTGMAC100_TXDES0_EDOTR (1 << 15) #define FTGMAC100_TXDES0_CRC_ERR (1 << 19) #define FTGMAC100_TXDES0_LTS (1 << 28) #define FTGMAC100_TXDES0_FTS (1 << 29) @@ -215,7 +220,6 @@ struct ftgmac100_rxdes { } __attribute__ ((aligned(16))); #define FTGMAC100_RXDES0_VDBC 0x3fff -#define FTGMAC100_RXDES0_EDORR (1 << 15) #define FTGMAC100_RXDES0_MULTICAST (1 << 16) #define FTGMAC100_RXDES0_BROADCAST (1 << 17) #define FTGMAC100_RXDES0_RX_ERR (1 << 18) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index d7e1f8c7ae92..059aaeda46b1 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -994,10 +994,10 @@ static void hns_nic_adjust_link(struct net_device *ndev) struct hnae_handle *h = priv->ae_handle; int state = 1; - if (priv->phy) { + if (ndev->phydev) { h->dev->ops->adjust_link(h, ndev->phydev->speed, ndev->phydev->duplex); - state = priv->phy->link; + state = ndev->phydev->link; } state = state && h->dev->ops->get_status(h); @@ -1022,7 +1022,6 @@ static void hns_nic_adjust_link(struct net_device *ndev) */ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) { - struct hns_nic_priv *priv = netdev_priv(ndev); struct phy_device *phy_dev = h->phy_dev; int ret; @@ -1046,8 +1045,6 @@ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) if (h->phy_if == PHY_INTERFACE_MODE_XGMII) phy_dev->autoneg = false; - priv->phy = phy_dev; - return 0; } @@ -1224,8 +1221,8 @@ static int hns_nic_net_up(struct net_device *ndev) if (ret) goto out_start_err; - if (priv->phy) - phy_start(priv->phy); + if (ndev->phydev) + phy_start(ndev->phydev); clear_bit(NIC_STATE_DOWN, &priv->state); (void)mod_timer(&priv->service_timer, jiffies + SERVICE_TIMER_HZ); @@ -1259,8 +1256,8 @@ static void hns_nic_net_down(struct net_device *ndev) netif_tx_disable(ndev); priv->link = 0; - if (priv->phy) - phy_stop(priv->phy); + if (ndev->phydev) + phy_stop(ndev->phydev); ops = priv->ae_handle->dev->ops; @@ -1359,8 +1356,7 @@ static void hns_nic_net_timeout(struct net_device *ndev) static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { - struct hns_nic_priv *priv = netdev_priv(netdev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = netdev->phydev; if (!netif_running(netdev)) return -EINVAL; @@ -2017,9 +2013,8 @@ static int hns_nic_dev_remove(struct platform_device *pdev) hns_nic_uninit_ring_data(priv); priv->ring_data = NULL; - if (priv->phy) - phy_disconnect(priv->phy); - priv->phy = NULL; + if (ndev->phydev) + phy_disconnect(ndev->phydev); if (!IS_ERR_OR_NULL(priv->ae_handle)) hnae_put_handle(priv->ae_handle); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h index 44bb3015eed3..5b412de350aa 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h @@ -59,7 +59,6 @@ struct hns_nic_priv { u32 port_id; int phy_mode; int phy_led_val; - struct phy_device *phy; struct net_device *netdev; struct device *dev; struct hnae_handle *ae_handle; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 5eb3245bdf86..47e59bbfd061 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -48,9 +48,9 @@ static u32 hns_nic_get_link(struct net_device *net_dev) h = priv->ae_handle; - if (priv->phy) { - if (!genphy_read_status(priv->phy)) - link_stat = priv->phy->link; + if (net_dev->phydev) { + if (!genphy_read_status(net_dev->phydev)) + link_stat = net_dev->phydev->link; else link_stat = 0; } @@ -64,15 +64,14 @@ static u32 hns_nic_get_link(struct net_device *net_dev) } static void hns_get_mdix_mode(struct net_device *net_dev, - struct ethtool_cmd *cmd) + struct ethtool_link_ksettings *cmd) { int mdix_ctrl, mdix, retval, is_resolved; - struct hns_nic_priv *priv = netdev_priv(net_dev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = net_dev->phydev; if (!phy_dev || !phy_dev->mdio.bus) { - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; - cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; return; } @@ -89,35 +88,35 @@ static void hns_get_mdix_mode(struct net_device *net_dev, switch (mdix_ctrl) { case 0x0: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI; break; case 0x1: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_X; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_X; break; case 0x3: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; break; default: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; break; } if (!is_resolved) - cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; else if (mdix) - cmd->eth_tp_mdix = ETH_TP_MDI_X; + cmd->base.eth_tp_mdix = ETH_TP_MDI_X; else - cmd->eth_tp_mdix = ETH_TP_MDI; + cmd->base.eth_tp_mdix = ETH_TP_MDI; } /** - *hns_nic_get_settings - implement ethtool get settings + *hns_nic_get_link_ksettings - implement ethtool get link ksettings *@net_dev: net_device - *@cmd: ethtool_cmd + *@cmd: ethtool_link_ksettings *retuen 0 - success , negative --fail */ -static int hns_nic_get_settings(struct net_device *net_dev, - struct ethtool_cmd *cmd) +static int hns_nic_get_link_ksettings(struct net_device *net_dev, + struct ethtool_link_ksettings *cmd) { struct hns_nic_priv *priv = netdev_priv(net_dev); struct hnae_handle *h; @@ -125,6 +124,7 @@ static int hns_nic_get_settings(struct net_device *net_dev, int ret; u8 duplex; u16 speed; + u32 supported, advertising; if (!priv || !priv->ae_handle) return -ESRCH; @@ -139,38 +139,43 @@ static int hns_nic_get_settings(struct net_device *net_dev, return -EINVAL; } + ethtool_convert_link_mode_to_legacy_u32(&supported, + cmd->link_modes.supported); + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + /* When there is no phy, autoneg is off. */ - cmd->autoneg = false; - ethtool_cmd_speed_set(cmd, speed); - cmd->duplex = duplex; + cmd->base.autoneg = false; + cmd->base.cmd = speed; + cmd->base.duplex = duplex; - if (priv->phy) - (void)phy_ethtool_gset(priv->phy, cmd); + if (net_dev->phydev) + (void)phy_ethtool_ksettings_get(net_dev->phydev, cmd); link_stat = hns_nic_get_link(net_dev); if (!link_stat) { - ethtool_cmd_speed_set(cmd, (u32)SPEED_UNKNOWN); - cmd->duplex = DUPLEX_UNKNOWN; + cmd->base.speed = (u32)SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; } - if (cmd->autoneg) - cmd->advertising |= ADVERTISED_Autoneg; + if (cmd->base.autoneg) + advertising |= ADVERTISED_Autoneg; - cmd->supported |= h->if_support; + supported |= h->if_support; if (h->phy_if == PHY_INTERFACE_MODE_SGMII) { - cmd->supported |= SUPPORTED_TP; - cmd->advertising |= ADVERTISED_1000baseT_Full; + supported |= SUPPORTED_TP; + advertising |= ADVERTISED_1000baseT_Full; } else if (h->phy_if == PHY_INTERFACE_MODE_XGMII) { - cmd->supported |= SUPPORTED_FIBRE; - cmd->advertising |= ADVERTISED_10000baseKR_Full; + supported |= SUPPORTED_FIBRE; + advertising |= ADVERTISED_10000baseKR_Full; } switch (h->media_type) { case HNAE_MEDIA_TYPE_FIBER: - cmd->port = PORT_FIBRE; + cmd->base.port = PORT_FIBRE; break; case HNAE_MEDIA_TYPE_COPPER: - cmd->port = PORT_TP; + cmd->base.port = PORT_TP; break; case HNAE_MEDIA_TYPE_UNKNOWN: default: @@ -178,23 +183,27 @@ static int hns_nic_get_settings(struct net_device *net_dev, } if (!(AE_IS_VER1(priv->enet_ver) && h->port_type == HNAE_PORT_DEBUG)) - cmd->supported |= SUPPORTED_Pause; + supported |= SUPPORTED_Pause; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); - cmd->transceiver = XCVR_EXTERNAL; - cmd->mdio_support = (ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22); + cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22; hns_get_mdix_mode(net_dev, cmd); return 0; } /** - *hns_nic_set_settings - implement ethtool set settings + *hns_nic_set_link_settings - implement ethtool set link ksettings *@net_dev: net_device - *@cmd: ethtool_cmd + *@cmd: ethtool_link_ksettings *retuen 0 - success , negative --fail */ -static int hns_nic_set_settings(struct net_device *net_dev, - struct ethtool_cmd *cmd) +static int hns_nic_set_link_ksettings(struct net_device *net_dev, + const struct ethtool_link_ksettings *cmd) { struct hns_nic_priv *priv = netdev_priv(net_dev); struct hnae_handle *h; @@ -208,24 +217,25 @@ static int hns_nic_set_settings(struct net_device *net_dev, return -ENODEV; h = priv->ae_handle; - speed = ethtool_cmd_speed(cmd); + speed = cmd->base.speed; if (h->phy_if == PHY_INTERFACE_MODE_XGMII) { - if (cmd->autoneg == AUTONEG_ENABLE || speed != SPEED_10000 || - cmd->duplex != DUPLEX_FULL) + if (cmd->base.autoneg == AUTONEG_ENABLE || + speed != SPEED_10000 || + cmd->base.duplex != DUPLEX_FULL) return -EINVAL; } else if (h->phy_if == PHY_INTERFACE_MODE_SGMII) { - if (!priv->phy && cmd->autoneg == AUTONEG_ENABLE) + if (!net_dev->phydev && cmd->base.autoneg == AUTONEG_ENABLE) return -EINVAL; - if (speed == SPEED_1000 && cmd->duplex == DUPLEX_HALF) + if (speed == SPEED_1000 && cmd->base.duplex == DUPLEX_HALF) return -EINVAL; - if (priv->phy) - return phy_ethtool_sset(priv->phy, cmd); + if (net_dev->phydev) + return phy_ethtool_ksettings_set(net_dev->phydev, cmd); if ((speed != SPEED_10 && speed != SPEED_100 && - speed != SPEED_1000) || (cmd->duplex != DUPLEX_HALF && - cmd->duplex != DUPLEX_FULL)) + speed != SPEED_1000) || (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL)) return -EINVAL; } else { netdev_err(net_dev, "Not supported!"); @@ -233,7 +243,7 @@ static int hns_nic_set_settings(struct net_device *net_dev, } if (h->dev->ops->adjust_link) { - h->dev->ops->adjust_link(h, (int)speed, cmd->duplex); + h->dev->ops->adjust_link(h, (int)speed, cmd->base.duplex); return 0; } @@ -305,7 +315,7 @@ static int __lb_setup(struct net_device *ndev, { int ret = 0; struct hns_nic_priv *priv = netdev_priv(ndev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = ndev->phydev; struct hnae_handle *h = priv->ae_handle; switch (loop) { @@ -910,7 +920,7 @@ void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data) memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_SERDES], ETH_GSTRING_LEN); buff += ETH_GSTRING_LEN; - if ((priv->phy) && (!priv->phy->is_c45)) + if ((netdev->phydev) && (!netdev->phydev->is_c45)) memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_PHY], ETH_GSTRING_LEN); @@ -996,7 +1006,7 @@ int hns_get_sset_count(struct net_device *netdev, int stringset) if (priv->ae_handle->phy_if == PHY_INTERFACE_MODE_XGMII) cnt--; - if ((!priv->phy) || (priv->phy->is_c45)) + if ((!netdev->phydev) || (netdev->phydev->is_c45)) cnt--; return cnt; @@ -1015,8 +1025,7 @@ int hns_get_sset_count(struct net_device *netdev, int stringset) int hns_phy_led_set(struct net_device *netdev, int value) { int retval; - struct hns_nic_priv *priv = netdev_priv(netdev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = netdev->phydev; retval = phy_write(phy_dev, HNS_PHY_PAGE_REG, HNS_PHY_PAGE_LED); retval |= phy_write(phy_dev, HNS_LED_FC_REG, value); @@ -1039,7 +1048,7 @@ int hns_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) { struct hns_nic_priv *priv = netdev_priv(netdev); struct hnae_handle *h = priv->ae_handle; - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = netdev->phydev; int ret; if (phy_dev) @@ -1159,8 +1168,7 @@ static int hns_get_regs_len(struct net_device *net_dev) static int hns_nic_nway_reset(struct net_device *netdev) { int ret = 0; - struct hns_nic_priv *priv = netdev_priv(netdev); - struct phy_device *phy = priv->phy; + struct phy_device *phy = netdev->phydev; if (netif_running(netdev)) { if (phy) @@ -1267,8 +1275,6 @@ static int hns_get_rxnfc(struct net_device *netdev, static const struct ethtool_ops hns_ethtool_ops = { .get_drvinfo = hns_nic_get_drvinfo, .get_link = hns_nic_get_link, - .get_settings = hns_nic_get_settings, - .set_settings = hns_nic_set_settings, .get_ringparam = hns_get_ringparam, .get_pauseparam = hns_get_pauseparam, .set_pauseparam = hns_set_pauseparam, @@ -1288,6 +1294,8 @@ static const struct ethtool_ops hns_ethtool_ops = { .get_rxfh = hns_get_rss, .set_rxfh = hns_set_rss, .get_rxnfc = hns_get_rxnfc, + .get_link_ksettings = hns_nic_get_link_ksettings, + .set_link_ksettings = hns_nic_set_link_ksettings, }; void hns_ethtool_set_ops(struct net_device *ndev) diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 2e1b17ad52a3..ad03763e009a 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -334,7 +334,7 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; e_err("ptp_clock_register failed\n"); - } else { + } else if (adapter->ptp_clock) { e_info("registered PHC clock\n"); } } diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index ed39cbad24bd..f1feceab758a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -669,7 +669,7 @@ void i40e_ptp_init(struct i40e_pf *pf) pf->ptp_clock = NULL; dev_err(&pf->pdev->dev, "%s: ptp_clock_register failed\n", __func__); - } else { + } else if (pf->ptp_clock) { struct timespec64 ts; u32 regval; diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 66dfa2085cc7..1dd14e166dc8 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -1159,7 +1159,7 @@ void igb_ptp_init(struct igb_adapter *adapter) if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n"); - } else { + } else if (adapter->ptp_clock) { dev_info(&adapter->pdev->dev, "added PHC on %s\n", adapter->netdev->name); adapter->ptp_flags |= IGB_PTP_ENABLED; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index e5431bfe3339..a92277683a64 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -1254,7 +1254,7 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_clock = NULL; e_dev_err("ptp_clock_register failed\n"); return err; - } else + } else if (adapter->ptp_clock) e_dev_info("registered PHC device on %s\n", netdev->name); /* set default timestamp mode to disabled here. We do this in diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index ca6b5013e275..2909372c4da0 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1894,11 +1894,8 @@ static void mtk_uninit(struct net_device *dev) struct mtk_eth *eth = mac->hw; phy_disconnect(mac->phy_dev); - mtk_mdio_cleanup(eth); mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0); mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0); - free_irq(eth->irq[1], dev); - free_irq(eth->irq[2], dev); } static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) @@ -2454,6 +2451,7 @@ static int mtk_remove(struct platform_device *pdev) netif_napi_del(ð->tx_napi); netif_napi_del(ð->rx_napi); mtk_cleanup(eth); + mtk_mdio_cleanup(eth); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index f04a423ff79d..a58d96cf1ed1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -785,17 +785,23 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO); if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { + int ret; + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) return mlx4_internal_err_ret_value(dev, op, op_modifier); + down_read(&mlx4_priv(dev)->cmd.switch_sem); if (mlx4_priv(dev)->cmd.use_events) - return mlx4_cmd_wait(dev, in_param, out_param, - out_is_imm, in_modifier, - op_modifier, op, timeout); + ret = mlx4_cmd_wait(dev, in_param, out_param, + out_is_imm, in_modifier, + op_modifier, op, timeout); else - return mlx4_cmd_poll(dev, in_param, out_param, - out_is_imm, in_modifier, - op_modifier, op, timeout); + ret = mlx4_cmd_poll(dev, in_param, out_param, + out_is_imm, in_modifier, + op_modifier, op, timeout); + + up_read(&mlx4_priv(dev)->cmd.switch_sem); + return ret; } return mlx4_slave_cmd(dev, in_param, out_param, out_is_imm, in_modifier, op_modifier, op, timeout); @@ -2454,6 +2460,7 @@ int mlx4_cmd_init(struct mlx4_dev *dev) int flags = 0; if (!priv->cmd.initialized) { + init_rwsem(&priv->cmd.switch_sem); mutex_init(&priv->cmd.slave_cmd_mutex); sema_init(&priv->cmd.poll_sem, 1); priv->cmd.use_events = 0; @@ -2583,6 +2590,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) if (!priv->cmd.context) return -ENOMEM; + down_write(&priv->cmd.switch_sem); for (i = 0; i < priv->cmd.max_cmds; ++i) { priv->cmd.context[i].token = i; priv->cmd.context[i].next = i + 1; @@ -2606,6 +2614,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) down(&priv->cmd.poll_sem); priv->cmd.use_events = 1; + up_write(&priv->cmd.switch_sem); return err; } @@ -2618,6 +2627,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i; + down_write(&priv->cmd.switch_sem); priv->cmd.use_events = 0; for (i = 0; i < priv->cmd.max_cmds; ++i) @@ -2626,6 +2636,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) kfree(priv->cmd.context); up(&priv->cmd.poll_sem); + up_write(&priv->cmd.switch_sem); } struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 1494997c4f7e..08fc5fc56d43 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -298,7 +298,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) if (IS_ERR(mdev->ptp_clock)) { mdev->ptp_clock = NULL; mlx4_err(mdev, "ptp_clock_register failed\n"); - } else { + } else if (mdev->ptp_clock) { mlx4_info(mdev, "registered PHC clock\n"); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index c46355bce613..f2e8beddcf44 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -72,7 +72,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, } dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order, frag_info->dma_dir); - if (dma_mapping_error(priv->ddev, dma)) { + if (unlikely(dma_mapping_error(priv->ddev, dma))) { put_page(page); return -ENOMEM; } @@ -108,7 +108,8 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, ring_alloc[i].page_size) continue; - if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp)) + if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i], + frag_info, gfp))) goto out; } @@ -585,7 +586,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, frag_info = &priv->frag_info[nr]; if (length <= frag_info->frag_prefix_size) break; - if (!frags[nr].page) + if (unlikely(!frags[nr].page)) goto fail; dma = be64_to_cpu(rx_desc->data[nr].addr); @@ -625,7 +626,7 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, dma_addr_t dma; skb = netdev_alloc_skb(priv->dev, SMALL_PACKET_SIZE + NET_IP_ALIGN); - if (!skb) { + if (unlikely(!skb)) { en_dbg(RX_ERR, priv, "Failed allocating skb\n"); return NULL; } @@ -736,7 +737,8 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, { __wsum csum_pseudo_hdr = 0; - if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS) + if (unlikely(ipv6h->nexthdr == IPPROTO_FRAGMENT || + ipv6h->nexthdr == IPPROTO_HOPOPTS)) return -1; hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr)); @@ -769,7 +771,7 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, get_fixed_ipv4_csum(hw_checksum, skb, hdr); #if IS_ENABLED(CONFIG_IPV6) else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) - if (get_fixed_ipv6_csum(hw_checksum, skb, hdr)) + if (unlikely(get_fixed_ipv6_csum(hw_checksum, skb, hdr))) return -1; #endif return 0; @@ -796,10 +798,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud u64 timestamp; bool l2_tunnel; - if (!priv->port_up) + if (unlikely(!priv->port_up)) return 0; - if (budget <= 0) + if (unlikely(budget <= 0)) return polled; /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */ @@ -902,9 +904,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_PASS: break; case XDP_TX: - if (!mlx4_en_xmit_frame(frags, dev, + if (likely(!mlx4_en_xmit_frame(frags, dev, length, tx_index, - &doorbell_pending)) + &doorbell_pending))) goto consumed; goto xdp_drop; /* Drop on xmit failure */ default: @@ -912,7 +914,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_ABORTED: case XDP_DROP: xdp_drop: - if (mlx4_en_rx_recycle(ring, frags)) + if (likely(mlx4_en_rx_recycle(ring, frags))) goto consumed; goto next; } @@ -1016,12 +1018,12 @@ xdp_drop: /* GRO not possible, complete processing here */ skb = mlx4_en_rx_skb(priv, rx_desc, frags, length); - if (!skb) { + if (unlikely(!skb)) { ring->dropped++; goto next; } - if (unlikely(priv->validate_loopback)) { + if (unlikely(priv->validate_loopback)) { validate_loopback(priv, skb); goto next; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index c9d7fc5159f2..c128ba3ef014 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -46,6 +46,7 @@ #include <linux/interrupt.h> #include <linux/spinlock.h> #include <net/devlink.h> +#include <linux/rwsem.h> #include <linux/mlx4/device.h> #include <linux/mlx4/driver.h> @@ -627,6 +628,7 @@ struct mlx4_cmd { struct mutex slave_cmd_mutex; struct semaphore poll_sem; struct semaphore event_sem; + struct rw_semaphore switch_sem; int max_cmds; spinlock_t context_lock; int free_head; diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index 67146624eb58..f44d089e2ca6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -45,15 +45,12 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_srq *srq; - spin_lock(&srq_table->lock); - + rcu_read_lock(); srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); + rcu_read_unlock(); if (srq) atomic_inc(&srq->refcount); - - spin_unlock(&srq_table->lock); - - if (!srq) { + else { mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn); return; } @@ -301,12 +298,11 @@ struct mlx4_srq *mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_srq *srq; - unsigned long flags; - spin_lock_irqsave(&srq_table->lock, flags); + rcu_read_lock(); srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); - spin_unlock_irqrestore(&srq_table->lock, flags); + rcu_read_unlock(); return srq; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 7dd4763e726e..346015407b70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -65,6 +65,8 @@ #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x3 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 +#define MLX5_RX_HEADROOM NET_SKB_PAD + #define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */ #define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8 /* >= 6, HW restriction */ #define MLX5_MPWRQ_LOG_WQE_SZ 18 @@ -99,6 +101,18 @@ #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ #define MLX5E_SQ_BF_BUDGET 16 +#define MLX5E_ICOSQ_MAX_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB)) + +#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) +#define MLX5E_XDP_IHS_DS_COUNT \ + DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS) +#define MLX5E_XDP_TX_DS_COUNT \ + (MLX5E_XDP_IHS_DS_COUNT + \ + (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) +#define MLX5E_XDP_TX_WQEBBS \ + DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS) + #define MLX5E_NUM_MAIN_GROUPS 9 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) @@ -302,10 +316,20 @@ struct mlx5e_page_cache { struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; - u32 wqe_sz; - struct sk_buff **skb; - struct mlx5e_mpw_info *wqe_info; - void *mtt_no_align; + + union { + struct mlx5e_dma_info *dma_info; + struct { + struct mlx5e_mpw_info *info; + void *mtt_no_align; + u32 mtt_offset; + } mpwqe; + }; + struct { + u8 page_order; + u32 wqe_sz; /* wqe data buffer size */ + u8 map_dir; /* dma map direction */ + } buff; __be32 mkey_be; struct device *pdev; @@ -321,9 +345,9 @@ struct mlx5e_rq { unsigned long state; int ix; - u32 mpwqe_mtt_offset; struct mlx5e_rx_am am; /* Adaptive Moderation */ + struct bpf_prog *xdp_prog; /* control */ struct mlx5_wq_ctrl wq_ctrl; @@ -370,11 +394,17 @@ enum { MLX5E_SQ_STATE_BF_ENABLE, }; -struct mlx5e_ico_wqe_info { +struct mlx5e_sq_wqe_info { u8 opcode; u8 num_wqebbs; }; +enum mlx5e_sq_type { + MLX5E_SQ_TXQ, + MLX5E_SQ_ICO, + MLX5E_SQ_XDP +}; + struct mlx5e_sq { /* data path */ @@ -392,10 +422,20 @@ struct mlx5e_sq { struct mlx5e_cq cq; - /* pointers to per packet info: write@xmit, read@completion */ - struct sk_buff **skb; - struct mlx5e_sq_dma *dma_fifo; - struct mlx5e_tx_wqe_info *wqe_info; + /* pointers to per tx element info: write@xmit, read@completion */ + union { + struct { + struct sk_buff **skb; + struct mlx5e_sq_dma *dma_fifo; + struct mlx5e_tx_wqe_info *wqe_info; + } txq; + struct mlx5e_sq_wqe_info *ico_wqe; + struct { + struct mlx5e_sq_wqe_info *wqe_info; + struct mlx5e_dma_info *di; + bool doorbell; + } xdp; + } db; /* read only */ struct mlx5_wq_cyc wq; @@ -417,8 +457,8 @@ struct mlx5e_sq { struct mlx5_uar uar; struct mlx5e_channel *channel; int tc; - struct mlx5e_ico_wqe_info *ico_wqe_info; u32 rate_limit; + u8 type; } ____cacheline_aligned_in_smp; static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) @@ -434,8 +474,10 @@ enum channel_flags { struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; + struct mlx5e_sq xdp_sq; struct mlx5e_sq sq[MLX5E_MAX_NUM_TC]; struct mlx5e_sq icosq; /* internal control operations */ + bool xdp; struct napi_struct napi; struct device *pdev; struct net_device *netdev; @@ -617,6 +659,7 @@ struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_sq **txq_to_sq_map; int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; + struct bpf_prog *xdp_prog; /* priv data path fields - end */ unsigned long state; @@ -663,7 +706,7 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); -void mlx5e_free_tx_descs(struct mlx5e_sq *sq); +void mlx5e_free_sq_descs(struct mlx5e_sq *sq); void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, bool recycle); @@ -764,7 +807,7 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) { - return rq->mpwqe_mtt_offset + + return rq->mpwqe.mtt_offset + wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 847a8f3ac2b2..13dc388667b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -273,7 +273,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) tstamp->ptp = ptp_clock_register(&tstamp->ptp_info, &priv->mdev->pdev->dev); - if (IS_ERR_OR_NULL(tstamp->ptp)) { + if (IS_ERR(tstamp->ptp)) { mlx5_core_warn(priv->mdev, "ptp_clock_register failed %ld\n", PTR_ERR(tstamp->ptp)); tstamp->ptp = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8595b507e200..a9fc9d4c6af4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -34,6 +34,7 @@ #include <net/pkt_cls.h> #include <linux/mlx5/fs.h> #include <net/vxlan.h> +#include <linux/bpf.h> #include "en.h" #include "en_tc.h" #include "eswitch.h" @@ -50,7 +51,7 @@ struct mlx5e_sq_param { struct mlx5_wq_param wq; u16 max_inline; u8 min_inline_mode; - bool icosq; + enum mlx5e_sq_type type; }; struct mlx5e_cq_param { @@ -63,12 +64,55 @@ struct mlx5e_cq_param { struct mlx5e_channel_param { struct mlx5e_rq_param rq; struct mlx5e_sq_param sq; + struct mlx5e_sq_param xdp_sq; struct mlx5e_sq_param icosq; struct mlx5e_cq_param rx_cq; struct mlx5e_cq_param tx_cq; struct mlx5e_cq_param icosq_cq; }; +static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_GEN(mdev, striding_rq) && + MLX5_CAP_GEN(mdev, umr_ptr_rlky) && + MLX5_CAP_ETH(mdev, reg_umr_sq); +} + +static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) +{ + priv->params.rq_wq_type = rq_type; + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; + priv->params.mpwqe_log_stride_sz = priv->params.rx_cqe_compress ? + MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : + MLX5_MPWRQ_LOG_STRIDE_SIZE; + priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - + priv->params.mpwqe_log_stride_sz; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + } + priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, + BIT(priv->params.log_rq_size)); + + mlx5_core_info(priv->mdev, + "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", + priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, + BIT(priv->params.log_rq_size), + BIT(priv->params.mpwqe_log_stride_sz), + priv->params.rx_cqe_compress_admin); +} + +static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv) +{ + u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) && + !priv->xdp_prog ? + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_LINKED_LIST; + mlx5e_set_rq_type_params(priv, rq_type); +} + static void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -136,6 +180,9 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; + s->rx_xdp_drop += rq_stats->xdp_drop; + s->rx_xdp_tx += rq_stats->xdp_tx; + s->rx_xdp_tx_full += rq_stats->xdp_tx_full; s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler += rq_stats->mpwqe_filler; s->rx_buff_alloc_err += rq_stats->buff_alloc_err; @@ -314,7 +361,7 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq, struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; struct mlx5_wqe_data_seg *dseg = &wqe->data; - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); @@ -342,21 +389,21 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; int i; - rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info), - GFP_KERNEL, cpu_to_node(c->cpu)); - if (!rq->wqe_info) + rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->mpwqe.info) goto err_out; /* We allocate more than mtt_sz as we will align the pointer */ - rq->mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, + rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, cpu_to_node(c->cpu)); - if (unlikely(!rq->mtt_no_align)) + if (unlikely(!rq->mpwqe.mtt_no_align)) goto err_free_wqe_info; for (i = 0; i < wq_sz; i++) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; - wi->umr.mtt = PTR_ALIGN(rq->mtt_no_align + i * mtt_alloc, + wi->umr.mtt = PTR_ALIGN(rq->mpwqe.mtt_no_align + i * mtt_alloc, MLX5_UMR_ALIGN); wi->umr.mtt_addr = dma_map_single(c->pdev, wi->umr.mtt, mtt_sz, PCI_DMA_TODEVICE); @@ -370,14 +417,14 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, err_unmap_mtts: while (--i >= 0) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; dma_unmap_single(c->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); } - kfree(rq->mtt_no_align); + kfree(rq->mpwqe.mtt_no_align); err_free_wqe_info: - kfree(rq->wqe_info); + kfree(rq->mpwqe.info); err_out: return -ENOMEM; @@ -390,13 +437,13 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq) int i; for (i = 0; i < wq_sz; i++) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); } - kfree(rq->mtt_no_align); - kfree(rq->wqe_info); + kfree(rq->mpwqe.mtt_no_align); + kfree(rq->mpwqe.info); } static int mlx5e_create_rq(struct mlx5e_channel *c, @@ -408,6 +455,8 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); u32 byte_count; + u32 frag_sz; + int npages; int wq_sz; int err; int i; @@ -430,6 +479,11 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->channel = c; rq->ix = c->ix; rq->priv = c->priv; + rq->xdp_prog = priv->xdp_prog; + + rq->buff.map_dir = DMA_FROM_DEVICE; + if (rq->xdp_prog) + rq->buff.map_dir = DMA_BIDIRECTIONAL; switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@ -437,34 +491,45 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; - rq->mpwqe_mtt_offset = c->ix * + rq->mpwqe.mtt_offset = c->ix * MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size)); rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); - rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; - byte_count = rq->wqe_sz; + + rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; + byte_count = rq->buff.wqe_sz; rq->mkey_be = cpu_to_be32(c->priv->umr_mkey.key); err = mlx5e_rq_alloc_mpwqe_info(rq, c); if (err) goto err_rq_wq_destroy; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!rq->skb) { + rq->dma_info = kzalloc_node(wq_sz * sizeof(*rq->dma_info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->dma_info) { err = -ENOMEM; goto err_rq_wq_destroy; } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe; rq->alloc_wqe = mlx5e_alloc_rx_wqe; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; - rq->wqe_sz = (priv->params.lro_en) ? + rq->buff.wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : MLX5E_SW2HW_MTU(priv->netdev->mtu); - rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz); - byte_count = rq->wqe_sz; + byte_count = rq->buff.wqe_sz; + + /* calc the required page order */ + frag_sz = MLX5_RX_HEADROOM + + byte_count /* packet data */ + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + frag_sz = SKB_DATA_ALIGN(frag_sz); + + npages = DIV_ROUND_UP(frag_sz, PAGE_SIZE); + rq->buff.page_order = order_base_2(npages); + byte_count |= MLX5_HW_START_PADDING; rq->mkey_be = c->mkey_be; } @@ -482,6 +547,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->page_cache.head = 0; rq->page_cache.tail = 0; + if (rq->xdp_prog) + bpf_prog_add(rq->xdp_prog, 1); + return 0; err_rq_wq_destroy: @@ -494,12 +562,15 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { int i; + if (rq->xdp_prog) + bpf_prog_put(rq->xdp_prog); + switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: mlx5e_rq_free_mpwqe_info(rq); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - kfree(rq->skb); + kfree(rq->dma_info); } for (i = rq->page_cache.head; i != rq->page_cache.tail; @@ -641,7 +712,7 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) /* UMR WQE (if in progress) is always at wq->head */ if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) - mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]); + mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); while (!mlx5_wq_ll_is_empty(wq)) { wqe_ix_be = *wq->tail_next; @@ -676,8 +747,8 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (param->am_enabled) set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; - sq->ico_wqe_info[pi].num_wqebbs = 1; + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + sq->db.ico_wqe[pi].num_wqebbs = 1; mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */ return 0; @@ -701,26 +772,65 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) mlx5e_destroy_rq(rq); } -static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +static void mlx5e_free_sq_xdp_db(struct mlx5e_sq *sq) { - kfree(sq->wqe_info); - kfree(sq->dma_fifo); - kfree(sq->skb); + kfree(sq->db.xdp.di); + kfree(sq->db.xdp.wqe_info); } -static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +static int mlx5e_alloc_sq_xdp_db(struct mlx5e_sq *sq, int numa) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; - sq->skb = kzalloc_node(wq_sz * sizeof(*sq->skb), GFP_KERNEL, numa); - sq->dma_fifo = kzalloc_node(df_sz * sizeof(*sq->dma_fifo), GFP_KERNEL, - numa); - sq->wqe_info = kzalloc_node(wq_sz * sizeof(*sq->wqe_info), GFP_KERNEL, - numa); + sq->db.xdp.di = kzalloc_node(sizeof(*sq->db.xdp.di) * wq_sz, + GFP_KERNEL, numa); + sq->db.xdp.wqe_info = kzalloc_node(sizeof(*sq->db.xdp.wqe_info) * wq_sz, + GFP_KERNEL, numa); + if (!sq->db.xdp.di || !sq->db.xdp.wqe_info) { + mlx5e_free_sq_xdp_db(sq); + return -ENOMEM; + } + + return 0; +} + +static void mlx5e_free_sq_ico_db(struct mlx5e_sq *sq) +{ + kfree(sq->db.ico_wqe); +} + +static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa) +{ + u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - if (!sq->skb || !sq->dma_fifo || !sq->wqe_info) { - mlx5e_free_sq_db(sq); + sq->db.ico_wqe = kzalloc_node(sizeof(*sq->db.ico_wqe) * wq_sz, + GFP_KERNEL, numa); + if (!sq->db.ico_wqe) + return -ENOMEM; + + return 0; +} + +static void mlx5e_free_sq_txq_db(struct mlx5e_sq *sq) +{ + kfree(sq->db.txq.wqe_info); + kfree(sq->db.txq.dma_fifo); + kfree(sq->db.txq.skb); +} + +static int mlx5e_alloc_sq_txq_db(struct mlx5e_sq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + + sq->db.txq.skb = kzalloc_node(wq_sz * sizeof(*sq->db.txq.skb), + GFP_KERNEL, numa); + sq->db.txq.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.txq.dma_fifo), + GFP_KERNEL, numa); + sq->db.txq.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.txq.wqe_info), + GFP_KERNEL, numa); + if (!sq->db.txq.skb || !sq->db.txq.dma_fifo || !sq->db.txq.wqe_info) { + mlx5e_free_sq_txq_db(sq); return -ENOMEM; } @@ -729,6 +839,46 @@ static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) return 0; } +static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + mlx5e_free_sq_txq_db(sq); + break; + case MLX5E_SQ_ICO: + mlx5e_free_sq_ico_db(sq); + break; + case MLX5E_SQ_XDP: + mlx5e_free_sq_xdp_db(sq); + break; + } +} + +static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + return mlx5e_alloc_sq_txq_db(sq, numa); + case MLX5E_SQ_ICO: + return mlx5e_alloc_sq_ico_db(sq, numa); + case MLX5E_SQ_XDP: + return mlx5e_alloc_sq_xdp_db(sq, numa); + } + + return 0; +} + +static int mlx5e_sq_get_max_wqebbs(u8 sq_type) +{ + switch (sq_type) { + case MLX5E_SQ_ICO: + return MLX5E_ICOSQ_MAX_WQEBBS; + case MLX5E_SQ_XDP: + return MLX5E_XDP_TX_WQEBBS; + } + return MLX5_SEND_WQE_MAX_WQEBBS; +} + static int mlx5e_create_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, @@ -741,6 +891,13 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); int err; + sq->type = param->type; + sq->pdev = c->pdev; + sq->tstamp = &priv->tstamp; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->tc = tc; + err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf)); if (err) return err; @@ -769,18 +926,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - if (param->icosq) { - u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - - sq->ico_wqe_info = kzalloc_node(sizeof(*sq->ico_wqe_info) * - wq_sz, - GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!sq->ico_wqe_info) { - err = -ENOMEM; - goto err_free_sq_db; - } - } else { + if (sq->type == MLX5E_SQ_TXQ) { int txq_ix; txq_ix = c->ix + tc * priv->params.num_channels; @@ -788,19 +934,11 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, priv->txq_to_sq_map[txq_ix] = sq; } - sq->pdev = c->pdev; - sq->tstamp = &priv->tstamp; - sq->mkey_be = c->mkey_be; - sq->channel = c; - sq->tc = tc; - sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; + sq->edge = (sq->wq.sz_m1 + 1) - mlx5e_sq_get_max_wqebbs(sq->type); sq->bf_budget = MLX5E_SQ_BF_BUDGET; return 0; -err_free_sq_db: - mlx5e_free_sq_db(sq); - err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); @@ -815,7 +953,6 @@ static void mlx5e_destroy_sq(struct mlx5e_sq *sq) struct mlx5e_channel *c = sq->channel; struct mlx5e_priv *priv = c->priv; - kfree(sq->ico_wqe_info); mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); mlx5_unmap_free_uar(priv->mdev, &sq->uar); @@ -844,11 +981,12 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) memcpy(sqc, param->sqc, sizeof(param->sqc)); - MLX5_SET(sqc, sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]); + MLX5_SET(sqc, sqc, tis_num_0, param->type == MLX5E_SQ_ICO ? + 0 : priv->tisn[sq->tc]); MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); - MLX5_SET(sqc, sqc, tis_lst_sz, param->icosq ? 0 : 1); + MLX5_SET(sqc, sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 0 : 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); @@ -963,12 +1101,14 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq) netif_tx_disable_queue(sq->txq); /* last doorbell out, godspeed .. */ - if (mlx5e_sq_has_room_for(sq, 1)) + if (mlx5e_sq_has_room_for(sq, 1)) { + sq->db.txq.skb[(sq->pc & sq->wq.sz_m1)] = NULL; mlx5e_send_nop(sq, true); + } } mlx5e_disable_sq(sq); - mlx5e_free_tx_descs(sq); + mlx5e_free_sq_descs(sq); mlx5e_destroy_sq(sq); } @@ -1329,14 +1469,31 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, } } + if (priv->xdp_prog) { + /* XDP SQ CQ params are same as normal TXQ sq CQ params */ + err = mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq, + priv->params.tx_cq_moderation); + if (err) + goto err_close_sqs; + + err = mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq); + if (err) { + mlx5e_close_cq(&c->xdp_sq.cq); + goto err_close_sqs; + } + } + + c->xdp = !!priv->xdp_prog; err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) - goto err_close_sqs; + goto err_close_xdp_sq; netif_set_xps_queue(netdev, get_cpu_mask(c->cpu), ix); *cp = c; return 0; +err_close_xdp_sq: + mlx5e_close_sq(&c->xdp_sq); err_close_sqs: mlx5e_close_sqs(c); @@ -1365,9 +1522,13 @@ err_napi_del: static void mlx5e_close_channel(struct mlx5e_channel *c) { mlx5e_close_rq(&c->rq); + if (c->xdp) + mlx5e_close_sq(&c->xdp_sq); mlx5e_close_sqs(c); mlx5e_close_sq(&c->icosq); napi_disable(&c->napi); + if (c->xdp) + mlx5e_close_cq(&c->xdp_sq.cq); mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); mlx5e_close_cq(&c->icosq.cq); @@ -1441,6 +1602,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv, param->max_inline = priv->params.tx_max_inline; param->min_inline_mode = priv->params.tx_min_inline_mode; + param->type = MLX5E_SQ_TXQ; } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, @@ -1514,7 +1676,22 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_sz, log_wq_size); MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); - param->icosq = true; + param->type = MLX5E_SQ_ICO; +} + +static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); + + param->max_inline = priv->params.tx_max_inline; + /* FOR XDP SQs will support only L2 inline mode */ + param->min_inline_mode = MLX5_INLINE_MODE_NONE; + param->type = MLX5E_SQ_XDP; } static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) @@ -1523,6 +1700,7 @@ static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_chan mlx5e_build_rq_param(priv, &cparam->rq); mlx5e_build_sq_param(priv, &cparam->sq); + mlx5e_build_xdpsq_param(priv, &cparam->xdp_sq); mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz); mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); @@ -2901,6 +3079,92 @@ static void mlx5e_tx_timeout(struct net_device *dev) schedule_work(&priv->tx_timeout_work); } +static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct bpf_prog *old_prog; + int err = 0; + bool reset, was_opened; + int i; + + mutex_lock(&priv->state_lock); + + if ((netdev->features & NETIF_F_LRO) && prog) { + netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n"); + err = -EINVAL; + goto unlock; + } + + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + /* no need for full reset when exchanging programs */ + reset = (!priv->xdp_prog || !prog); + + if (was_opened && reset) + mlx5e_close_locked(netdev); + + /* exchange programs */ + old_prog = xchg(&priv->xdp_prog, prog); + if (prog) + bpf_prog_add(prog, 1); + if (old_prog) + bpf_prog_put(old_prog); + + if (reset) /* change RQ type according to priv->xdp_prog */ + mlx5e_set_rq_priv_params(priv); + + if (was_opened && reset) + mlx5e_open_locked(netdev); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) + goto unlock; + + /* exchanging programs w/o reset, we update ref counts on behalf + * of the channels RQs here. + */ + bpf_prog_add(prog, priv->params.num_channels); + for (i = 0; i < priv->params.num_channels; i++) { + struct mlx5e_channel *c = priv->channel[i]; + + set_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state); + napi_synchronize(&c->napi); + /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */ + + old_prog = xchg(&c->rq.xdp_prog, prog); + + clear_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state); + /* napi_schedule in case we have missed anything */ + set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); + napi_schedule(&c->napi); + + if (old_prog) + bpf_prog_put(old_prog); + } + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static bool mlx5e_xdp_attached(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return !!priv->xdp_prog; +} + +static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return mlx5e_xdp_set(dev, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = mlx5e_xdp_attached(dev); + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -2920,6 +3184,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif .ndo_tx_timeout = mlx5e_tx_timeout, + .ndo_xdp = mlx5e_xdp, }; static const struct net_device_ops mlx5e_netdev_ops_sriov = { @@ -2951,6 +3216,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, .ndo_tx_timeout = mlx5e_tx_timeout, + .ndo_xdp = mlx5e_xdp, }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -3025,13 +3291,6 @@ void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, indirection_rqt[i] = i % num_channels; } -static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) -{ - return MLX5_CAP_GEN(mdev, striding_rq) && - MLX5_CAP_GEN(mdev, umr_ptr_rlky) && - MLX5_CAP_ETH(mdev, reg_umr_sq); -} - static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw) { enum pcie_link_width width; @@ -3111,11 +3370,13 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - priv->params.log_sq_size = - MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - priv->params.rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) ? - MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : - MLX5_WQ_TYPE_LINKED_LIST; + priv->mdev = mdev; + priv->netdev = netdev; + priv->params.num_channels = profile->max_nch(mdev); + priv->profile = profile; + priv->ppriv = ppriv; + + priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; /* set CQE compression */ priv->params.rx_cqe_compress_admin = false; @@ -3128,33 +3389,11 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->params.rx_cqe_compress_admin = cqe_compress_heuristic(link_speed, pci_bw); } - priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin; - switch (priv->params.rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; - priv->params.mpwqe_log_stride_sz = - priv->params.rx_cqe_compress ? - MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : - MLX5_MPWRQ_LOG_STRIDE_SIZE; - priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - - priv->params.mpwqe_log_stride_sz; + mlx5e_set_rq_priv_params(priv); + if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) priv->params.lro_en = true; - break; - default: /* MLX5_WQ_TYPE_LINKED_LIST */ - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; - } - - mlx5_core_info(mdev, - "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", - priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, - BIT(priv->params.log_rq_size), - BIT(priv->params.mpwqe_log_stride_sz), - priv->params.rx_cqe_compress_admin); - - priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, - BIT(priv->params.log_rq_size)); priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); @@ -3174,19 +3413,16 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); - priv->params.lro_wqe_sz = - MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + priv->params.lro_wqe_sz = + MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - + /* Extra room needed for build_skb */ + MLX5_RX_HEADROOM - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); /* Initialize pflags */ MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); - priv->mdev = mdev; - priv->netdev = netdev; - priv->params.num_channels = profile->max_nch(mdev); - priv->profile = profile; - priv->ppriv = ppriv; - #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_ets_init(priv); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index dc8677933f76..0a81bd34abbe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -179,50 +179,99 @@ unlock: mutex_unlock(&priv->state_lock); } -int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +#define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT) + +static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) { - struct sk_buff *skb; - dma_addr_t dma_addr; + struct mlx5e_page_cache *cache = &rq->page_cache; + u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); - skb = napi_alloc_skb(rq->cq.napi, rq->wqe_sz); - if (unlikely(!skb)) - return -ENOMEM; + if (tail_next == cache->head) { + rq->stats.cache_full++; + return false; + } - dma_addr = dma_map_single(rq->pdev, - /* hw start padding */ - skb->data, - /* hw end padding */ - rq->wqe_sz, - DMA_FROM_DEVICE); + cache->page_cache[cache->tail] = *dma_info; + cache->tail = tail_next; + return true; +} - if (unlikely(dma_mapping_error(rq->pdev, dma_addr))) - goto err_free_skb; +static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; - *((dma_addr_t *)skb->cb) = dma_addr; - wqe->data.addr = cpu_to_be64(dma_addr); + if (unlikely(cache->head == cache->tail)) { + rq->stats.cache_empty++; + return false; + } - rq->skb[ix] = skb; + if (page_ref_count(cache->page_cache[cache->head].page) != 1) { + rq->stats.cache_busy++; + return false; + } + + *dma_info = cache->page_cache[cache->head]; + cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); + rq->stats.cache_reuse++; + + dma_sync_single_for_device(rq->pdev, dma_info->addr, + RQ_PAGE_SIZE(rq), + DMA_FROM_DEVICE); + return true; +} + +static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct page *page; + + if (mlx5e_rx_cache_get(rq, dma_info)) + return 0; + + page = dev_alloc_pages(rq->buff.page_order); + if (unlikely(!page)) + return -ENOMEM; + + dma_info->page = page; + dma_info->addr = dma_map_page(rq->pdev, page, 0, + RQ_PAGE_SIZE(rq), rq->buff.map_dir); + if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { + put_page(page); + return -ENOMEM; + } return 0; +} -err_free_skb: - dev_kfree_skb(skb); +void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, + bool recycle) +{ + if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info)) + return; - return -ENOMEM; + dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq), + rq->buff.map_dir); + put_page(dma_info->page); +} + +int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +{ + struct mlx5e_dma_info *di = &rq->dma_info[ix]; + + if (unlikely(mlx5e_page_alloc_mapped(rq, di))) + return -ENOMEM; + + wqe->data.addr = cpu_to_be64(di->addr + MLX5_RX_HEADROOM); + return 0; } void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) { - struct sk_buff *skb = rq->skb[ix]; + struct mlx5e_dma_info *di = &rq->dma_info[ix]; - if (skb) { - rq->skb[ix] = NULL; - dma_unmap_single(rq->pdev, - *((dma_addr_t *)skb->cb), - rq->wqe_sz, - DMA_FROM_DEVICE); - dev_kfree_skb(skb); - } + mlx5e_page_release(rq, di, true); } static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) @@ -279,7 +328,7 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev, static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; struct mlx5e_sq *sq = &rq->channel->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *wqe; @@ -288,8 +337,8 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) /* fill sq edge with nops to avoid wqe wrap around */ while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; - sq->ico_wqe_info[pi].num_wqebbs = 1; + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + sq->db.ico_wqe[pi].num_wqebbs = 1; mlx5e_send_nop(sq, true); } @@ -299,90 +348,17 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR; - sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs; + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; + sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs; sq->pc += num_wqebbs; mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } -static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) -{ - struct mlx5e_page_cache *cache = &rq->page_cache; - u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); - - if (tail_next == cache->head) { - rq->stats.cache_full++; - return false; - } - - cache->page_cache[cache->tail] = *dma_info; - cache->tail = tail_next; - return true; -} - -static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) -{ - struct mlx5e_page_cache *cache = &rq->page_cache; - - if (unlikely(cache->head == cache->tail)) { - rq->stats.cache_empty++; - return false; - } - - if (page_ref_count(cache->page_cache[cache->head].page) != 1) { - rq->stats.cache_busy++; - return false; - } - - *dma_info = cache->page_cache[cache->head]; - cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); - rq->stats.cache_reuse++; - - dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE, - DMA_FROM_DEVICE); - return true; -} - -static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) -{ - struct page *page; - - if (mlx5e_rx_cache_get(rq, dma_info)) - return 0; - - page = dev_alloc_page(); - if (unlikely(!page)) - return -ENOMEM; - - dma_info->page = page; - dma_info->addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { - put_page(page); - return -ENOMEM; - } - - return 0; -} - -void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, - bool recycle) -{ - if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info)) - return; - - dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, DMA_FROM_DEVICE); - put_page(dma_info->page); -} - static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT; int pg_strides = mlx5e_mpwqe_strides_per_page(rq); int err; @@ -436,7 +412,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) { - mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]); + mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); return; } @@ -448,7 +424,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) mlx5_wq_ll_update_db_record(wq); } -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { int err; @@ -462,7 +438,7 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; mlx5e_free_rx_mpwqe(rq, wi); } @@ -656,33 +632,154 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, napi_gro_receive(rq->cq.napi, skb); } +static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_tx_wqe *wqe; + u16 pi = (sq->pc - MLX5E_XDP_TX_WQEBBS) & wq->sz_m1; /* last pi */ + + wqe = mlx5_wq_cyc_get_wqe(wq, pi); + + wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); +} + +static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, + struct mlx5e_dma_info *di, + unsigned int data_offset, + int len) +{ + struct mlx5e_sq *sq = &rq->channel->xdp_sq; + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = sq->pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5e_sq_wqe_info *wi = &sq->db.xdp.wqe_info[pi]; + + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + struct mlx5_wqe_data_seg *dseg; + + dma_addr_t dma_addr = di->addr + data_offset + MLX5E_XDP_MIN_INLINE; + unsigned int dma_len = len - MLX5E_XDP_MIN_INLINE; + void *data = page_address(di->page) + data_offset; + + if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) { + if (sq->db.xdp.doorbell) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->db.xdp.doorbell = false; + } + rq->stats.xdp_tx_full++; + mlx5e_page_release(rq, di, true); + return; + } + + dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, + PCI_DMA_TODEVICE); + + memset(wqe, 0, sizeof(*wqe)); + + /* copy the inline part */ + memcpy(eseg->inline_hdr_start, data, MLX5E_XDP_MIN_INLINE); + eseg->inline_hdr_sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); + + dseg = (struct mlx5_wqe_data_seg *)cseg + (MLX5E_XDP_TX_DS_COUNT - 1); + + /* write the dma part */ + dseg->addr = cpu_to_be64(dma_addr); + dseg->byte_count = cpu_to_be32(dma_len); + dseg->lkey = sq->mkey_be; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | MLX5E_XDP_TX_DS_COUNT); + + sq->db.xdp.di[pi] = *di; + wi->opcode = MLX5_OPCODE_SEND; + wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS; + sq->pc += MLX5E_XDP_TX_WQEBBS; + + sq->db.xdp.doorbell = true; + rq->stats.xdp_tx++; +} + +/* returns true if packet was consumed by xdp */ +static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq, + const struct bpf_prog *prog, + struct mlx5e_dma_info *di, + void *data, u16 len) +{ + struct xdp_buff xdp; + u32 act; + + if (!prog) + return false; + + xdp.data = data; + xdp.data_end = xdp.data + len; + act = bpf_prog_run_xdp(prog, &xdp); + switch (act) { + case XDP_PASS: + return false; + case XDP_TX: + mlx5e_xmit_xdp_frame(rq, di, MLX5_RX_HEADROOM, len); + return true; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + case XDP_DROP: + rq->stats.xdp_drop++; + mlx5e_page_release(rq, di, true); + return true; + } +} + void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { + struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog); + struct mlx5e_dma_info *di; struct mlx5e_rx_wqe *wqe; - struct sk_buff *skb; __be16 wqe_counter_be; + struct sk_buff *skb; u16 wqe_counter; + void *va, *data; u32 cqe_bcnt; wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); - skb = rq->skb[wqe_counter]; - prefetch(skb->data); - rq->skb[wqe_counter] = NULL; - - dma_unmap_single(rq->pdev, - *((dma_addr_t *)skb->cb), - rq->wqe_sz, - DMA_FROM_DEVICE); + di = &rq->dma_info[wqe_counter]; + va = page_address(di->page); + data = va + MLX5_RX_HEADROOM; + + dma_sync_single_range_for_cpu(rq->pdev, + di->addr, + MLX5_RX_HEADROOM, + rq->buff.wqe_sz, + DMA_FROM_DEVICE); + prefetch(data); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { rq->stats.wqe_err++; - dev_kfree_skb(skb); + mlx5e_page_release(rq, di, true); goto wq_ll_pop; } - cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt)) + goto wq_ll_pop; /* page/packet was consumed by XDP */ + + skb = build_skb(va, RQ_PAGE_SIZE(rq)); + if (unlikely(!skb)) { + rq->stats.buff_alloc_err++; + mlx5e_page_release(rq, di, true); + goto wq_ll_pop; + } + + /* queue up for recycling ..*/ + page_ref_inc(di->page); + mlx5e_page_release(rq, di, true); + + skb_reserve(skb, MLX5_RX_HEADROOM); skb_put(skb, cqe_bcnt); mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); @@ -734,7 +831,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); u16 wqe_id = be16_to_cpu(cqe->wqe_id); - struct mlx5e_mpw_info *wi = &rq->wqe_info[wqe_id]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id]; struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id); struct sk_buff *skb; u16 cqe_bcnt; @@ -776,6 +873,7 @@ mpwrq_cqe_out: int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); + struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq; int work_done = 0; if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) @@ -802,6 +900,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) rq->handle_rx_cqe(rq, cqe); } + if (xdp_sq->db.xdp.doorbell) { + mlx5e_xmit_xdp_doorbell(xdp_sq); + xdp_sq->db.xdp.doorbell = false; + } + mlx5_cqwq_update_db_record(&cq->wq); /* ensure cq space is freed before enabling more cqes */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 6af8d79e8c2a..57452fdc5154 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -65,6 +65,9 @@ struct mlx5e_sw_stats { u64 rx_csum_none; u64 rx_csum_complete; u64 rx_csum_unnecessary_inner; + u64 rx_xdp_drop; + u64 rx_xdp_tx; + u64 rx_xdp_tx_full; u64 tx_csum_partial; u64 tx_csum_partial_inner; u64 tx_queue_stopped; @@ -100,6 +103,9 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, @@ -278,6 +284,9 @@ struct mlx5e_rq_stats { u64 csum_none; u64 lro_packets; u64 lro_bytes; + u64 xdp_drop; + u64 xdp_tx; + u64 xdp_tx_full; u64 wqe_err; u64 mpwqe_filler; u64 buff_alloc_err; @@ -295,6 +304,9 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index eb0e72537f10..70a717382357 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -52,7 +52,6 @@ void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | 0x01); - sq->skb[pi] = NULL; sq->pc++; sq->stats.nop++; @@ -82,15 +81,17 @@ static inline void mlx5e_dma_push(struct mlx5e_sq *sq, u32 size, enum mlx5e_dma_map_type map_type) { - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr; - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size; - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].type = map_type; + u32 i = sq->dma_fifo_pc & sq->dma_fifo_mask; + + sq->db.txq.dma_fifo[i].addr = addr; + sq->db.txq.dma_fifo[i].size = size; + sq->db.txq.dma_fifo[i].type = map_type; sq->dma_fifo_pc++; } static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i) { - return &sq->dma_fifo[i & sq->dma_fifo_mask]; + return &sq->db.txq.dma_fifo[i & sq->dma_fifo_mask]; } static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, u8 num_dma) @@ -221,7 +222,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) u16 pi = sq->pc & wq->sz_m1; struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5e_tx_wqe_info *wi = &sq->wqe_info[pi]; + struct mlx5e_tx_wqe_info *wi = &sq->db.txq.wqe_info[pi]; struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_eth_seg *eseg = &wqe->eth; @@ -341,7 +342,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - sq->skb[pi] = skb; + sq->db.txq.skb[pi] = skb; wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->pc += wi->num_wqebbs; @@ -368,8 +369,10 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) } /* fill sq edge with nops to avoid wqe wrap around */ - while ((sq->pc & wq->sz_m1) > sq->edge) + while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { + sq->db.txq.skb[pi] = NULL; mlx5e_send_nop(sq, false); + } if (bf) sq->bf_budget--; @@ -442,8 +445,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) last_wqe = (sqcc == wqe_counter); ci = sqcc & sq->wq.sz_m1; - skb = sq->skb[ci]; - wi = &sq->wqe_info[ci]; + skb = sq->db.txq.skb[ci]; + wi = &sq->db.txq.wqe_info[ci]; if (unlikely(!skb)) { /* nop */ sqcc++; @@ -492,7 +495,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -void mlx5e_free_tx_descs(struct mlx5e_sq *sq) +static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq) { struct mlx5e_tx_wqe_info *wi; struct sk_buff *skb; @@ -501,8 +504,8 @@ void mlx5e_free_tx_descs(struct mlx5e_sq *sq) while (sq->cc != sq->pc) { ci = sq->cc & sq->wq.sz_m1; - skb = sq->skb[ci]; - wi = &sq->wqe_info[ci]; + skb = sq->db.txq.skb[ci]; + wi = &sq->db.txq.wqe_info[ci]; if (!skb) { /* nop */ sq->cc++; @@ -520,3 +523,37 @@ void mlx5e_free_tx_descs(struct mlx5e_sq *sq) sq->cc += wi->num_wqebbs; } } + +static void mlx5e_free_xdp_sq_descs(struct mlx5e_sq *sq) +{ + struct mlx5e_sq_wqe_info *wi; + struct mlx5e_dma_info *di; + u16 ci; + + while (sq->cc != sq->pc) { + ci = sq->cc & sq->wq.sz_m1; + di = &sq->db.xdp.di[ci]; + wi = &sq->db.xdp.wqe_info[ci]; + + if (wi->opcode == MLX5_OPCODE_NOP) { + sq->cc++; + continue; + } + + sq->cc += wi->num_wqebbs; + + mlx5e_page_release(&sq->channel->rq, di, false); + } +} + +void mlx5e_free_sq_descs(struct mlx5e_sq *sq) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + mlx5e_free_txq_sq_descs(sq); + break; + case MLX5E_SQ_XDP: + mlx5e_free_xdp_sq_descs(sq); + break; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 08d8b0c91f07..5703f19a6a24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -72,7 +72,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) do { u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct mlx5e_ico_wqe_info *icowi = &sq->ico_wqe_info[ci]; + struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; mlx5_cqwq_pop(&cq->wq); sqcc += icowi->num_wqebbs; @@ -105,6 +105,66 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) sq->cc = sqcc; } +static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_sq *sq; + u16 sqcc; + int i; + + sq = container_of(cq, struct mlx5e_sq, cq); + + if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) + return false; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { + struct mlx5_cqe64 *cqe; + u16 wqe_counter; + bool last_wqe; + + cqe = mlx5e_get_cqe(cq); + if (!cqe) + break; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + struct mlx5e_sq_wqe_info *wi; + struct mlx5e_dma_info *di; + u16 ci; + + last_wqe = (sqcc == wqe_counter); + + ci = sqcc & sq->wq.sz_m1; + di = &sq->db.xdp.di[ci]; + wi = &sq->db.xdp.wqe_info[ci]; + + if (unlikely(wi->opcode == MLX5_OPCODE_NOP)) { + sqcc++; + continue; + } + + sqcc += wi->num_wqebbs; + /* Recycle RX page */ + mlx5e_page_release(&sq->channel->rq, di, true); + } while (!last_wqe); + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; + return (i == MLX5E_TX_CQ_POLL_BUDGET); +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -121,6 +181,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; + if (c->xdp) + busy |= mlx5e_poll_xdp_tx_cq(&c->xdp_sq.cq); + mlx5e_poll_ico_cq(&c->icosq.cq); busy |= mlx5e_post_rx_wqes(&c->rq); diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 68178819ff12..0efb2ba9a558 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -3,6 +3,13 @@ obj-$(CONFIG_NFP_NETVF) += nfp_netvf.o nfp_netvf-objs := \ nfp_net_common.o \ nfp_net_ethtool.o \ + nfp_net_offload.o \ nfp_netvf_main.o +ifeq ($(CONFIG_BPF_SYSCALL),y) +nfp_netvf-objs += \ + nfp_bpf_verifier.o \ + nfp_bpf_jit.o +endif + nfp_netvf-$(CONFIG_NFP_NET_DEBUG) += nfp_net_debugfs.o diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h new file mode 100644 index 000000000000..22484b6fd3e8 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -0,0 +1,233 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NFP_ASM_H__ +#define __NFP_ASM_H__ 1 + +#include "nfp_bpf.h" + +#define REG_NONE 0 + +#define RE_REG_NO_DST 0x020 +#define RE_REG_IMM 0x020 +#define RE_REG_IMM_encode(x) \ + (RE_REG_IMM | ((x) & 0x1f) | (((x) & 0x60) << 1)) +#define RE_REG_IMM_MAX 0x07fULL +#define RE_REG_XFR 0x080 + +#define UR_REG_XFR 0x180 +#define UR_REG_NN 0x280 +#define UR_REG_NO_DST 0x300 +#define UR_REG_IMM UR_REG_NO_DST +#define UR_REG_IMM_encode(x) (UR_REG_IMM | (x)) +#define UR_REG_IMM_MAX 0x0ffULL + +#define OP_BR_BASE 0x0d800000020ULL +#define OP_BR_BASE_MASK 0x0f8000c3ce0ULL +#define OP_BR_MASK 0x0000000001fULL +#define OP_BR_EV_PIP 0x00000000300ULL +#define OP_BR_CSS 0x0000003c000ULL +#define OP_BR_DEFBR 0x00000300000ULL +#define OP_BR_ADDR_LO 0x007ffc00000ULL +#define OP_BR_ADDR_HI 0x10000000000ULL + +#define nfp_is_br(_insn) \ + (((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE) + +enum br_mask { + BR_BEQ = 0x00, + BR_BNE = 0x01, + BR_BHS = 0x04, + BR_BLO = 0x05, + BR_BGE = 0x08, + BR_UNC = 0x18, +}; + +enum br_ev_pip { + BR_EV_PIP_UNCOND = 0, + BR_EV_PIP_COND = 1, +}; + +enum br_ctx_signal_state { + BR_CSS_NONE = 2, +}; + +#define OP_BBYTE_BASE 0x0c800000000ULL +#define OP_BB_A_SRC 0x000000000ffULL +#define OP_BB_BYTE 0x00000000300ULL +#define OP_BB_B_SRC 0x0000003fc00ULL +#define OP_BB_I8 0x00000040000ULL +#define OP_BB_EQ 0x00000080000ULL +#define OP_BB_DEFBR 0x00000300000ULL +#define OP_BB_ADDR_LO 0x007ffc00000ULL +#define OP_BB_ADDR_HI 0x10000000000ULL + +#define OP_BALU_BASE 0x0e800000000ULL +#define OP_BA_A_SRC 0x000000003ffULL +#define OP_BA_B_SRC 0x000000ffc00ULL +#define OP_BA_DEFBR 0x00000300000ULL +#define OP_BA_ADDR_HI 0x0007fc00000ULL + +#define OP_IMMED_A_SRC 0x000000003ffULL +#define OP_IMMED_B_SRC 0x000000ffc00ULL +#define OP_IMMED_IMM 0x0000ff00000ULL +#define OP_IMMED_WIDTH 0x00060000000ULL +#define OP_IMMED_INV 0x00080000000ULL +#define OP_IMMED_SHIFT 0x00600000000ULL +#define OP_IMMED_BASE 0x0f000000000ULL +#define OP_IMMED_WR_AB 0x20000000000ULL + +enum immed_width { + IMMED_WIDTH_ALL = 0, + IMMED_WIDTH_BYTE = 1, + IMMED_WIDTH_WORD = 2, +}; + +enum immed_shift { + IMMED_SHIFT_0B = 0, + IMMED_SHIFT_1B = 1, + IMMED_SHIFT_2B = 2, +}; + +#define OP_SHF_BASE 0x08000000000ULL +#define OP_SHF_A_SRC 0x000000000ffULL +#define OP_SHF_SC 0x00000000300ULL +#define OP_SHF_B_SRC 0x0000003fc00ULL +#define OP_SHF_I8 0x00000040000ULL +#define OP_SHF_SW 0x00000080000ULL +#define OP_SHF_DST 0x0000ff00000ULL +#define OP_SHF_SHIFT 0x001f0000000ULL +#define OP_SHF_OP 0x00e00000000ULL +#define OP_SHF_DST_AB 0x01000000000ULL +#define OP_SHF_WR_AB 0x20000000000ULL + +enum shf_op { + SHF_OP_NONE = 0, + SHF_OP_AND = 2, + SHF_OP_OR = 5, +}; + +enum shf_sc { + SHF_SC_R_ROT = 0, + SHF_SC_R_SHF = 1, + SHF_SC_L_SHF = 2, + SHF_SC_R_DSHF = 3, +}; + +#define OP_ALU_A_SRC 0x000000003ffULL +#define OP_ALU_B_SRC 0x000000ffc00ULL +#define OP_ALU_DST 0x0003ff00000ULL +#define OP_ALU_SW 0x00040000000ULL +#define OP_ALU_OP 0x00f80000000ULL +#define OP_ALU_DST_AB 0x01000000000ULL +#define OP_ALU_BASE 0x0a000000000ULL +#define OP_ALU_WR_AB 0x20000000000ULL + +enum alu_op { + ALU_OP_NONE = 0x00, + ALU_OP_ADD = 0x01, + ALU_OP_NEG = 0x04, + ALU_OP_AND = 0x08, + ALU_OP_SUB_C = 0x0d, + ALU_OP_ADD_C = 0x11, + ALU_OP_OR = 0x14, + ALU_OP_SUB = 0x15, + ALU_OP_XOR = 0x18, +}; + +enum alu_dst_ab { + ALU_DST_A = 0, + ALU_DST_B = 1, +}; + +#define OP_LDF_BASE 0x0c000000000ULL +#define OP_LDF_A_SRC 0x000000000ffULL +#define OP_LDF_SC 0x00000000300ULL +#define OP_LDF_B_SRC 0x0000003fc00ULL +#define OP_LDF_I8 0x00000040000ULL +#define OP_LDF_SW 0x00000080000ULL +#define OP_LDF_ZF 0x00000100000ULL +#define OP_LDF_BMASK 0x0000f000000ULL +#define OP_LDF_SHF 0x001f0000000ULL +#define OP_LDF_WR_AB 0x20000000000ULL + +#define OP_CMD_A_SRC 0x000000000ffULL +#define OP_CMD_CTX 0x00000000300ULL +#define OP_CMD_B_SRC 0x0000003fc00ULL +#define OP_CMD_TOKEN 0x000000c0000ULL +#define OP_CMD_XFER 0x00001f00000ULL +#define OP_CMD_CNT 0x0000e000000ULL +#define OP_CMD_SIG 0x000f0000000ULL +#define OP_CMD_TGT_CMD 0x07f00000000ULL +#define OP_CMD_MODE 0x1c0000000000ULL + +struct cmd_tgt_act { + u8 token; + u8 tgt_cmd; +}; + +enum cmd_tgt_map { + CMD_TGT_READ8, + CMD_TGT_WRITE8, + CMD_TGT_READ_LE, + CMD_TGT_READ_SWAP_LE, + __CMD_TGT_MAP_SIZE, +}; + +enum cmd_mode { + CMD_MODE_40b_AB = 0, + CMD_MODE_40b_BA = 1, + CMD_MODE_32b = 4, +}; + +enum cmd_ctx_swap { + CMD_CTX_SWAP = 0, + CMD_CTX_NO_SWAP = 3, +}; + +#define OP_LCSR_BASE 0x0fc00000000ULL +#define OP_LCSR_A_SRC 0x000000003ffULL +#define OP_LCSR_B_SRC 0x000000ffc00ULL +#define OP_LCSR_WRITE 0x00000200000ULL +#define OP_LCSR_ADDR 0x001ffc00000ULL + +enum lcsr_wr_src { + LCSR_WR_AREG, + LCSR_WR_BREG, + LCSR_WR_IMM, +}; + +#define OP_CARB_BASE 0x0e000000000ULL +#define OP_CARB_OR 0x00000010000ULL + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h new file mode 100644 index 000000000000..fc220cd04115 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NFP_BPF_H__ +#define __NFP_BPF_H__ 1 + +#include <linux/bitfield.h> +#include <linux/bpf.h> +#include <linux/list.h> +#include <linux/types.h> + +#define FIELD_FIT(mask, val) (!((((u64)val) << __bf_shf(mask)) & ~(mask))) + +/* For branch fixup logic use up-most byte of branch instruction as scratch + * area. Remember to clear this before sending instructions to HW! + */ +#define OP_BR_SPECIAL 0xff00000000000000ULL + +enum br_special { + OP_BR_NORMAL = 0, + OP_BR_GO_OUT, + OP_BR_GO_ABORT, +}; + +enum static_regs { + STATIC_REG_PKT = 1, +#define REG_PKT_BANK ALU_DST_A + STATIC_REG_IMM = 2, /* Bank AB */ +}; + +enum nfp_bpf_action_type { + NN_ACT_TC_DROP, + NN_ACT_TC_REDIR, + NN_ACT_DIRECT, +}; + +/* Software register representation, hardware encoding in asm.h */ +#define NN_REG_TYPE GENMASK(31, 24) +#define NN_REG_VAL GENMASK(7, 0) + +enum nfp_bpf_reg_type { + NN_REG_GPR_A = BIT(0), + NN_REG_GPR_B = BIT(1), + NN_REG_NNR = BIT(2), + NN_REG_XFER = BIT(3), + NN_REG_IMM = BIT(4), + NN_REG_NONE = BIT(5), +}; + +#define NN_REG_GPR_BOTH (NN_REG_GPR_A | NN_REG_GPR_B) + +#define reg_both(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_BOTH)) +#define reg_a(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_A)) +#define reg_b(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_B)) +#define reg_nnr(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_NNR)) +#define reg_xfer(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_XFER)) +#define reg_imm(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_IMM)) +#define reg_none() (FIELD_PREP(NN_REG_TYPE, NN_REG_NONE)) + +#define pkt_reg(np) reg_a((np)->regs_per_thread - STATIC_REG_PKT) +#define imm_a(np) reg_a((np)->regs_per_thread - STATIC_REG_IMM) +#define imm_b(np) reg_b((np)->regs_per_thread - STATIC_REG_IMM) +#define imm_both(np) reg_both((np)->regs_per_thread - STATIC_REG_IMM) + +#define NFP_BPF_ABI_FLAGS reg_nnr(0) +#define NFP_BPF_ABI_FLAG_MARK 1 +#define NFP_BPF_ABI_MARK reg_nnr(1) +#define NFP_BPF_ABI_PKT reg_nnr(2) +#define NFP_BPF_ABI_LEN reg_nnr(3) + +struct nfp_prog; +struct nfp_insn_meta; +typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); + +#define nfp_prog_first_meta(nfp_prog) \ + list_first_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l) +#define nfp_prog_last_meta(nfp_prog) \ + list_last_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l) +#define nfp_meta_next(meta) list_next_entry(meta, l) +#define nfp_meta_prev(meta) list_prev_entry(meta, l) + +/** + * struct nfp_insn_meta - BPF instruction wrapper + * @insn: BPF instruction + * @off: index of first generated machine instruction (in nfp_prog.prog) + * @n: eBPF instruction number + * @skip: skip this instruction (optimized out) + * @double_cb: callback for second part of the instruction + * @l: link on nfp_prog->insns list + */ +struct nfp_insn_meta { + struct bpf_insn insn; + unsigned int off; + unsigned short n; + bool skip; + instr_cb_t double_cb; + + struct list_head l; +}; + +#define BPF_SIZE_MASK 0x18 + +static inline u8 mbpf_class(const struct nfp_insn_meta *meta) +{ + return BPF_CLASS(meta->insn.code); +} + +static inline u8 mbpf_src(const struct nfp_insn_meta *meta) +{ + return BPF_SRC(meta->insn.code); +} + +static inline u8 mbpf_op(const struct nfp_insn_meta *meta) +{ + return BPF_OP(meta->insn.code); +} + +static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) +{ + return BPF_MODE(meta->insn.code); +} + +/** + * struct nfp_prog - nfp BPF program + * @prog: machine code + * @prog_len: number of valid instructions in @prog array + * @__prog_alloc_len: alloc size of @prog array + * @act: BPF program/action type (TC DA, TC with action, XDP etc.) + * @num_regs: number of registers used by this program + * @regs_per_thread: number of basic registers allocated per thread + * @start_off: address of the first instruction in the memory + * @tgt_out: jump target for normal exit + * @tgt_abort: jump target for abort (e.g. access outside of packet buffer) + * @tgt_done: jump target to get the next packet + * @n_translated: number of successfully translated instructions (for errors) + * @error: error code if something went wrong + * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) + */ +struct nfp_prog { + u64 *prog; + unsigned int prog_len; + unsigned int __prog_alloc_len; + + enum nfp_bpf_action_type act; + + unsigned int num_regs; + unsigned int regs_per_thread; + + unsigned int start_off; + unsigned int tgt_out; + unsigned int tgt_abort; + unsigned int tgt_done; + + unsigned int n_translated; + int error; + + struct list_head insns; +}; + +struct nfp_bpf_result { + unsigned int n_instr; + bool dense_mode; +}; + +#ifdef CONFIG_BPF_SYSCALL +int +nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act, + unsigned int prog_start, unsigned int prog_done, + unsigned int prog_sz, struct nfp_bpf_result *res); +#else +int +nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act, + unsigned int prog_start, unsigned int prog_done, + unsigned int prog_sz, struct nfp_bpf_result *res) +{ + return -ENOTSUPP; +} +#endif + +int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog); + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c new file mode 100644 index 000000000000..3de819acd68c --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -0,0 +1,1811 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) "NFP net bpf: " fmt + +#include <linux/kernel.h> +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/pkt_cls.h> +#include <linux/unistd.h> + +#include "nfp_asm.h" +#include "nfp_bpf.h" + +/* --- NFP prog --- */ +/* Foreach "multiple" entries macros provide pos and next<n> pointers. + * It's safe to modify the next pointers (but not pos). + */ +#define nfp_for_each_insn_walk2(nfp_prog, pos, next) \ + for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ + next = list_next_entry(pos, l); \ + &(nfp_prog)->insns != &pos->l && \ + &(nfp_prog)->insns != &next->l; \ + pos = nfp_meta_next(pos), \ + next = nfp_meta_next(pos)) + +#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2) \ + for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ + next = list_next_entry(pos, l), \ + next2 = list_next_entry(next, l); \ + &(nfp_prog)->insns != &pos->l && \ + &(nfp_prog)->insns != &next->l && \ + &(nfp_prog)->insns != &next2->l; \ + pos = nfp_meta_next(pos), \ + next = nfp_meta_next(pos), \ + next2 = nfp_meta_next(next)) + +static bool +nfp_meta_has_next(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return meta->l.next != &nfp_prog->insns; +} + +static bool +nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return meta->l.prev != &nfp_prog->insns; +} + +static void nfp_prog_free(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *tmp; + + list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) { + list_del(&meta->l); + kfree(meta); + } + kfree(nfp_prog); +} + +static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn) +{ + if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) { + nfp_prog->error = -ENOSPC; + return; + } + + nfp_prog->prog[nfp_prog->prog_len] = insn; + nfp_prog->prog_len++; +} + +static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog) +{ + return nfp_prog->start_off + nfp_prog->prog_len; +} + +static unsigned int +nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset) +{ + return offset - nfp_prog->start_off; +} + +/* --- SW reg --- */ +struct nfp_insn_ur_regs { + enum alu_dst_ab dst_ab; + u16 dst; + u16 areg, breg; + bool swap; + bool wr_both; +}; + +struct nfp_insn_re_regs { + enum alu_dst_ab dst_ab; + u8 dst; + u8 areg, breg; + bool swap; + bool wr_both; + bool i8; +}; + +static u16 nfp_swreg_to_unreg(u32 swreg, bool is_dst) +{ + u16 val = FIELD_GET(NN_REG_VAL, swreg); + + switch (FIELD_GET(NN_REG_TYPE, swreg)) { + case NN_REG_GPR_A: + case NN_REG_GPR_B: + case NN_REG_GPR_BOTH: + return val; + case NN_REG_NNR: + return UR_REG_NN | val; + case NN_REG_XFER: + return UR_REG_XFR | val; + case NN_REG_IMM: + if (val & ~0xff) { + pr_err("immediate too large\n"); + return 0; + } + return UR_REG_IMM_encode(val); + case NN_REG_NONE: + return is_dst ? UR_REG_NO_DST : REG_NONE; + default: + pr_err("unrecognized reg encoding %08x\n", swreg); + return 0; + } +} + +static int +swreg_to_unrestricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_ur_regs *reg) +{ + memset(reg, 0, sizeof(*reg)); + + /* Decode destination */ + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B) + reg->dst_ab = ALU_DST_B; + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH) + reg->wr_both = true; + reg->dst = nfp_swreg_to_unreg(dst, true); + + /* Decode source operands */ + if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg)) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B || + FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) { + reg->areg = nfp_swreg_to_unreg(rreg, false); + reg->breg = nfp_swreg_to_unreg(lreg, false); + reg->swap = true; + } else { + reg->areg = nfp_swreg_to_unreg(lreg, false); + reg->breg = nfp_swreg_to_unreg(rreg, false); + } + + return 0; +} + +static u16 nfp_swreg_to_rereg(u32 swreg, bool is_dst, bool has_imm8, bool *i8) +{ + u16 val = FIELD_GET(NN_REG_VAL, swreg); + + switch (FIELD_GET(NN_REG_TYPE, swreg)) { + case NN_REG_GPR_A: + case NN_REG_GPR_B: + case NN_REG_GPR_BOTH: + return val; + case NN_REG_XFER: + return RE_REG_XFR | val; + case NN_REG_IMM: + if (val & ~(0x7f | has_imm8 << 7)) { + pr_err("immediate too large\n"); + return 0; + } + *i8 = val & 0x80; + return RE_REG_IMM_encode(val & 0x7f); + case NN_REG_NONE: + return is_dst ? RE_REG_NO_DST : REG_NONE; + default: + pr_err("unrecognized reg encoding\n"); + return 0; + } +} + +static int +swreg_to_restricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_re_regs *reg, + bool has_imm8) +{ + memset(reg, 0, sizeof(*reg)); + + /* Decode destination */ + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B) + reg->dst_ab = ALU_DST_B; + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH) + reg->wr_both = true; + reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL); + + /* Decode source operands */ + if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg)) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B || + FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) { + reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); + reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); + reg->swap = true; + } else { + reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); + reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); + } + + return 0; +} + +/* --- Emitters --- */ +static const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { + [CMD_TGT_WRITE8] = { 0x00, 0x42 }, + [CMD_TGT_READ8] = { 0x01, 0x43 }, + [CMD_TGT_READ_LE] = { 0x01, 0x40 }, + [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, +}; + +static void +__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, + u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync) +{ + enum cmd_ctx_swap ctx; + u64 insn; + + if (sync) + ctx = CMD_CTX_SWAP; + else + ctx = CMD_CTX_NO_SWAP; + + insn = FIELD_PREP(OP_CMD_A_SRC, areg) | + FIELD_PREP(OP_CMD_CTX, ctx) | + FIELD_PREP(OP_CMD_B_SRC, breg) | + FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) | + FIELD_PREP(OP_CMD_XFER, xfer) | + FIELD_PREP(OP_CMD_CNT, size) | + FIELD_PREP(OP_CMD_SIG, sync) | + FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) | + FIELD_PREP(OP_CMD_MODE, mode); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, + u8 mode, u8 xfer, u32 lreg, u32 rreg, u8 size, bool sync) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), lreg, rreg, ®, false); + if (err) { + nfp_prog->error = err; + return; + } + if (reg.swap) { + pr_err("cmd can't swap arguments\n"); + nfp_prog->error = -EFAULT; + return; + } + + __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync); +} + +static void +__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, + enum br_ctx_signal_state css, u16 addr, u8 defer) +{ + u16 addr_lo, addr_hi; + u64 insn; + + addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); + addr_hi = addr != addr_lo; + + insn = OP_BR_BASE | + FIELD_PREP(OP_BR_MASK, mask) | + FIELD_PREP(OP_BR_EV_PIP, ev_pip) | + FIELD_PREP(OP_BR_CSS, css) | + FIELD_PREP(OP_BR_DEFBR, defer) | + FIELD_PREP(OP_BR_ADDR_LO, addr_lo) | + FIELD_PREP(OP_BR_ADDR_HI, addr_hi); + + nfp_prog_push(nfp_prog, insn); +} + +static void emit_br_def(struct nfp_prog *nfp_prog, u16 addr, u8 defer) +{ + if (defer > 2) { + pr_err("BUG: branch defer out of bounds %d\n", defer); + nfp_prog->error = -EFAULT; + return; + } + __emit_br(nfp_prog, BR_UNC, BR_EV_PIP_UNCOND, BR_CSS_NONE, addr, defer); +} + +static void +emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) +{ + __emit_br(nfp_prog, mask, + mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND, + BR_CSS_NONE, addr, defer); +} + +static void +__emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8, + u8 byte, bool equal, u16 addr, u8 defer) +{ + u16 addr_lo, addr_hi; + u64 insn; + + addr_lo = addr & (OP_BB_ADDR_LO >> __bf_shf(OP_BB_ADDR_LO)); + addr_hi = addr != addr_lo; + + insn = OP_BBYTE_BASE | + FIELD_PREP(OP_BB_A_SRC, areg) | + FIELD_PREP(OP_BB_BYTE, byte) | + FIELD_PREP(OP_BB_B_SRC, breg) | + FIELD_PREP(OP_BB_I8, imm8) | + FIELD_PREP(OP_BB_EQ, equal) | + FIELD_PREP(OP_BB_DEFBR, defer) | + FIELD_PREP(OP_BB_ADDR_LO, addr_lo) | + FIELD_PREP(OP_BB_ADDR_HI, addr_hi); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_br_byte_neq(struct nfp_prog *nfp_prog, + u32 dst, u8 imm, u8 byte, u16 addr, u8 defer) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), dst, reg_imm(imm), ®, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_br_byte(nfp_prog, reg.areg, reg.breg, reg.i8, byte, false, addr, + defer); +} + +static void +__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, + enum immed_width width, bool invert, + enum immed_shift shift, bool wr_both) +{ + u64 insn; + + insn = OP_IMMED_BASE | + FIELD_PREP(OP_IMMED_A_SRC, areg) | + FIELD_PREP(OP_IMMED_B_SRC, breg) | + FIELD_PREP(OP_IMMED_IMM, imm_hi) | + FIELD_PREP(OP_IMMED_WIDTH, width) | + FIELD_PREP(OP_IMMED_INV, invert) | + FIELD_PREP(OP_IMMED_SHIFT, shift) | + FIELD_PREP(OP_IMMED_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_immed(struct nfp_prog *nfp_prog, u32 dst, u16 imm, + enum immed_width width, bool invert, enum immed_shift shift) +{ + struct nfp_insn_ur_regs reg; + int err; + + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) { + nfp_prog->error = -EFAULT; + return; + } + + err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), ®); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_immed(nfp_prog, reg.areg, reg.breg, imm >> 8, width, + invert, shift, reg.wr_both); +} + +static void +__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, + enum shf_sc sc, u8 shift, + u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both) +{ + u64 insn; + + if (!FIELD_FIT(OP_SHF_SHIFT, shift)) { + nfp_prog->error = -EFAULT; + return; + } + + if (sc == SHF_SC_L_SHF) + shift = 32 - shift; + + insn = OP_SHF_BASE | + FIELD_PREP(OP_SHF_A_SRC, areg) | + FIELD_PREP(OP_SHF_SC, sc) | + FIELD_PREP(OP_SHF_B_SRC, breg) | + FIELD_PREP(OP_SHF_I8, i8) | + FIELD_PREP(OP_SHF_SW, sw) | + FIELD_PREP(OP_SHF_DST, dst) | + FIELD_PREP(OP_SHF_SHIFT, shift) | + FIELD_PREP(OP_SHF_OP, op) | + FIELD_PREP(OP_SHF_DST_AB, dst_ab) | + FIELD_PREP(OP_SHF_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_shf(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum shf_op op, u32 rreg, + enum shf_sc sc, u8 shift) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(dst, lreg, rreg, ®, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift, + reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both); +} + +static void +__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, + u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both) +{ + u64 insn; + + insn = OP_ALU_BASE | + FIELD_PREP(OP_ALU_A_SRC, areg) | + FIELD_PREP(OP_ALU_B_SRC, breg) | + FIELD_PREP(OP_ALU_DST, dst) | + FIELD_PREP(OP_ALU_SW, swap) | + FIELD_PREP(OP_ALU_OP, op) | + FIELD_PREP(OP_ALU_DST_AB, dst_ab) | + FIELD_PREP(OP_ALU_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_alu(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum alu_op op, u32 rreg) +{ + struct nfp_insn_ur_regs reg; + int err; + + err = swreg_to_unrestricted(dst, lreg, rreg, ®); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_alu(nfp_prog, reg.dst, reg.dst_ab, + reg.areg, op, reg.breg, reg.swap, reg.wr_both); +} + +static void +__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, + u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8, + bool zero, bool swap, bool wr_both) +{ + u64 insn; + + insn = OP_LDF_BASE | + FIELD_PREP(OP_LDF_A_SRC, areg) | + FIELD_PREP(OP_LDF_SC, sc) | + FIELD_PREP(OP_LDF_B_SRC, breg) | + FIELD_PREP(OP_LDF_I8, imm8) | + FIELD_PREP(OP_LDF_SW, swap) | + FIELD_PREP(OP_LDF_ZF, zero) | + FIELD_PREP(OP_LDF_BMASK, bmask) | + FIELD_PREP(OP_LDF_SHF, shift) | + FIELD_PREP(OP_LDF_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift, + u32 dst, u8 bmask, u32 src, bool zero) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), dst, src, ®, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift, + reg.i8, zero, reg.swap, reg.wr_both); +} + +static void +emit_ld_field(struct nfp_prog *nfp_prog, u32 dst, u8 bmask, u32 src, + enum shf_sc sc, u8 shift) +{ + emit_ld_field_any(nfp_prog, sc, shift, dst, bmask, src, false); +} + +/* --- Wrappers --- */ +static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift) +{ + if (!(imm & 0xffff0000)) { + *val = imm; + *shift = IMMED_SHIFT_0B; + } else if (!(imm & 0xff0000ff)) { + *val = imm >> 8; + *shift = IMMED_SHIFT_1B; + } else if (!(imm & 0x0000ffff)) { + *val = imm >> 16; + *shift = IMMED_SHIFT_2B; + } else { + return false; + } + + return true; +} + +static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm) +{ + enum immed_shift shift; + u16 val; + + if (pack_immed(imm, &val, &shift)) { + emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift); + } else if (pack_immed(~imm, &val, &shift)) { + emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift); + } else { + emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL, + false, IMMED_SHIFT_0B); + emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD, + false, IMMED_SHIFT_2B); + } +} + +/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted) + * If the @imm is small enough encode it directly in operand and return + * otherwise load @imm to a spare register and return its encoding. + */ +static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) +{ + if (FIELD_FIT(UR_REG_IMM_MAX, imm)) + return reg_imm(imm); + + wrp_immed(nfp_prog, tmp_reg, imm); + return tmp_reg; +} + +/* re_load_imm_any() - encode immediate or use tmp register (restricted) + * If the @imm is small enough encode it directly in operand and return + * otherwise load @imm to a spare register and return its encoding. + */ +static u32 re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) +{ + if (FIELD_FIT(RE_REG_IMM_MAX, imm)) + return reg_imm(imm); + + wrp_immed(nfp_prog, tmp_reg, imm); + return tmp_reg; +} + +static void +wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask, + enum br_special special) +{ + emit_br(nfp_prog, mask, 0, 0); + + nfp_prog->prog[nfp_prog->prog_len - 1] |= + FIELD_PREP(OP_BR_SPECIAL, special); +} + +static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src) +{ + emit_alu(nfp_prog, reg_both(dst), reg_none(), ALU_OP_NONE, reg_b(src)); +} + +static int +construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, + u16 src, bool src_valid, u8 size) +{ + unsigned int i; + u16 shift, sz; + u32 tmp_reg; + + /* We load the value from the address indicated in @offset and then + * shift out the data we don't need. Note: this is big endian! + */ + sz = size < 4 ? 4 : size; + shift = size < 4 ? 4 - size : 0; + + if (src_valid) { + /* Calculate the true offset (src_reg + imm) */ + tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_both(nfp_prog), + reg_a(src), ALU_OP_ADD, tmp_reg); + /* Check packet length (size guaranteed to fit b/c it's u8) */ + emit_alu(nfp_prog, imm_a(nfp_prog), + imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); + emit_alu(nfp_prog, reg_none(), + NFP_BPF_ABI_LEN, ALU_OP_SUB, imm_a(nfp_prog)); + wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + /* Load data */ + emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, + pkt_reg(nfp_prog), imm_b(nfp_prog), sz - 1, true); + } else { + /* Check packet length */ + tmp_reg = ur_load_imm_any(nfp_prog, offset + size, + imm_a(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + NFP_BPF_ABI_LEN, ALU_OP_SUB, tmp_reg); + wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + /* Load data */ + tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, + pkt_reg(nfp_prog), tmp_reg, sz - 1, true); + } + + i = 0; + if (shift) + emit_shf(nfp_prog, reg_both(0), reg_none(), SHF_OP_NONE, + reg_xfer(0), SHF_SC_R_SHF, shift * 8); + else + for (; i * 4 < size; i++) + emit_alu(nfp_prog, reg_both(i), + reg_none(), ALU_OP_NONE, reg_xfer(i)); + + if (i < 2) + wrp_immed(nfp_prog, reg_both(1), 0); + + return 0; +} + +static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) +{ + return construct_data_ind_ld(nfp_prog, offset, 0, false, size); +} + +static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src) +{ + emit_alu(nfp_prog, NFP_BPF_ABI_MARK, + reg_none(), ALU_OP_NONE, reg_b(src)); + emit_alu(nfp_prog, NFP_BPF_ABI_FLAGS, + NFP_BPF_ABI_FLAGS, ALU_OP_OR, reg_imm(NFP_BPF_ABI_FLAG_MARK)); + + return 0; +} + +static void +wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) +{ + u32 tmp_reg; + + if (alu_op == ALU_OP_AND) { + if (!imm) + wrp_immed(nfp_prog, reg_both(dst), 0); + if (!imm || !~imm) + return; + } + if (alu_op == ALU_OP_OR) { + if (!~imm) + wrp_immed(nfp_prog, reg_both(dst), ~0U); + if (!imm || !~imm) + return; + } + if (alu_op == ALU_OP_XOR) { + if (!~imm) + emit_alu(nfp_prog, reg_both(dst), reg_none(), + ALU_OP_NEG, reg_b(dst)); + if (!imm || !~imm) + return; + } + + tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg); +} + +static int +wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, bool skip) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + if (skip) { + meta->skip = true; + return 0; + } + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32); + + return 0; +} + +static int +wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op) +{ + u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); + emit_alu(nfp_prog, reg_both(dst + 1), + reg_a(dst + 1), alu_op, reg_b(src + 1)); + + return 0; +} + +static int +wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, bool skip) +{ + const struct bpf_insn *insn = &meta->insn; + + if (skip) { + meta->skip = true; + return 0; + } + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int +wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op) +{ + u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + +static void +wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src, + enum br_mask br_mask, u16 off) +{ + emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src)); + emit_br(nfp_prog, br_mask, off, 0); +} + +static int +wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, enum br_mask br_mask) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op, + insn->src_reg * 2, br_mask, insn->off); + wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op, + insn->src_reg * 2 + 1, br_mask, insn->off); + + return 0; +} + +static int +wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum br_mask br_mask, bool swap) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u8 reg = insn->dst_reg * 2; + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + if (!swap) + emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg); + else + emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg)); + + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + if (!swap) + emit_alu(nfp_prog, reg_none(), + reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg); + else + emit_alu(nfp_prog, reg_none(), + tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1)); + + emit_br(nfp_prog, br_mask, insn->off, 0); + + return 0; +} + +static int +wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum br_mask br_mask, bool swap) +{ + const struct bpf_insn *insn = &meta->insn; + u8 areg = insn->src_reg * 2, breg = insn->dst_reg * 2; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (swap) { + areg ^= breg; + breg ^= areg; + areg ^= breg; + } + + emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg)); + emit_alu(nfp_prog, reg_none(), + reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1)); + emit_br(nfp_prog, br_mask, insn->off, 0); + + return 0; +} + +/* --- Callbacks --- */ +static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); + wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->src_reg * 2 + 1); + + return 0; +} + +static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u64 imm = meta->insn.imm; /* sign extend */ + + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32); + + return 0; +} + +static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR); +} + +static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm); +} + +static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND); +} + +static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); +} + +static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR); +} + +static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); +} + +static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), + reg_a(insn->dst_reg * 2), ALU_OP_ADD, + reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C, + reg_b(insn->src_reg * 2 + 1)); + + return 0; +} + +static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32); + + return 0; +} + +static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), + reg_a(insn->dst_reg * 2), ALU_OP_SUB, + reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C, + reg_b(insn->src_reg * 2 + 1)); + + return 0; +} + +static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32); + + return 0; +} + +static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->imm != 32) + return 1; /* TODO */ + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->dst_reg * 2); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), 0); + + return 0; +} + +static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->imm != 32) + return 1; /* TODO */ + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->dst_reg * 2 + 1); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR); +} + +static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm); +} + +static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND); +} + +static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); +} + +static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR); +} + +static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); +} + +static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD); +} + +static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm); +} + +static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB); +} + +static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm); +} + +static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (!insn->imm) + return 1; /* TODO: zero shift means indirect */ + + emit_shf(nfp_prog, reg_both(insn->dst_reg * 2), + reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2), + SHF_SC_L_SHF, insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + wrp_immed(nfp_prog, reg_both(nfp_meta_prev(meta)->insn.dst_reg * 2 + 1), + meta->insn.imm); + + return 0; +} + +static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + meta->double_cb = imm_ld8_part2; + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); + + return 0; +} + +static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 1); +} + +static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 2); +} + +static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 4); +} + +static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, true, 1); +} + +static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, true, 2); +} + +static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, true, 4); +} + +static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.off == offsetof(struct sk_buff, len)) + emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_LEN); + else + return -ENOTSUPP; + + return 0; +} + +static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.off == offsetof(struct sk_buff, mark)) + return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2); + + return -ENOTSUPP; +} + +static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.off < 0) /* TODO */ + return -ENOTSUPP; + emit_br(nfp_prog, BR_UNC, meta->insn.off, 0); + + return 0; +} + +static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u32 or1 = reg_a(insn->dst_reg * 2), or2 = reg_b(insn->dst_reg * 2 + 1); + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (imm & ~0U) { + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_a(nfp_prog), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + or1 = imm_a(nfp_prog); + } + + if (imm >> 32) { + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); + or2 = imm_b(nfp_prog); + } + + emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2); + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + +static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false); +} + +static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true); +} + +static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (!imm) { + meta->skip = true; + return 0; + } + + if (imm & ~0U) { + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + if (imm >> 32) { + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + return 0; +} + +static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (!imm) { + emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), + ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + + return 0; +} + +static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2), + ALU_OP_XOR, reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1), + ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1)); + emit_alu(nfp_prog, reg_none(), + imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog)); + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + +static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false); +} + +static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true); +} + +static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE); +} + +static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE); +} + +static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT); + + return 0; +} + +static const instr_cb_t instr_cb[256] = { + [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64, + [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64, + [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64, + [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64, + [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64, + [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64, + [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64, + [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64, + [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64, + [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, + [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, + [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, + [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, + [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, + [BPF_ALU | BPF_MOV | BPF_X] = mov_reg, + [BPF_ALU | BPF_MOV | BPF_K] = mov_imm, + [BPF_ALU | BPF_XOR | BPF_X] = xor_reg, + [BPF_ALU | BPF_XOR | BPF_K] = xor_imm, + [BPF_ALU | BPF_AND | BPF_X] = and_reg, + [BPF_ALU | BPF_AND | BPF_K] = and_imm, + [BPF_ALU | BPF_OR | BPF_X] = or_reg, + [BPF_ALU | BPF_OR | BPF_K] = or_imm, + [BPF_ALU | BPF_ADD | BPF_X] = add_reg, + [BPF_ALU | BPF_ADD | BPF_K] = add_imm, + [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, + [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, + [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, + [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8, + [BPF_LD | BPF_ABS | BPF_B] = data_ld1, + [BPF_LD | BPF_ABS | BPF_H] = data_ld2, + [BPF_LD | BPF_ABS | BPF_W] = data_ld4, + [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1, + [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, + [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4, + [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, + [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, + [BPF_JMP | BPF_JA | BPF_K] = jump, + [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, + [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm, + [BPF_JMP | BPF_JGE | BPF_K] = jge_imm, + [BPF_JMP | BPF_JSET | BPF_K] = jset_imm, + [BPF_JMP | BPF_JNE | BPF_K] = jne_imm, + [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg, + [BPF_JMP | BPF_JGT | BPF_X] = jgt_reg, + [BPF_JMP | BPF_JGE | BPF_X] = jge_reg, + [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, + [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, + [BPF_JMP | BPF_EXIT] = goto_out, +}; + +/* --- Misc code --- */ +static void br_set_offset(u64 *instr, u16 offset) +{ + u16 addr_lo, addr_hi; + + addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); + addr_hi = offset != addr_lo; + *instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO); + *instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi); + *instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo); +} + +/* --- Assembler logic --- */ +static int nfp_fixup_branches(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *next; + u32 off, br_idx; + u32 idx; + + nfp_for_each_insn_walk2(nfp_prog, meta, next) { + if (meta->skip) + continue; + if (BPF_CLASS(meta->insn.code) != BPF_JMP) + continue; + + br_idx = nfp_prog_offset_to_index(nfp_prog, next->off) - 1; + if (!nfp_is_br(nfp_prog->prog[br_idx])) { + pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n", + br_idx, meta->insn.code, nfp_prog->prog[br_idx]); + return -ELOOP; + } + /* Leave special branches for later */ + if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx])) + continue; + + /* Find the target offset in assembler realm */ + off = meta->insn.off; + if (!off) { + pr_err("Fixup found zero offset!!\n"); + return -ELOOP; + } + + while (off && nfp_meta_has_next(nfp_prog, next)) { + next = nfp_meta_next(next); + off--; + } + if (off) { + pr_err("Fixup found too large jump!! %d\n", off); + return -ELOOP; + } + + if (next->skip) { + pr_err("Branch landing on removed instruction!!\n"); + return -ELOOP; + } + + for (idx = nfp_prog_offset_to_index(nfp_prog, meta->off); + idx <= br_idx; idx++) { + if (!nfp_is_br(nfp_prog->prog[idx])) + continue; + br_set_offset(&nfp_prog->prog[idx], next->off); + } + } + + /* Fixup 'goto out's separately, they can be scattered around */ + for (br_idx = 0; br_idx < nfp_prog->prog_len; br_idx++) { + enum br_special special; + + if ((nfp_prog->prog[br_idx] & OP_BR_BASE_MASK) != OP_BR_BASE) + continue; + + special = FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]); + switch (special) { + case OP_BR_NORMAL: + break; + case OP_BR_GO_OUT: + br_set_offset(&nfp_prog->prog[br_idx], + nfp_prog->tgt_out); + break; + case OP_BR_GO_ABORT: + br_set_offset(&nfp_prog->prog[br_idx], + nfp_prog->tgt_abort); + break; + } + + nfp_prog->prog[br_idx] &= ~OP_BR_SPECIAL; + } + + return 0; +} + +static void nfp_intro(struct nfp_prog *nfp_prog) +{ + emit_alu(nfp_prog, pkt_reg(nfp_prog), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT); +} + +static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog) +{ + const u8 act2code[] = { + [NN_ACT_TC_DROP] = 0x22, + [NN_ACT_TC_REDIR] = 0x24 + }; + /* Target for aborts */ + nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); + wrp_immed(nfp_prog, reg_both(0), 0); + + /* Target for normal exits */ + nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); + /* Legacy TC mode: + * 0 0x11 -> pass, count as stat0 + * -1 drop 0x22 -> drop, count as stat1 + * redir 0x24 -> redir, count as stat1 + * ife mark 0x21 -> pass, count as stat1 + * ife + tx 0x24 -> redir, count as stat1 + */ + emit_br_byte_neq(nfp_prog, reg_b(0), 0xff, 0, nfp_prog->tgt_done, 2); + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); + + emit_br(nfp_prog, BR_UNC, nfp_prog->tgt_done, 1); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(act2code[nfp_prog->act]), + SHF_SC_L_SHF, 16); +} + +static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) +{ + /* TC direct-action mode: + * 0,1 ok NOT SUPPORTED[1] + * 2 drop 0x22 -> drop, count as stat1 + * 4,5 nuke 0x02 -> drop + * 7 redir 0x44 -> redir, count as stat2 + * * unspec 0x11 -> pass, count as stat0 + * + * [1] We can't support OK and RECLASSIFY because we can't tell TC + * the exact decision made. We are forced to support UNSPEC + * to handle aborts so that's the only one we handle for passing + * packets up the stack. + */ + /* Target for aborts */ + nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); + + emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); + + /* Target for normal exits */ + nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); + + /* if R0 > 7 jump to abort */ + emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0)); + emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + + wrp_immed(nfp_prog, reg_b(2), 0x41221211); + wrp_immed(nfp_prog, reg_b(3), 0x41001211); + + emit_shf(nfp_prog, reg_a(1), + reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_a(2), + reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_b(2), + reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0); + + emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + + emit_shf(nfp_prog, reg_b(2), + reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); +} + +static void nfp_outro(struct nfp_prog *nfp_prog) +{ + switch (nfp_prog->act) { + case NN_ACT_DIRECT: + nfp_outro_tc_da(nfp_prog); + break; + case NN_ACT_TC_DROP: + case NN_ACT_TC_REDIR: + nfp_outro_tc_legacy(nfp_prog); + break; + } +} + +static int nfp_translate(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + int err; + + nfp_intro(nfp_prog); + if (nfp_prog->error) + return nfp_prog->error; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + instr_cb_t cb = instr_cb[meta->insn.code]; + + meta->off = nfp_prog_current_offset(nfp_prog); + + if (meta->skip) { + nfp_prog->n_translated++; + continue; + } + + if (nfp_meta_has_prev(nfp_prog, meta) && + nfp_meta_prev(meta)->double_cb) + cb = nfp_meta_prev(meta)->double_cb; + if (!cb) + return -ENOENT; + err = cb(nfp_prog, meta); + if (err) + return err; + + nfp_prog->n_translated++; + } + + nfp_outro(nfp_prog); + if (nfp_prog->error) + return nfp_prog->error; + + return nfp_fixup_branches(nfp_prog); +} + +static int +nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, + unsigned int cnt) +{ + unsigned int i; + + for (i = 0; i < cnt; i++) { + struct nfp_insn_meta *meta; + + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + return -ENOMEM; + + meta->insn = prog[i]; + meta->n = i; + + list_add_tail(&meta->l, &nfp_prog->insns); + } + + return 0; +} + +/* --- Optimizations --- */ +static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + struct bpf_insn insn = meta->insn; + + /* Programs converted from cBPF start with register xoring */ + if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) && + insn.src_reg == insn.dst_reg) + continue; + + /* Programs start with R6 = R1 but we ignore the skb pointer */ + if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) && + insn.src_reg == 1 && insn.dst_reg == 6) + meta->skip = true; + + /* Return as soon as something doesn't match */ + if (!meta->skip) + return; + } +} + +/* Try to rename registers so that program uses only low ones */ +static int nfp_bpf_opt_reg_rename(struct nfp_prog *nfp_prog) +{ + bool reg_used[MAX_BPF_REG] = {}; + u8 tgt_reg[MAX_BPF_REG] = {}; + struct nfp_insn_meta *meta; + unsigned int i, j; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (meta->skip) + continue; + + reg_used[meta->insn.src_reg] = true; + reg_used[meta->insn.dst_reg] = true; + } + + for (i = 0, j = 0; i < ARRAY_SIZE(tgt_reg); i++) { + if (!reg_used[i]) + continue; + + tgt_reg[i] = j++; + } + nfp_prog->num_regs = j; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + meta->insn.src_reg = tgt_reg[meta->insn.src_reg]; + meta->insn.dst_reg = tgt_reg[meta->insn.dst_reg]; + } + + return 0; +} + +/* Remove masking after load since our load guarantees this is not needed */ +static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta1, *meta2; + const s32 exp_mask[] = { + [BPF_B] = 0x000000ffU, + [BPF_H] = 0x0000ffffU, + [BPF_W] = 0xffffffffU, + }; + + nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { + struct bpf_insn insn, next; + + insn = meta1->insn; + next = meta2->insn; + + if (BPF_CLASS(insn.code) != BPF_LD) + continue; + if (BPF_MODE(insn.code) != BPF_ABS && + BPF_MODE(insn.code) != BPF_IND) + continue; + + if (next.code != (BPF_ALU64 | BPF_AND | BPF_K)) + continue; + + if (!exp_mask[BPF_SIZE(insn.code)]) + continue; + if (exp_mask[BPF_SIZE(insn.code)] != next.imm) + continue; + + if (next.src_reg || next.dst_reg) + continue; + + meta2->skip = true; + } +} + +static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta1, *meta2, *meta3; + + nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) { + struct bpf_insn insn, next1, next2; + + insn = meta1->insn; + next1 = meta2->insn; + next2 = meta3->insn; + + if (BPF_CLASS(insn.code) != BPF_LD) + continue; + if (BPF_MODE(insn.code) != BPF_ABS && + BPF_MODE(insn.code) != BPF_IND) + continue; + if (BPF_SIZE(insn.code) != BPF_W) + continue; + + if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) && + next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) && + !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) && + next2.code == (BPF_LSH | BPF_K | BPF_ALU64))) + continue; + + if (next1.src_reg || next1.dst_reg || + next2.src_reg || next2.dst_reg) + continue; + + if (next1.imm != 0x20 || next2.imm != 0x20) + continue; + + meta2->skip = true; + meta3->skip = true; + } +} + +static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) +{ + int ret; + + nfp_bpf_opt_reg_init(nfp_prog); + + ret = nfp_bpf_opt_reg_rename(nfp_prog); + if (ret) + return ret; + + nfp_bpf_opt_ld_mask(nfp_prog); + nfp_bpf_opt_ld_shift(nfp_prog); + + return 0; +} + +/** + * nfp_bpf_jit() - translate BPF code into NFP assembly + * @filter: kernel BPF filter struct + * @prog_mem: memory to store assembler instructions + * @act: action attached to this eBPF program + * @prog_start: offset of the first instruction when loaded + * @prog_done: where to jump on exit + * @prog_sz: size of @prog_mem in instructions + * @res: achieved parameters of translation results + */ +int +nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem, + enum nfp_bpf_action_type act, + unsigned int prog_start, unsigned int prog_done, + unsigned int prog_sz, struct nfp_bpf_result *res) +{ + struct nfp_prog *nfp_prog; + int ret; + + nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL); + if (!nfp_prog) + return -ENOMEM; + + INIT_LIST_HEAD(&nfp_prog->insns); + nfp_prog->act = act; + nfp_prog->start_off = prog_start; + nfp_prog->tgt_done = prog_done; + + ret = nfp_prog_prepare(nfp_prog, filter->insnsi, filter->len); + if (ret) + goto out; + + ret = nfp_prog_verify(nfp_prog, filter); + if (ret) + goto out; + + ret = nfp_bpf_optimize(nfp_prog); + if (ret) + goto out; + + if (nfp_prog->num_regs <= 7) + nfp_prog->regs_per_thread = 16; + else + nfp_prog->regs_per_thread = 32; + + nfp_prog->prog = prog_mem; + nfp_prog->__prog_alloc_len = prog_sz; + + ret = nfp_translate(nfp_prog); + if (ret) { + pr_err("Translation failed with error %d (translated: %u)\n", + ret, nfp_prog->n_translated); + ret = -EINVAL; + } + + res->n_instr = nfp_prog->prog_len; + res->dense_mode = nfp_prog->num_regs <= 7; +out: + nfp_prog_free(nfp_prog); + + return ret; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c new file mode 100644 index 000000000000..144cae87f63a --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c @@ -0,0 +1,171 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) "NFP net bpf: " fmt + +#include <linux/bpf.h> +#include <linux/bpf_verifier.h> +#include <linux/kernel.h> +#include <linux/pkt_cls.h> + +#include "nfp_bpf.h" + +/* Analyzer/verifier definitions */ +struct nfp_bpf_analyzer_priv { + struct nfp_prog *prog; + struct nfp_insn_meta *meta; +}; + +static struct nfp_insn_meta * +nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int insn_idx, unsigned int n_insns) +{ + unsigned int forward, backward, i; + + backward = meta->n - insn_idx; + forward = insn_idx - meta->n; + + if (min(forward, backward) > n_insns - insn_idx - 1) { + backward = n_insns - insn_idx - 1; + meta = nfp_prog_last_meta(nfp_prog); + } + if (min(forward, backward) > insn_idx && backward > insn_idx) { + forward = insn_idx; + meta = nfp_prog_first_meta(nfp_prog); + } + + if (forward < backward) + for (i = 0; i < forward; i++) + meta = nfp_meta_next(meta); + else + for (i = 0; i < backward; i++) + meta = nfp_meta_prev(meta); + + return meta; +} + +static int +nfp_bpf_check_exit(struct nfp_prog *nfp_prog, + const struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *reg0 = &env->cur_state.regs[0]; + + if (reg0->type != CONST_IMM) { + pr_info("unsupported exit state: %d, imm: %llx\n", + reg0->type, reg0->imm); + return -EINVAL; + } + + if (nfp_prog->act != NN_ACT_DIRECT && + reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) { + pr_info("unsupported exit state: %d, imm: %llx\n", + reg0->type, reg0->imm); + return -EINVAL; + } + + if (nfp_prog->act == NN_ACT_DIRECT && reg0->imm <= TC_ACT_REDIRECT && + reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN && + reg0->imm != TC_ACT_QUEUED) { + pr_info("unsupported exit state: %d, imm: %llx\n", + reg0->type, reg0->imm); + return -EINVAL; + } + + return 0; +} + +static int +nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog, + const struct bpf_verifier_env *env, u8 reg) +{ + if (env->cur_state.regs[reg].type != PTR_TO_CTX) + return -EINVAL; + + return 0; +} + +static int +nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) +{ + struct nfp_bpf_analyzer_priv *priv = env->analyzer_priv; + struct nfp_insn_meta *meta = priv->meta; + + meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len); + priv->meta = meta; + + if (meta->insn.src_reg == BPF_REG_10 || + meta->insn.dst_reg == BPF_REG_10) { + pr_err("stack not yet supported\n"); + return -EINVAL; + } + if (meta->insn.src_reg >= MAX_BPF_REG || + meta->insn.dst_reg >= MAX_BPF_REG) { + pr_err("program uses extended registers - jit hardening?\n"); + return -EINVAL; + } + + if (meta->insn.code == (BPF_JMP | BPF_EXIT)) + return nfp_bpf_check_exit(priv->prog, env); + + if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM)) + return nfp_bpf_check_ctx_ptr(priv->prog, env, + meta->insn.src_reg); + if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM)) + return nfp_bpf_check_ctx_ptr(priv->prog, env, + meta->insn.dst_reg); + + return 0; +} + +static const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = { + .insn_hook = nfp_verify_insn, +}; + +int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog) +{ + struct nfp_bpf_analyzer_priv *priv; + int ret; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->prog = nfp_prog; + priv->meta = nfp_prog_first_meta(nfp_prog); + + ret = bpf_analyzer(prog, &nfp_bpf_analyzer_ops, priv); + + kfree(priv); + + return ret; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 690635660195..ed824e11a1e3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -62,6 +62,9 @@ /* Max time to wait for NFP to respond on updates (in seconds) */ #define NFP_NET_POLL_TIMEOUT 5 +/* Interval for reading offloaded filter stats */ +#define NFP_NET_STAT_POLL_IVL msecs_to_jiffies(100) + /* Bar allocation */ #define NFP_NET_CTRL_BAR 0 #define NFP_NET_Q0_BAR 2 @@ -220,7 +223,7 @@ struct nfp_net_tx_ring { #define PCIE_DESC_RX_I_TCP_CSUM_OK cpu_to_le16(BIT(11)) #define PCIE_DESC_RX_I_UDP_CSUM cpu_to_le16(BIT(10)) #define PCIE_DESC_RX_I_UDP_CSUM_OK cpu_to_le16(BIT(9)) -#define PCIE_DESC_RX_SPARE cpu_to_le16(BIT(8)) +#define PCIE_DESC_RX_BPF cpu_to_le16(BIT(8)) #define PCIE_DESC_RX_EOP cpu_to_le16(BIT(7)) #define PCIE_DESC_RX_IP4_CSUM cpu_to_le16(BIT(6)) #define PCIE_DESC_RX_IP4_CSUM_OK cpu_to_le16(BIT(5)) @@ -266,6 +269,8 @@ struct nfp_net_rx_desc { }; }; +#define NFP_NET_META_FIELD_MASK GENMASK(NFP_NET_META_FIELD_SIZE - 1, 0) + struct nfp_net_rx_hash { __be32 hash_type; __be32 hash; @@ -405,6 +410,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, fw_ver->minor == minor; } +struct nfp_stat_pair { + u64 pkts; + u64 bytes; +}; + /** * struct nfp_net - NFP network device structure * @pdev: Backpointer to PCI device @@ -413,6 +423,7 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, * @is_vf: Is the driver attached to a VF? * @is_nfp3200: Is the driver for a NFP-3200 card? * @fw_loaded: Is the firmware loaded? + * @bpf_offload_skip_sw: Offloaded BPF program will not be rerun by cls_bpf * @ctrl: Local copy of the control register/word. * @fl_bufsz: Currently configured size of the freelist buffers * @rx_offset: Offset in the RX buffers where packet data starts @@ -427,6 +438,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, * @rss_cfg: RSS configuration * @rss_key: RSS secret key * @rss_itbl: RSS indirection table + * @rx_filter: Filter offload statistics - dropped packets/bytes + * @rx_filter_prev: Filter offload statistics - values from previous update + * @rx_filter_change: Jiffies when statistics last changed + * @rx_filter_stats_timer: Timer for polling filter offload statistics + * @rx_filter_lock: Lock protecting timer state changes (teardown) * @max_tx_rings: Maximum number of TX rings supported by the Firmware * @max_rx_rings: Maximum number of RX rings supported by the Firmware * @num_tx_rings: Currently configured number of TX rings @@ -473,6 +489,7 @@ struct nfp_net { unsigned is_vf:1; unsigned is_nfp3200:1; unsigned fw_loaded:1; + unsigned bpf_offload_skip_sw:1; u32 ctrl; u32 fl_bufsz; @@ -502,6 +519,11 @@ struct nfp_net { u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ]; u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ]; + struct nfp_stat_pair rx_filter, rx_filter_prev; + unsigned long rx_filter_change; + struct timer_list rx_filter_stats_timer; + spinlock_t rx_filter_lock; + int max_tx_rings; int max_rx_rings; @@ -561,12 +583,28 @@ struct nfp_net { /* Functions to read/write from/to a BAR * Performs any endian conversion necessary. */ +static inline u16 nn_readb(struct nfp_net *nn, int off) +{ + return readb(nn->ctrl_bar + off); +} + static inline void nn_writeb(struct nfp_net *nn, int off, u8 val) { writeb(val, nn->ctrl_bar + off); } -/* NFP-3200 can't handle 16-bit accesses too well - hence no readw/writew */ +/* NFP-3200 can't handle 16-bit accesses too well */ +static inline u16 nn_readw(struct nfp_net *nn, int off) +{ + WARN_ON_ONCE(nn->is_nfp3200); + return readw(nn->ctrl_bar + off); +} + +static inline void nn_writew(struct nfp_net *nn, int off, u16 val) +{ + WARN_ON_ONCE(nn->is_nfp3200); + writew(val, nn->ctrl_bar + off); +} static inline u32 nn_readl(struct nfp_net *nn, int off) { @@ -757,4 +795,9 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn) } #endif /* CONFIG_NFP_NET_DEBUG */ +void nfp_net_filter_stats_timer(unsigned long data); +int +nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, + struct tc_cls_bpf_offload *cls_bpf); + #endif /* _NFP_NET_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 252e4924de0f..415691edcaa5 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -60,6 +60,7 @@ #include <linux/ktime.h> +#include <net/pkt_cls.h> #include <net/vxlan.h> #include "nfp_net_ctrl.h" @@ -1292,38 +1293,72 @@ static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, } } -/** - * nfp_net_set_hash() - Set SKB hash data - * @netdev: adapter's net_device structure - * @skb: SKB to set the hash data on - * @rxd: RX descriptor - * - * The RSS hash and hash-type are pre-pended to the packet data. - * Extract and decode it and set the skb fields. - */ static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb, - struct nfp_net_rx_desc *rxd) + unsigned int type, __be32 *hash) { - struct nfp_net_rx_hash *rx_hash; - - if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) || - !(netdev->features & NETIF_F_RXHASH)) + if (!(netdev->features & NETIF_F_RXHASH)) return; - rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash)); - - switch (be32_to_cpu(rx_hash->hash_type)) { + switch (type) { case NFP_NET_RSS_IPV4: case NFP_NET_RSS_IPV6: case NFP_NET_RSS_IPV6_EX: - skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3); + skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L3); break; default: - skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4); + skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L4); break; } } +static void +nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb, + struct nfp_net_rx_desc *rxd) +{ + struct nfp_net_rx_hash *rx_hash; + + if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS)) + return; + + rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash)); + + nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type), + &rx_hash->hash); +} + +static void * +nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb, + int meta_len) +{ + u8 *data = skb->data - meta_len; + u32 meta_info; + + meta_info = get_unaligned_be32(data); + data += 4; + + while (meta_info) { + switch (meta_info & NFP_NET_META_FIELD_MASK) { + case NFP_NET_META_HASH: + meta_info >>= NFP_NET_META_FIELD_SIZE; + nfp_net_set_hash(netdev, skb, + meta_info & NFP_NET_META_FIELD_MASK, + (__be32 *)data); + data += 4; + break; + case NFP_NET_META_MARK: + skb->mark = get_unaligned_be32(data); + data += 4; + break; + default: + return NULL; + } + + meta_info >>= NFP_NET_META_FIELD_SIZE; + } + + return data; +} + /** * nfp_net_rx() - receive up to @budget packets on @rx_ring * @rx_ring: RX ring to receive from @@ -1438,14 +1473,29 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) skb_reserve(skb, nn->rx_offset); skb_put(skb, data_len - meta_len); - nfp_net_set_hash(nn->netdev, skb, rxd); - /* Stats update */ u64_stats_update_begin(&r_vec->rx_sync); r_vec->rx_pkts++; r_vec->rx_bytes += skb->len; u64_stats_update_end(&r_vec->rx_sync); + if (nn->fw_ver.major <= 3) { + nfp_net_set_hash_desc(nn->netdev, skb, rxd); + } else if (meta_len) { + void *end; + + end = nfp_net_parse_meta(nn->netdev, skb, meta_len); + if (unlikely(end != skb->data)) { + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + u64_stats_update_end(&r_vec->rx_sync); + + dev_kfree_skb_any(skb); + nn_warn_ratelimit(nn, "invalid RX packet metadata\n"); + continue; + } + } + skb_record_rx_queue(skb, rx_ring->idx); skb->protocol = eth_type_trans(skb, nn->netdev); @@ -2382,6 +2432,31 @@ static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev, return stats; } +static bool nfp_net_ebpf_capable(struct nfp_net *nn) +{ + if (nn->cap & NFP_NET_CFG_CTRL_BPF && + nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI) + return true; + return false; +} + +static int +nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) + return -ENOTSUPP; + if (proto != htons(ETH_P_ALL)) + return -ENOTSUPP; + + if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) + return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf); + + return -EINVAL; +} + static int nfp_net_set_features(struct net_device *netdev, netdev_features_t features) { @@ -2436,6 +2511,11 @@ static int nfp_net_set_features(struct net_device *netdev, new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER; } + if (changed & NETIF_F_HW_TC && nn->ctrl & NFP_NET_CFG_CTRL_BPF) { + nn_err(nn, "Cannot disable HW TC offload while in use\n"); + return -EBUSY; + } + nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n", netdev->features, features, changed); @@ -2585,6 +2665,7 @@ static const struct net_device_ops nfp_net_netdev_ops = { .ndo_stop = nfp_net_netdev_close, .ndo_start_xmit = nfp_net_tx, .ndo_get_stats64 = nfp_net_stat64, + .ndo_setup_tc = nfp_net_setup_tc, .ndo_tx_timeout = nfp_net_tx_timeout, .ndo_set_rx_mode = nfp_net_set_rx_mode, .ndo_change_mtu = nfp_net_change_mtu, @@ -2610,7 +2691,7 @@ void nfp_net_info(struct nfp_net *nn) nn->fw_ver.resv, nn->fw_ver.class, nn->fw_ver.major, nn->fw_ver.minor, nn->max_mtu); - nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", nn->cap, nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "", nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "", @@ -2627,7 +2708,8 @@ void nfp_net_info(struct nfp_net *nn) nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "", nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "", nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "", - nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : ""); + nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "", + nfp_net_ebpf_capable(nn) ? "BPF " : ""); } /** @@ -2670,10 +2752,13 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; spin_lock_init(&nn->reconfig_lock); + spin_lock_init(&nn->rx_filter_lock); spin_lock_init(&nn->link_status_lock); setup_timer(&nn->reconfig_timer, nfp_net_reconfig_timer, (unsigned long)nn); + setup_timer(&nn->rx_filter_stats_timer, + nfp_net_filter_stats_timer, (unsigned long)nn); return nn; } @@ -2795,6 +2880,9 @@ int nfp_net_netdev_init(struct net_device *netdev) netdev->features = netdev->hw_features; + if (nfp_net_ebpf_capable(nn)) + netdev->hw_features |= NETIF_F_HW_TC; + /* Advertise but disable TSO by default. */ netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index ad6c4e31cedd..93b10b441acb 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -66,6 +66,13 @@ #define NFP_NET_LSO_MAX_HDR_SZ 255 /** + * Prepend field types + */ +#define NFP_NET_META_FIELD_SIZE 4 +#define NFP_NET_META_HASH 1 /* next field carries hash type */ +#define NFP_NET_META_MARK 2 + +/** * Hash type pre-pended when a RSS hash was computed */ #define NFP_NET_RSS_NONE 0 @@ -123,6 +130,7 @@ #define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */ #define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */ #define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */ +#define NFP_NET_CFG_CTRL_BPF (0x1 << 27) /* BPF offload capable */ #define NFP_NET_CFG_UPDATE 0x0004 #define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */ #define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */ @@ -134,6 +142,7 @@ #define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */ #define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */ #define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */ +#define NFP_NET_CFG_UPDATE_BPF (0x1 << 10) /* BPF program load */ #define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */ #define NFP_NET_CFG_TXRS_ENABLE 0x0008 #define NFP_NET_CFG_RXRS_ENABLE 0x0010 @@ -196,10 +205,37 @@ #define NFP_NET_CFG_VXLAN_SZ 0x0008 /** - * 64B reserved for future use (0x0080 - 0x00c0) + * NFP6000 - BPF section + * @NFP_NET_CFG_BPF_ABI: BPF ABI version + * @NFP_NET_CFG_BPF_CAP: BPF capabilities + * @NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes + * @NFP_NET_CFG_BPF_START: Offset at which BPF will be loaded + * @NFP_NET_CFG_BPF_DONE: Offset to jump to on exit + * @NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks + * @NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks + * @NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions + * @NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code */ -#define NFP_NET_CFG_RESERVED 0x0080 -#define NFP_NET_CFG_RESERVED_SZ 0x0040 +#define NFP_NET_CFG_BPF_ABI 0x0080 +#define NFP_NET_BPF_ABI 1 +#define NFP_NET_CFG_BPF_CAP 0x0081 +#define NFP_NET_BPF_CAP_RELO (1 << 0) /* seamless reload */ +#define NFP_NET_CFG_BPF_MAX_LEN 0x0082 +#define NFP_NET_CFG_BPF_START 0x0084 +#define NFP_NET_CFG_BPF_DONE 0x0086 +#define NFP_NET_CFG_BPF_STACK_SZ 0x0088 +#define NFP_NET_CFG_BPF_INL_MTU 0x0089 +#define NFP_NET_CFG_BPF_SIZE 0x008e +#define NFP_NET_CFG_BPF_ADDR 0x0090 +#define NFP_NET_CFG_BPF_CFG_8CTX (1 << 0) /* 8ctx mode */ +#define NFP_NET_CFG_BPF_CFG_MASK 7ULL +#define NFP_NET_CFG_BPF_ADDR_MASK (~NFP_NET_CFG_BPF_CFG_MASK) + +/** + * 40B reserved for future use (0x0098 - 0x00c0) + */ +#define NFP_NET_CFG_RESERVED 0x0098 +#define NFP_NET_CFG_RESERVED_SZ 0x0028 /** * RSS configuration (0x0100 - 0x01ac): @@ -303,6 +339,15 @@ #define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80) #define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88) +#define NFP_NET_CFG_STATS_APP0_FRAMES (NFP_NET_CFG_STATS_BASE + 0x90) +#define NFP_NET_CFG_STATS_APP0_BYTES (NFP_NET_CFG_STATS_BASE + 0x98) +#define NFP_NET_CFG_STATS_APP1_FRAMES (NFP_NET_CFG_STATS_BASE + 0xa0) +#define NFP_NET_CFG_STATS_APP1_BYTES (NFP_NET_CFG_STATS_BASE + 0xa8) +#define NFP_NET_CFG_STATS_APP2_FRAMES (NFP_NET_CFG_STATS_BASE + 0xb0) +#define NFP_NET_CFG_STATS_APP2_BYTES (NFP_NET_CFG_STATS_BASE + 0xb8) +#define NFP_NET_CFG_STATS_APP3_FRAMES (NFP_NET_CFG_STATS_BASE + 0xc0) +#define NFP_NET_CFG_STATS_APP3_BYTES (NFP_NET_CFG_STATS_BASE + 0xc8) + /** * Per ring stats (0x1000 - 0x1800) * options, 64bit per entry diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 4c9897220969..3418f2277e9d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -106,6 +106,18 @@ static const struct _nfp_net_et_stats nfp_net_et_stats[] = { {"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)}, {"dev_tx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)}, {"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)}, + + {"bpf_pass_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_FRAMES)}, + {"bpf_pass_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_BYTES)}, + /* see comments in outro functions in nfp_bpf_jit.c to find out + * how different BPF modes use app-specific counters + */ + {"bpf_app1_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_FRAMES)}, + {"bpf_app1_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_BYTES)}, + {"bpf_app2_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_FRAMES)}, + {"bpf_app2_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_BYTES)}, + {"bpf_app3_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_FRAMES)}, + {"bpf_app3_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_BYTES)}, }; #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c new file mode 100644 index 000000000000..43f42f842eda --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -0,0 +1,291 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_net_offload.c + * Netronome network device driver: TC offload functions for PF and VF + */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/pci.h> +#include <linux/jiffies.h> +#include <linux/timer.h> +#include <linux/list.h> + +#include <net/pkt_cls.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> + +#include "nfp_bpf.h" +#include "nfp_net_ctrl.h" +#include "nfp_net.h" + +void nfp_net_filter_stats_timer(unsigned long data) +{ + struct nfp_net *nn = (void *)data; + struct nfp_stat_pair latest; + + spin_lock_bh(&nn->rx_filter_lock); + + if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) + mod_timer(&nn->rx_filter_stats_timer, + jiffies + NFP_NET_STAT_POLL_IVL); + + spin_unlock_bh(&nn->rx_filter_lock); + + latest.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES); + latest.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES); + + if (latest.pkts != nn->rx_filter.pkts) + nn->rx_filter_change = jiffies; + + nn->rx_filter = latest; +} + +static void nfp_net_bpf_stats_reset(struct nfp_net *nn) +{ + nn->rx_filter.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES); + nn->rx_filter.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES); + nn->rx_filter_prev = nn->rx_filter; + nn->rx_filter_change = jiffies; +} + +static int +nfp_net_bpf_stats_update(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) +{ + struct tc_action *a; + LIST_HEAD(actions); + u64 bytes, pkts; + + pkts = nn->rx_filter.pkts - nn->rx_filter_prev.pkts; + bytes = nn->rx_filter.bytes - nn->rx_filter_prev.bytes; + bytes -= pkts * ETH_HLEN; + + nn->rx_filter_prev = nn->rx_filter; + + preempt_disable(); + + tcf_exts_to_list(cls_bpf->exts, &actions); + list_for_each_entry(a, &actions, list) + tcf_action_stats_update(a, bytes, pkts, nn->rx_filter_change); + + preempt_enable(); + + return 0; +} + +static int +nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) +{ + const struct tc_action *a; + LIST_HEAD(actions); + + /* TC direct action */ + if (cls_bpf->exts_integrated) { + if (tc_no_actions(cls_bpf->exts)) + return NN_ACT_DIRECT; + + return -ENOTSUPP; + } + + /* TC legacy mode */ + if (!tc_single_action(cls_bpf->exts)) + return -ENOTSUPP; + + tcf_exts_to_list(cls_bpf->exts, &actions); + list_for_each_entry(a, &actions, list) { + if (is_tcf_gact_shot(a)) + return NN_ACT_TC_DROP; + + if (is_tcf_mirred_redirect(a) && + tcf_mirred_ifindex(a) == nn->netdev->ifindex) + return NN_ACT_TC_REDIR; + } + + return -ENOTSUPP; +} + +static int +nfp_net_bpf_offload_prepare(struct nfp_net *nn, + struct tc_cls_bpf_offload *cls_bpf, + struct nfp_bpf_result *res, + void **code, dma_addr_t *dma_addr, u16 max_instr) +{ + unsigned int code_sz = max_instr * sizeof(u64); + enum nfp_bpf_action_type act; + u16 start_off, done_off; + unsigned int max_mtu; + int ret; + + ret = nfp_net_bpf_get_act(nn, cls_bpf); + if (ret < 0) + return ret; + act = ret; + + max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + if (max_mtu < nn->netdev->mtu) { + nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n"); + return -ENOTSUPP; + } + + start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); + done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE); + + *code = dma_zalloc_coherent(&nn->pdev->dev, code_sz, dma_addr, + GFP_KERNEL); + if (!*code) + return -ENOMEM; + + ret = nfp_bpf_jit(cls_bpf->prog, *code, act, start_off, done_off, + max_instr, res); + if (ret) + goto out; + + return 0; + +out: + dma_free_coherent(&nn->pdev->dev, code_sz, *code, *dma_addr); + return ret; +} + +static void +nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags, + void *code, dma_addr_t dma_addr, + unsigned int code_sz, unsigned int n_instr, + bool dense_mode) +{ + u64 bpf_addr = dma_addr; + int err; + + nn->bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW); + + if (dense_mode) + bpf_addr |= NFP_NET_CFG_BPF_CFG_8CTX; + + nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr); + nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, bpf_addr); + + /* Load up the JITed code */ + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF); + if (err) + nn_err(nn, "FW command error while loading BPF: %d\n", err); + + /* Enable passing packets through BPF function */ + nn->ctrl |= NFP_NET_CFG_CTRL_BPF; + nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); + if (err) + nn_err(nn, "FW command error while enabling BPF: %d\n", err); + + dma_free_coherent(&nn->pdev->dev, code_sz, code, dma_addr); + + nfp_net_bpf_stats_reset(nn); + mod_timer(&nn->rx_filter_stats_timer, jiffies + NFP_NET_STAT_POLL_IVL); +} + +static int nfp_net_bpf_stop(struct nfp_net *nn) +{ + if (!(nn->ctrl & NFP_NET_CFG_CTRL_BPF)) + return 0; + + spin_lock_bh(&nn->rx_filter_lock); + nn->ctrl &= ~NFP_NET_CFG_CTRL_BPF; + spin_unlock_bh(&nn->rx_filter_lock); + nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl); + + del_timer_sync(&nn->rx_filter_stats_timer); + nn->bpf_offload_skip_sw = 0; + + return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); +} + +int +nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, + struct tc_cls_bpf_offload *cls_bpf) +{ + struct nfp_bpf_result res; + dma_addr_t dma_addr; + u16 max_instr; + void *code; + int err; + + max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); + + switch (cls_bpf->command) { + case TC_CLSBPF_REPLACE: + /* There is nothing stopping us from implementing seamless + * replace but the simple method of loading I adopted in + * the firmware does not handle atomic replace (i.e. we have to + * stop the BPF offload and re-enable it). Leaking-in a few + * frames which didn't have BPF applied in the hardware should + * be fine if software fallback is available, though. + */ + if (nn->bpf_offload_skip_sw) + return -EBUSY; + + err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code, + &dma_addr, max_instr); + if (err) + return err; + + nfp_net_bpf_stop(nn); + nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, + dma_addr, max_instr * sizeof(u64), + res.n_instr, res.dense_mode); + return 0; + + case TC_CLSBPF_ADD: + if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) + return -EBUSY; + + err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code, + &dma_addr, max_instr); + if (err) + return err; + + nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, + dma_addr, max_instr * sizeof(u64), + res.n_instr, res.dense_mode); + return 0; + + case TC_CLSBPF_DESTROY: + return nfp_net_bpf_stop(nn); + + case TC_CLSBPF_STATS: + return nfp_net_bpf_stats_update(nn, cls_bpf); + + default: + return -ENOTSUPP; + } +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c index f7062cb648e1..2800bbf65a89 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -148,7 +148,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, dev_warn(&pdev->dev, "OBSOLETE Firmware detected - VF isolation not available\n"); } else { switch (fw_ver.major) { - case 1 ... 3: + case 1 ... 4: if (is_nfp3200) { stride = 2; tx_bar_no = NFP_NET_Q0_BAR; diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index dd204d9704c6..77a5364f7a10 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1269,13 +1269,13 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel) if (IS_ERR(ptp->phc_clock)) { rc = PTR_ERR(ptp->phc_clock); goto fail3; - } - - INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker); - ptp->pps_workwq = create_singlethread_workqueue("sfc_pps"); - if (!ptp->pps_workwq) { - rc = -ENOMEM; - goto fail4; + } else if (ptp->phc_clock) { + INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker); + ptp->pps_workwq = create_singlethread_workqueue("sfc_pps"); + if (!ptp->pps_workwq) { + rc = -ENOMEM; + goto fail4; + } } } ptp->nic_ts_enabled = false; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 170a18b61281..6e3b82972ce8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -187,7 +187,7 @@ int stmmac_ptp_register(struct stmmac_priv *priv) if (IS_ERR(priv->ptp_clock)) { priv->ptp_clock = NULL; pr_err("ptp_clock_register() failed on %s\n", priv->dev->name); - } else + } else if (priv->ptp_clock) pr_debug("Added PTP HW clock successfully on %s\n", priv->dev->name); |