summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c370
-rw-r--r--drivers/net/dsa/mv88e6xxx/mv88e6xxx.h63
-rw-r--r--drivers/net/dsa/qca8k.c16
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/Makefile2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h29
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c8
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c721
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h48
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c344
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c483
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h57
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h294
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h26
-rw-r--r--drivers/net/ethernet/faraday/ftgmac100.c97
-rw-r--r--drivers/net/ethernet/faraday/ftgmac100.h8
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_enet.c23
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_enet.h1
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_ethtool.c132
-rw-r--r--drivers/net/ethernet/intel/e1000e/ptp.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ptp.c2
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ptp.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c2
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cmd.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_clock.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c28
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/srq.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h69
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_clock.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c462
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c349
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c63
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c65
-rw-r--r--drivers/net/ethernet/netronome/nfp/Makefile7
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_asm.h233
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_bpf.h212
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c1811
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c171
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net.h47
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c134
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h51
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c12
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_offload.c291
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c2
-rw-r--r--drivers/net/ethernet/sfc/ptp.c14
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c2
-rw-r--r--include/linux/bpf_verifier.h90
-rw-r--r--include/linux/netdevice.h2
-rw-r--r--include/linux/ptp_clock_kernel.h5
-rw-r--r--include/linux/skbuff.h1
-rw-r--r--include/net/pkt_cls.h16
-rw-r--r--include/net/sctp/sctp.h10
-rw-r--r--include/uapi/linux/pkt_cls.h1
-rw-r--r--include/uapi/linux/tc_act/tc_vlan.h1
-rw-r--r--kernel/bpf/verifier.c406
-rw-r--r--net/core/skbuff.c15
-rw-r--r--net/ipv4/tcp_output.c72
-rw-r--r--net/netfilter/xt_sctp.c2
-rw-r--r--net/sched/act_mirred.c8
-rw-r--r--net/sched/act_vlan.c29
-rw-r--r--net/sched/cls_bpf.c117
-rw-r--r--net/sctp/associola.c2
-rw-r--r--net/sctp/chunk.c13
-rw-r--r--net/sctp/input.c8
-rw-r--r--net/sctp/inqueue.c2
-rw-r--r--net/sctp/output.c12
-rw-r--r--net/sctp/sm_make_chunk.c28
-rw-r--r--net/sctp/sm_statefuns.c6
-rw-r--r--net/sctp/transport.c4
-rw-r--r--net/sctp/ulpevent.c4
72 files changed, 6428 insertions, 1238 deletions
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 1d71802396fb..25bd3fa744d9 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -216,6 +216,22 @@ int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val)
return 0;
}
+int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg,
+ u16 *val)
+{
+ int addr = chip->info->port_base_addr + port;
+
+ return mv88e6xxx_read(chip, addr, reg, val);
+}
+
+int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg,
+ u16 val)
+{
+ int addr = chip->info->port_base_addr + port;
+
+ return mv88e6xxx_write(chip, addr, reg, val);
+}
+
static int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy,
int reg, u16 *val)
{
@@ -585,23 +601,23 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port,
struct phy_device *phydev)
{
struct mv88e6xxx_chip *chip = ds->priv;
- u32 reg;
- int ret;
+ u16 reg;
+ int err;
if (!phy_is_pseudo_fixed_link(phydev))
return;
mutex_lock(&chip->reg_lock);
- ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_PCS_CTRL);
- if (ret < 0)
+ err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, &reg);
+ if (err)
goto out;
- reg = ret & ~(PORT_PCS_CTRL_LINK_UP |
- PORT_PCS_CTRL_FORCE_LINK |
- PORT_PCS_CTRL_DUPLEX_FULL |
- PORT_PCS_CTRL_FORCE_DUPLEX |
- PORT_PCS_CTRL_UNFORCED);
+ reg &= ~(PORT_PCS_CTRL_LINK_UP |
+ PORT_PCS_CTRL_FORCE_LINK |
+ PORT_PCS_CTRL_DUPLEX_FULL |
+ PORT_PCS_CTRL_FORCE_DUPLEX |
+ PORT_PCS_CTRL_UNFORCED);
reg |= PORT_PCS_CTRL_FORCE_LINK;
if (phydev->link)
@@ -639,7 +655,7 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port,
reg |= (PORT_PCS_CTRL_RGMII_DELAY_RXCLK |
PORT_PCS_CTRL_RGMII_DELAY_TXCLK);
}
- _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_PCS_CTRL, reg);
+ mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg);
out:
mutex_unlock(&chip->reg_lock);
@@ -799,22 +815,22 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
{
u32 low;
u32 high = 0;
- int ret;
+ int err;
+ u16 reg;
u64 value;
switch (s->type) {
case PORT:
- ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), s->reg);
- if (ret < 0)
+ err = mv88e6xxx_port_read(chip, port, s->reg, &reg);
+ if (err)
return UINT64_MAX;
- low = ret;
+ low = reg;
if (s->sizeof_stat == 4) {
- ret = _mv88e6xxx_reg_read(chip, REG_PORT(port),
- s->reg + 1);
- if (ret < 0)
+ err = mv88e6xxx_port_read(chip, port, s->reg + 1, &reg);
+ if (err)
return UINT64_MAX;
- high = ret;
+ high = reg;
}
break;
case BANK0:
@@ -893,6 +909,8 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port,
struct ethtool_regs *regs, void *_p)
{
struct mv88e6xxx_chip *chip = ds->priv;
+ int err;
+ u16 reg;
u16 *p = _p;
int i;
@@ -903,11 +921,10 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port,
mutex_lock(&chip->reg_lock);
for (i = 0; i < 32; i++) {
- int ret;
- ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), i);
- if (ret >= 0)
- p[i] = ret;
+ err = mv88e6xxx_port_read(chip, port, i, &reg);
+ if (!err)
+ p[i] = reg;
}
mutex_unlock(&chip->reg_lock);
@@ -938,7 +955,7 @@ static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port,
e->eee_enabled = !!(reg & 0x0200);
e->tx_lpi_enabled = !!(reg & 0x0100);
- err = mv88e6xxx_read(chip, REG_PORT(port), PORT_STATUS, &reg);
+ err = mv88e6xxx_port_read(chip, port, PORT_STATUS, &reg);
if (err)
goto out;
@@ -1106,12 +1123,13 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port,
u8 state)
{
struct dsa_switch *ds = chip->ds;
- int reg, ret = 0;
+ u16 reg;
+ int err;
u8 oldstate;
- reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL);
- if (reg < 0)
- return reg;
+ err = mv88e6xxx_port_read(chip, port, PORT_CONTROL, &reg);
+ if (err)
+ return err;
oldstate = reg & PORT_CONTROL_STATE_MASK;
@@ -1124,23 +1142,22 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port,
oldstate == PORT_CONTROL_STATE_FORWARDING) &&
(state == PORT_CONTROL_STATE_DISABLED ||
state == PORT_CONTROL_STATE_BLOCKING)) {
- ret = _mv88e6xxx_atu_remove(chip, 0, port, false);
- if (ret)
- return ret;
+ err = _mv88e6xxx_atu_remove(chip, 0, port, false);
+ if (err)
+ return err;
}
reg = (reg & ~PORT_CONTROL_STATE_MASK) | state;
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL,
- reg);
- if (ret)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg);
+ if (err)
+ return err;
netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n",
mv88e6xxx_port_state_names[state],
mv88e6xxx_port_state_names[oldstate]);
}
- return ret;
+ return err;
}
static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port)
@@ -1149,7 +1166,8 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port)
const u16 mask = (1 << chip->info->num_ports) - 1;
struct dsa_switch *ds = chip->ds;
u16 output_ports = 0;
- int reg;
+ u16 reg;
+ int err;
int i;
/* allow CPU port or DSA link(s) to send frames to every port */
@@ -1170,14 +1188,14 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port)
/* prevent frames from going back out of the port they came in on */
output_ports &= ~BIT(port);
- reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_BASE_VLAN);
- if (reg < 0)
- return reg;
+ err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, &reg);
+ if (err)
+ return err;
reg &= ~mask;
reg |= output_ports & mask;
- return _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_BASE_VLAN, reg);
+ return mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg);
}
static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port,
@@ -1218,23 +1236,22 @@ static int _mv88e6xxx_port_pvid(struct mv88e6xxx_chip *chip, int port,
u16 *new, u16 *old)
{
struct dsa_switch *ds = chip->ds;
- u16 pvid;
- int ret;
+ u16 pvid, reg;
+ int err;
- ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_DEFAULT_VLAN);
- if (ret < 0)
- return ret;
+ err = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, &reg);
+ if (err)
+ return err;
- pvid = ret & PORT_DEFAULT_VLAN_MASK;
+ pvid = reg & PORT_DEFAULT_VLAN_MASK;
if (new) {
- ret &= ~PORT_DEFAULT_VLAN_MASK;
- ret |= *new & PORT_DEFAULT_VLAN_MASK;
+ reg &= ~PORT_DEFAULT_VLAN_MASK;
+ reg |= *new & PORT_DEFAULT_VLAN_MASK;
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
- PORT_DEFAULT_VLAN, ret);
- if (ret < 0)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, reg);
+ if (err)
+ return err;
netdev_dbg(ds->ports[port].netdev,
"DefaultVID %d (was %d)\n", *new, pvid);
@@ -1613,7 +1630,8 @@ static int _mv88e6xxx_port_fid(struct mv88e6xxx_chip *chip, int port,
struct dsa_switch *ds = chip->ds;
u16 upper_mask;
u16 fid;
- int ret;
+ u16 reg;
+ int err;
if (mv88e6xxx_num_databases(chip) == 4096)
upper_mask = 0xff;
@@ -1623,37 +1641,35 @@ static int _mv88e6xxx_port_fid(struct mv88e6xxx_chip *chip, int port,
return -EOPNOTSUPP;
/* Port's default FID bits 3:0 are located in reg 0x06, offset 12 */
- ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_BASE_VLAN);
- if (ret < 0)
- return ret;
+ err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, &reg);
+ if (err)
+ return err;
- fid = (ret & PORT_BASE_VLAN_FID_3_0_MASK) >> 12;
+ fid = (reg & PORT_BASE_VLAN_FID_3_0_MASK) >> 12;
if (new) {
- ret &= ~PORT_BASE_VLAN_FID_3_0_MASK;
- ret |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK;
+ reg &= ~PORT_BASE_VLAN_FID_3_0_MASK;
+ reg |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK;
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_BASE_VLAN,
- ret);
- if (ret < 0)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg);
+ if (err)
+ return err;
}
/* Port's default FID bits 11:4 are located in reg 0x05, offset 0 */
- ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL_1);
- if (ret < 0)
- return ret;
+ err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, &reg);
+ if (err)
+ return err;
- fid |= (ret & upper_mask) << 4;
+ fid |= (reg & upper_mask) << 4;
if (new) {
- ret &= ~upper_mask;
- ret |= (*new >> 4) & upper_mask;
+ reg &= ~upper_mask;
+ reg |= (*new >> 4) & upper_mask;
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_1,
- ret);
- if (ret < 0)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, reg);
+ if (err)
+ return err;
netdev_dbg(ds->ports[port].netdev,
"FID %d (was %d)\n", *new, fid);
@@ -1865,26 +1881,26 @@ static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port,
struct mv88e6xxx_chip *chip = ds->priv;
u16 old, new = vlan_filtering ? PORT_CONTROL_2_8021Q_SECURE :
PORT_CONTROL_2_8021Q_DISABLED;
- int ret;
+ u16 reg;
+ int err;
if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU))
return -EOPNOTSUPP;
mutex_lock(&chip->reg_lock);
- ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL_2);
- if (ret < 0)
+ err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, &reg);
+ if (err)
goto unlock;
- old = ret & PORT_CONTROL_2_8021Q_MASK;
+ old = reg & PORT_CONTROL_2_8021Q_MASK;
if (new != old) {
- ret &= ~PORT_CONTROL_2_8021Q_MASK;
- ret |= new & PORT_CONTROL_2_8021Q_MASK;
+ reg &= ~PORT_CONTROL_2_8021Q_MASK;
+ reg |= new & PORT_CONTROL_2_8021Q_MASK;
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_2,
- ret);
- if (ret < 0)
+ err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg);
+ if (err)
goto unlock;
netdev_dbg(ds->ports[port].netdev, "802.1Q Mode %s (was %s)\n",
@@ -1892,11 +1908,11 @@ static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port,
mv88e6xxx_port_8021q_mode_names[old]);
}
- ret = 0;
+ err = 0;
unlock:
mutex_unlock(&chip->reg_lock);
- return ret;
+ return err;
}
static int
@@ -2406,19 +2422,20 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip)
u16 is_reset = (ppu_active ? 0x8800 : 0xc800);
struct gpio_desc *gpiod = chip->reset;
unsigned long timeout;
- int ret;
+ int err, ret;
+ u16 reg;
int i;
/* Set all ports to the disabled state. */
for (i = 0; i < chip->info->num_ports; i++) {
- ret = _mv88e6xxx_reg_read(chip, REG_PORT(i), PORT_CONTROL);
- if (ret < 0)
- return ret;
+ err = mv88e6xxx_port_read(chip, i, PORT_CONTROL, &reg);
+ if (err)
+ return err;
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(i), PORT_CONTROL,
- ret & 0xfffc);
- if (ret)
- return ret;
+ err = mv88e6xxx_port_write(chip, i, PORT_CONTROL,
+ reg & 0xfffc);
+ if (err)
+ return err;
}
/* Wait for transmit queues to drain. */
@@ -2437,11 +2454,11 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip)
* through global registers 0x18 and 0x19.
*/
if (ppu_active)
- ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc000);
+ err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc000);
else
- ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc400);
- if (ret)
- return ret;
+ err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc400);
+ if (err)
+ return err;
/* Wait up to one second for reset to complete. */
timeout = jiffies + 1 * HZ;
@@ -2455,11 +2472,11 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip)
usleep_range(1000, 2000);
}
if (time_after(jiffies, timeout))
- ret = -ETIMEDOUT;
+ err = -ETIMEDOUT;
else
- ret = 0;
+ err = 0;
- return ret;
+ return err;
}
static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip)
@@ -2480,21 +2497,10 @@ static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip)
return err;
}
-static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port,
- int reg, u16 *val)
-{
- int addr = chip->info->port_base_addr + port;
-
- if (port >= chip->info->num_ports)
- return -EINVAL;
-
- return mv88e6xxx_read(chip, addr, reg, val);
-}
-
static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
{
struct dsa_switch *ds = chip->ds;
- int ret;
+ int err;
u16 reg;
if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) ||
@@ -2507,7 +2513,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
* and all DSA ports to their maximum bandwidth and
* full duplex.
*/
- reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_PCS_CTRL);
+ err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, &reg);
if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
reg &= ~PORT_PCS_CTRL_UNFORCED;
reg |= PORT_PCS_CTRL_FORCE_LINK |
@@ -2522,10 +2528,9 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
reg |= PORT_PCS_CTRL_UNFORCED;
}
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
- PORT_PCS_CTRL, reg);
- if (ret)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg);
+ if (err)
+ return err;
}
/* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock,
@@ -2576,26 +2581,25 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
PORT_CONTROL_FORWARD_UNKNOWN_MC;
}
if (reg) {
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
- PORT_CONTROL, reg);
- if (ret)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg);
+ if (err)
+ return err;
}
/* If this port is connected to a SerDes, make sure the SerDes is not
* powered down.
*/
if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SERDES)) {
- ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_STATUS);
- if (ret < 0)
- return ret;
- ret &= PORT_STATUS_CMODE_MASK;
- if ((ret == PORT_STATUS_CMODE_100BASE_X) ||
- (ret == PORT_STATUS_CMODE_1000BASE_X) ||
- (ret == PORT_STATUS_CMODE_SGMII)) {
- ret = mv88e6xxx_serdes_power_on(chip);
- if (ret < 0)
- return ret;
+ err = mv88e6xxx_port_read(chip, port, PORT_STATUS, &reg);
+ if (err)
+ return err;
+ reg &= PORT_STATUS_CMODE_MASK;
+ if ((reg == PORT_STATUS_CMODE_100BASE_X) ||
+ (reg == PORT_STATUS_CMODE_1000BASE_X) ||
+ (reg == PORT_STATUS_CMODE_SGMII)) {
+ err = mv88e6xxx_serdes_power_on(chip);
+ if (err < 0)
+ return err;
}
}
@@ -2629,10 +2633,9 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
reg |= PORT_CONTROL_2_8021Q_DISABLED;
if (reg) {
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
- PORT_CONTROL_2, reg);
- if (ret)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg);
+ if (err)
+ return err;
}
/* Port Association Vector: when learning source addresses
@@ -2645,16 +2648,14 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
if (dsa_is_cpu_port(ds, port))
reg = 0;
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_ASSOC_VECTOR,
- reg);
- if (ret)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_ASSOC_VECTOR, reg);
+ if (err)
+ return err;
/* Egress rate control 2: disable egress rate control. */
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_RATE_CONTROL_2,
- 0x0000);
- if (ret)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL_2, 0x0000);
+ if (err)
+ return err;
if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) ||
mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) ||
@@ -2663,96 +2664,89 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
* be paused for by the remote end or the period of
* time that this port can pause the remote end.
*/
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
- PORT_PAUSE_CTRL, 0x0000);
- if (ret)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_PAUSE_CTRL, 0x0000);
+ if (err)
+ return err;
/* Port ATU control: disable limiting the number of
* address database entries that this port is allowed
* to use.
*/
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
- PORT_ATU_CONTROL, 0x0000);
+ err = mv88e6xxx_port_write(chip, port, PORT_ATU_CONTROL,
+ 0x0000);
/* Priority Override: disable DA, SA and VTU priority
* override.
*/
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
- PORT_PRI_OVERRIDE, 0x0000);
- if (ret)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_PRI_OVERRIDE,
+ 0x0000);
+ if (err)
+ return err;
/* Port Ethertype: use the Ethertype DSA Ethertype
* value.
*/
if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) {
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
- PORT_ETH_TYPE, ETH_P_EDSA);
- if (ret)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_ETH_TYPE,
+ ETH_P_EDSA);
+ if (err)
+ return err;
}
/* Tag Remap: use an identity 802.1p prio -> switch
* prio mapping.
*/
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
- PORT_TAG_REGMAP_0123, 0x3210);
- if (ret)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_TAG_REGMAP_0123,
+ 0x3210);
+ if (err)
+ return err;
/* Tag Remap 2: use an identity 802.1p prio -> switch
* prio mapping.
*/
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
- PORT_TAG_REGMAP_4567, 0x7654);
- if (ret)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_TAG_REGMAP_4567,
+ 0x7654);
+ if (err)
+ return err;
}
/* Rate Control: disable ingress rate limiting. */
if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) ||
mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) ||
mv88e6xxx_6320_family(chip)) {
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
- PORT_RATE_CONTROL, 0x0001);
- if (ret)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL,
+ 0x0001);
+ if (err)
+ return err;
} else if (mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip)) {
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
- PORT_RATE_CONTROL, 0x0000);
- if (ret)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL,
+ 0x0000);
+ if (err)
+ return err;
}
/* Port Control 1: disable trunking, disable sending
* learning messages to this port.
*/
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_1,
- 0x0000);
- if (ret)
- return ret;
+ err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, 0x0000);
+ if (err)
+ return err;
/* Port based VLAN map: give each port the same default address
* database, and allow bidirectional communication between the
* CPU and DSA port(s), and the other ports.
*/
- ret = _mv88e6xxx_port_fid_set(chip, port, 0);
- if (ret)
- return ret;
+ err = _mv88e6xxx_port_fid_set(chip, port, 0);
+ if (err)
+ return err;
- ret = _mv88e6xxx_port_based_vlan_map(chip, port);
- if (ret)
- return ret;
+ err = _mv88e6xxx_port_based_vlan_map(chip, port);
+ if (err)
+ return err;
/* Default VLAN ID and priority: don't set a default VLAN
* ID, and set the default packet priority to zero.
*/
- ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_DEFAULT_VLAN,
- 0x0000);
- if (ret)
- return ret;
-
- return 0;
+ return mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, 0x0000);
}
static int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr)
diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
index 52f3f5231f51..827988397fd8 100644
--- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
@@ -37,7 +37,6 @@
#define ADDR_SERDES 0x0f
#define SERDES_PAGE_FIBER 0x01
-#define REG_PORT(p) (0x10 + (p))
#define PORT_STATUS 0x00
#define PORT_STATUS_PAUSE_EN BIT(15)
#define PORT_STATUS_MY_PAUSE BIT(14)
@@ -453,36 +452,36 @@ enum mv88e6xxx_cap {
};
/* Bitmask of capabilities */
-#define MV88E6XXX_FLAG_EDSA BIT(MV88E6XXX_CAP_EDSA)
-#define MV88E6XXX_FLAG_EEE BIT(MV88E6XXX_CAP_EEE)
-
-#define MV88E6XXX_FLAG_SMI_CMD BIT(MV88E6XXX_CAP_SMI_CMD)
-#define MV88E6XXX_FLAG_SMI_DATA BIT(MV88E6XXX_CAP_SMI_DATA)
-
-#define MV88E6XXX_FLAG_PHY_PAGE BIT(MV88E6XXX_CAP_PHY_PAGE)
-
-#define MV88E6XXX_FLAG_SERDES BIT(MV88E6XXX_CAP_SERDES)
-
-#define MV88E6XXX_FLAG_GLOBAL2 BIT(MV88E6XXX_CAP_GLOBAL2)
-#define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT(MV88E6XXX_CAP_G2_MGMT_EN_2X)
-#define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT(MV88E6XXX_CAP_G2_MGMT_EN_0X)
-#define MV88E6XXX_FLAG_G2_IRL_CMD BIT(MV88E6XXX_CAP_G2_IRL_CMD)
-#define MV88E6XXX_FLAG_G2_IRL_DATA BIT(MV88E6XXX_CAP_G2_IRL_DATA)
-#define MV88E6XXX_FLAG_G2_PVT_ADDR BIT(MV88E6XXX_CAP_G2_PVT_ADDR)
-#define MV88E6XXX_FLAG_G2_PVT_DATA BIT(MV88E6XXX_CAP_G2_PVT_DATA)
-#define MV88E6XXX_FLAG_G2_SWITCH_MAC BIT(MV88E6XXX_CAP_G2_SWITCH_MAC)
-#define MV88E6XXX_FLAG_G2_POT BIT(MV88E6XXX_CAP_G2_POT)
-#define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT(MV88E6XXX_CAP_G2_EEPROM_CMD)
-#define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT(MV88E6XXX_CAP_G2_EEPROM_DATA)
-#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD BIT(MV88E6XXX_CAP_G2_SMI_PHY_CMD)
-#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA BIT(MV88E6XXX_CAP_G2_SMI_PHY_DATA)
-
-#define MV88E6XXX_FLAG_PPU BIT(MV88E6XXX_CAP_PPU)
-#define MV88E6XXX_FLAG_PPU_ACTIVE BIT(MV88E6XXX_CAP_PPU_ACTIVE)
-#define MV88E6XXX_FLAG_STU BIT(MV88E6XXX_CAP_STU)
-#define MV88E6XXX_FLAG_TEMP BIT(MV88E6XXX_CAP_TEMP)
-#define MV88E6XXX_FLAG_TEMP_LIMIT BIT(MV88E6XXX_CAP_TEMP_LIMIT)
-#define MV88E6XXX_FLAG_VTU BIT(MV88E6XXX_CAP_VTU)
+#define MV88E6XXX_FLAG_EDSA BIT_ULL(MV88E6XXX_CAP_EDSA)
+#define MV88E6XXX_FLAG_EEE BIT_ULL(MV88E6XXX_CAP_EEE)
+
+#define MV88E6XXX_FLAG_SMI_CMD BIT_ULL(MV88E6XXX_CAP_SMI_CMD)
+#define MV88E6XXX_FLAG_SMI_DATA BIT_ULL(MV88E6XXX_CAP_SMI_DATA)
+
+#define MV88E6XXX_FLAG_PHY_PAGE BIT_ULL(MV88E6XXX_CAP_PHY_PAGE)
+
+#define MV88E6XXX_FLAG_SERDES BIT_ULL(MV88E6XXX_CAP_SERDES)
+
+#define MV88E6XXX_FLAG_GLOBAL2 BIT_ULL(MV88E6XXX_CAP_GLOBAL2)
+#define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_2X)
+#define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X)
+#define MV88E6XXX_FLAG_G2_IRL_CMD BIT_ULL(MV88E6XXX_CAP_G2_IRL_CMD)
+#define MV88E6XXX_FLAG_G2_IRL_DATA BIT_ULL(MV88E6XXX_CAP_G2_IRL_DATA)
+#define MV88E6XXX_FLAG_G2_PVT_ADDR BIT_ULL(MV88E6XXX_CAP_G2_PVT_ADDR)
+#define MV88E6XXX_FLAG_G2_PVT_DATA BIT_ULL(MV88E6XXX_CAP_G2_PVT_DATA)
+#define MV88E6XXX_FLAG_G2_SWITCH_MAC BIT_ULL(MV88E6XXX_CAP_G2_SWITCH_MAC)
+#define MV88E6XXX_FLAG_G2_POT BIT_ULL(MV88E6XXX_CAP_G2_POT)
+#define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_CMD)
+#define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_DATA)
+#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD BIT_ULL(MV88E6XXX_CAP_G2_SMI_PHY_CMD)
+#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA BIT_ULL(MV88E6XXX_CAP_G2_SMI_PHY_DATA)
+
+#define MV88E6XXX_FLAG_PPU BIT_ULL(MV88E6XXX_CAP_PPU)
+#define MV88E6XXX_FLAG_PPU_ACTIVE BIT_ULL(MV88E6XXX_CAP_PPU_ACTIVE)
+#define MV88E6XXX_FLAG_STU BIT_ULL(MV88E6XXX_CAP_STU)
+#define MV88E6XXX_FLAG_TEMP BIT_ULL(MV88E6XXX_CAP_TEMP)
+#define MV88E6XXX_FLAG_TEMP_LIMIT BIT_ULL(MV88E6XXX_CAP_TEMP_LIMIT)
+#define MV88E6XXX_FLAG_VTU BIT_ULL(MV88E6XXX_CAP_VTU)
/* EEPROM Programming via Global2 with 16-bit data */
#define MV88E6XXX_FLAGS_EEPROM16 \
@@ -615,7 +614,7 @@ struct mv88e6xxx_info {
unsigned int num_ports;
unsigned int port_base_addr;
unsigned int age_time_coeff;
- unsigned long flags;
+ unsigned long long flags;
};
struct mv88e6xxx_atu_entry {
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 4788a89520dd..b3df70d07ff6 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -256,7 +256,7 @@ static struct regmap_access_table qca8k_readable_table = {
.n_yes_ranges = ARRAY_SIZE(qca8k_readable_ranges),
};
-struct regmap_config qca8k_regmap_config = {
+static struct regmap_config qca8k_regmap_config = {
.reg_bits = 16,
.val_bits = 32,
.reg_stride = 4,
@@ -1032,19 +1032,7 @@ static struct mdio_driver qca8kmdio_driver = {
},
};
-static int __init
-qca8kmdio_driver_register(void)
-{
- return mdio_driver_register(&qca8kmdio_driver);
-}
-module_init(qca8kmdio_driver_register);
-
-static void __exit
-qca8kmdio_driver_unregister(void)
-{
- mdio_driver_unregister(&qca8kmdio_driver);
-}
-module_exit(qca8kmdio_driver_unregister);
+mdio_module_driver(qca8kmdio_driver);
MODULE_AUTHOR("Mathieu Olivari, John Crispin <john@phrozen.org>");
MODULE_DESCRIPTION("Driver for QCA8K ethernet switch family");
diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index 246129650967..c6b71f656992 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -4,7 +4,7 @@
obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
-cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o
+cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o
cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o
cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o
cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 1f9867db3b78..ea0d1f188802 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -851,6 +851,9 @@ struct adapter {
spinlock_t stats_lock;
spinlock_t win0_lock ____cacheline_aligned_in_smp;
+
+ /* TC u32 offload */
+ struct cxgb4_tc_u32_table *tc_u32;
};
/* Support for "sched-class" command to allow a TX Scheduling Class to be
@@ -1025,6 +1028,32 @@ enum {
VLAN_REWRITE
};
+/* Host shadow copy of ingress filter entry. This is in host native format
+ * and doesn't match the ordering or bit order, etc. of the hardware of the
+ * firmware command. The use of bit-field structure elements is purely to
+ * remind ourselves of the field size limitations and save memory in the case
+ * where the filter table is large.
+ */
+struct filter_entry {
+ /* Administrative fields for filter. */
+ u32 valid:1; /* filter allocated and valid */
+ u32 locked:1; /* filter is administratively locked */
+
+ u32 pending:1; /* filter action is pending firmware reply */
+ u32 smtidx:8; /* Source MAC Table index for smac */
+ struct filter_ctx *ctx; /* Caller's completion hook */
+ struct l2t_entry *l2t; /* Layer Two Table entry for dmac */
+ struct net_device *dev; /* Associated net device */
+ u32 tid; /* This will store the actual tid */
+
+ /* The filter itself. Most of this is a straight copy of information
+ * provided by the extended ioctl(). Some fields are translated to
+ * internal forms -- for instance the Ingress Queue ID passed in from
+ * the ioctl() is translated into the Absolute Ingress Queue ID.
+ */
+ struct ch_filter_specification fs;
+};
+
static inline int is_offload(const struct adapter *adap)
{
return adap->params.offload;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 52be9a4ef97a..20455d082cb8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2748,12 +2748,6 @@ static void add_debugfs_mem(struct adapter *adap, const char *name,
size_mb << 20);
}
-static int blocked_fl_open(struct inode *inode, struct file *file)
-{
- file->private_data = inode->i_private;
- return 0;
-}
-
static ssize_t blocked_fl_read(struct file *filp, char __user *ubuf,
size_t count, loff_t *ppos)
{
@@ -2797,7 +2791,7 @@ static ssize_t blocked_fl_write(struct file *filp, const char __user *ubuf,
static const struct file_operations blocked_fl_fops = {
.owner = THIS_MODULE,
- .open = blocked_fl_open,
+ .open = simple_open,
.read = blocked_fl_read,
.write = blocked_fl_write,
.llseek = generic_file_llseek,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
new file mode 100644
index 000000000000..2a616171cb68
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -0,0 +1,721 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "cxgb4.h"
+#include "t4_regs.h"
+#include "l2t.h"
+#include "t4fw_api.h"
+#include "cxgb4_filter.h"
+
+static inline bool is_field_set(u32 val, u32 mask)
+{
+ return val || mask;
+}
+
+static inline bool unsupported(u32 conf, u32 conf_mask, u32 val, u32 mask)
+{
+ return !(conf & conf_mask) && is_field_set(val, mask);
+}
+
+/* Validate filter spec against configuration done on the card. */
+static int validate_filter(struct net_device *dev,
+ struct ch_filter_specification *fs)
+{
+ struct adapter *adapter = netdev2adap(dev);
+ u32 fconf, iconf;
+
+ /* Check for unconfigured fields being used. */
+ fconf = adapter->params.tp.vlan_pri_map;
+ iconf = adapter->params.tp.ingress_config;
+
+ if (unsupported(fconf, FCOE_F, fs->val.fcoe, fs->mask.fcoe) ||
+ unsupported(fconf, PORT_F, fs->val.iport, fs->mask.iport) ||
+ unsupported(fconf, TOS_F, fs->val.tos, fs->mask.tos) ||
+ unsupported(fconf, ETHERTYPE_F, fs->val.ethtype,
+ fs->mask.ethtype) ||
+ unsupported(fconf, MACMATCH_F, fs->val.macidx, fs->mask.macidx) ||
+ unsupported(fconf, MPSHITTYPE_F, fs->val.matchtype,
+ fs->mask.matchtype) ||
+ unsupported(fconf, FRAGMENTATION_F, fs->val.frag, fs->mask.frag) ||
+ unsupported(fconf, PROTOCOL_F, fs->val.proto, fs->mask.proto) ||
+ unsupported(fconf, VNIC_ID_F, fs->val.pfvf_vld,
+ fs->mask.pfvf_vld) ||
+ unsupported(fconf, VNIC_ID_F, fs->val.ovlan_vld,
+ fs->mask.ovlan_vld) ||
+ unsupported(fconf, VLAN_F, fs->val.ivlan_vld, fs->mask.ivlan_vld))
+ return -EOPNOTSUPP;
+
+ /* T4 inconveniently uses the same FT_VNIC_ID_W bits for both the Outer
+ * VLAN Tag and PF/VF/VFvld fields based on VNIC_F being set
+ * in TP_INGRESS_CONFIG. Hense the somewhat crazy checks
+ * below. Additionally, since the T4 firmware interface also
+ * carries that overlap, we need to translate any PF/VF
+ * specification into that internal format below.
+ */
+ if (is_field_set(fs->val.pfvf_vld, fs->mask.pfvf_vld) &&
+ is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld))
+ return -EOPNOTSUPP;
+ if (unsupported(iconf, VNIC_F, fs->val.pfvf_vld, fs->mask.pfvf_vld) ||
+ (is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld) &&
+ (iconf & VNIC_F)))
+ return -EOPNOTSUPP;
+ if (fs->val.pf > 0x7 || fs->val.vf > 0x7f)
+ return -ERANGE;
+ fs->mask.pf &= 0x7;
+ fs->mask.vf &= 0x7f;
+
+ /* If the user is requesting that the filter action loop
+ * matching packets back out one of our ports, make sure that
+ * the egress port is in range.
+ */
+ if (fs->action == FILTER_SWITCH &&
+ fs->eport >= adapter->params.nports)
+ return -ERANGE;
+
+ /* Don't allow various trivially obvious bogus out-of-range values... */
+ if (fs->val.iport >= adapter->params.nports)
+ return -ERANGE;
+
+ /* T4 doesn't support removing VLAN Tags for loop back filters. */
+ if (is_t4(adapter->params.chip) &&
+ fs->action == FILTER_SWITCH &&
+ (fs->newvlan == VLAN_REMOVE ||
+ fs->newvlan == VLAN_REWRITE))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static unsigned int get_filter_steerq(struct net_device *dev,
+ struct ch_filter_specification *fs)
+{
+ struct adapter *adapter = netdev2adap(dev);
+ unsigned int iq;
+
+ /* If the user has requested steering matching Ingress Packets
+ * to a specific Queue Set, we need to make sure it's in range
+ * for the port and map that into the Absolute Queue ID of the
+ * Queue Set's Response Queue.
+ */
+ if (!fs->dirsteer) {
+ if (fs->iq)
+ return -EINVAL;
+ iq = 0;
+ } else {
+ struct port_info *pi = netdev_priv(dev);
+
+ /* If the iq id is greater than the number of qsets,
+ * then assume it is an absolute qid.
+ */
+ if (fs->iq < pi->nqsets)
+ iq = adapter->sge.ethrxq[pi->first_qset +
+ fs->iq].rspq.abs_id;
+ else
+ iq = fs->iq;
+ }
+
+ return iq;
+}
+
+static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family)
+{
+ spin_lock_bh(&t->ftid_lock);
+
+ if (test_bit(fidx, t->ftid_bmap)) {
+ spin_unlock_bh(&t->ftid_lock);
+ return -EBUSY;
+ }
+
+ if (family == PF_INET)
+ __set_bit(fidx, t->ftid_bmap);
+ else
+ bitmap_allocate_region(t->ftid_bmap, fidx, 2);
+
+ spin_unlock_bh(&t->ftid_lock);
+ return 0;
+}
+
+static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family)
+{
+ spin_lock_bh(&t->ftid_lock);
+ if (family == PF_INET)
+ __clear_bit(fidx, t->ftid_bmap);
+ else
+ bitmap_release_region(t->ftid_bmap, fidx, 2);
+ spin_unlock_bh(&t->ftid_lock);
+}
+
+/* Delete the filter at a specified index. */
+static int del_filter_wr(struct adapter *adapter, int fidx)
+{
+ struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
+ struct fw_filter_wr *fwr;
+ struct sk_buff *skb;
+ unsigned int len;
+
+ len = sizeof(*fwr);
+
+ skb = alloc_skb(len, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ fwr = (struct fw_filter_wr *)__skb_put(skb, len);
+ t4_mk_filtdelwr(f->tid, fwr, adapter->sge.fw_evtq.abs_id);
+
+ /* Mark the filter as "pending" and ship off the Filter Work Request.
+ * When we get the Work Request Reply we'll clear the pending status.
+ */
+ f->pending = 1;
+ t4_mgmt_tx(adapter, skb);
+ return 0;
+}
+
+/* Send a Work Request to write the filter at a specified index. We construct
+ * a Firmware Filter Work Request to have the work done and put the indicated
+ * filter into "pending" mode which will prevent any further actions against
+ * it till we get a reply from the firmware on the completion status of the
+ * request.
+ */
+int set_filter_wr(struct adapter *adapter, int fidx)
+{
+ struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
+ struct fw_filter_wr *fwr;
+ struct sk_buff *skb;
+
+ skb = alloc_skb(sizeof(*fwr), GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ /* If the new filter requires loopback Destination MAC and/or VLAN
+ * rewriting then we need to allocate a Layer 2 Table (L2T) entry for
+ * the filter.
+ */
+ if (f->fs.newdmac || f->fs.newvlan) {
+ /* allocate L2T entry for new filter */
+ f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan,
+ f->fs.eport, f->fs.dmac);
+ if (!f->l2t) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+ }
+
+ fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr));
+ memset(fwr, 0, sizeof(*fwr));
+
+ /* It would be nice to put most of the following in t4_hw.c but most
+ * of the work is translating the cxgbtool ch_filter_specification
+ * into the Work Request and the definition of that structure is
+ * currently in cxgbtool.h which isn't appropriate to pull into the
+ * common code. We may eventually try to come up with a more neutral
+ * filter specification structure but for now it's easiest to simply
+ * put this fairly direct code in line ...
+ */
+ fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR));
+ fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr) / 16));
+ fwr->tid_to_iq =
+ htonl(FW_FILTER_WR_TID_V(f->tid) |
+ FW_FILTER_WR_RQTYPE_V(f->fs.type) |
+ FW_FILTER_WR_NOREPLY_V(0) |
+ FW_FILTER_WR_IQ_V(f->fs.iq));
+ fwr->del_filter_to_l2tix =
+ htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) |
+ FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) |
+ FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) |
+ FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) |
+ FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) |
+ FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) |
+ FW_FILTER_WR_DMAC_V(f->fs.newdmac) |
+ FW_FILTER_WR_SMAC_V(f->fs.newsmac) |
+ FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT ||
+ f->fs.newvlan == VLAN_REWRITE) |
+ FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE ||
+ f->fs.newvlan == VLAN_REWRITE) |
+ FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) |
+ FW_FILTER_WR_TXCHAN_V(f->fs.eport) |
+ FW_FILTER_WR_PRIO_V(f->fs.prio) |
+ FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0));
+ fwr->ethtype = htons(f->fs.val.ethtype);
+ fwr->ethtypem = htons(f->fs.mask.ethtype);
+ fwr->frag_to_ovlan_vldm =
+ (FW_FILTER_WR_FRAG_V(f->fs.val.frag) |
+ FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) |
+ FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) |
+ FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) |
+ FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) |
+ FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld));
+ fwr->smac_sel = 0;
+ fwr->rx_chan_rx_rpl_iq =
+ htons(FW_FILTER_WR_RX_CHAN_V(0) |
+ FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id));
+ fwr->maci_to_matchtypem =
+ htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) |
+ FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) |
+ FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) |
+ FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) |
+ FW_FILTER_WR_PORT_V(f->fs.val.iport) |
+ FW_FILTER_WR_PORTM_V(f->fs.mask.iport) |
+ FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) |
+ FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype));
+ fwr->ptcl = f->fs.val.proto;
+ fwr->ptclm = f->fs.mask.proto;
+ fwr->ttyp = f->fs.val.tos;
+ fwr->ttypm = f->fs.mask.tos;
+ fwr->ivlan = htons(f->fs.val.ivlan);
+ fwr->ivlanm = htons(f->fs.mask.ivlan);
+ fwr->ovlan = htons(f->fs.val.ovlan);
+ fwr->ovlanm = htons(f->fs.mask.ovlan);
+ memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip));
+ memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm));
+ memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip));
+ memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm));
+ fwr->lp = htons(f->fs.val.lport);
+ fwr->lpm = htons(f->fs.mask.lport);
+ fwr->fp = htons(f->fs.val.fport);
+ fwr->fpm = htons(f->fs.mask.fport);
+ if (f->fs.newsmac)
+ memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma));
+
+ /* Mark the filter as "pending" and ship off the Filter Work Request.
+ * When we get the Work Request Reply we'll clear the pending status.
+ */
+ f->pending = 1;
+ set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3);
+ t4_ofld_send(adapter, skb);
+ return 0;
+}
+
+/* Return an error number if the indicated filter isn't writable ... */
+int writable_filter(struct filter_entry *f)
+{
+ if (f->locked)
+ return -EPERM;
+ if (f->pending)
+ return -EBUSY;
+
+ return 0;
+}
+
+/* Delete the filter at the specified index (if valid). The checks for all
+ * the common problems with doing this like the filter being locked, currently
+ * pending in another operation, etc.
+ */
+int delete_filter(struct adapter *adapter, unsigned int fidx)
+{
+ struct filter_entry *f;
+ int ret;
+
+ if (fidx >= adapter->tids.nftids + adapter->tids.nsftids)
+ return -EINVAL;
+
+ f = &adapter->tids.ftid_tab[fidx];
+ ret = writable_filter(f);
+ if (ret)
+ return ret;
+ if (f->valid)
+ return del_filter_wr(adapter, fidx);
+
+ return 0;
+}
+
+/* Clear a filter and release any of its resources that we own. This also
+ * clears the filter's "pending" status.
+ */
+void clear_filter(struct adapter *adap, struct filter_entry *f)
+{
+ /* If the new or old filter have loopback rewriteing rules then we'll
+ * need to free any existing Layer Two Table (L2T) entries of the old
+ * filter rule. The firmware will handle freeing up any Source MAC
+ * Table (SMT) entries used for rewriting Source MAC Addresses in
+ * loopback rules.
+ */
+ if (f->l2t)
+ cxgb4_l2t_release(f->l2t);
+
+ /* The zeroing of the filter rule below clears the filter valid,
+ * pending, locked flags, l2t pointer, etc. so it's all we need for
+ * this operation.
+ */
+ memset(f, 0, sizeof(*f));
+}
+
+void clear_all_filters(struct adapter *adapter)
+{
+ unsigned int i;
+
+ if (adapter->tids.ftid_tab) {
+ struct filter_entry *f = &adapter->tids.ftid_tab[0];
+ unsigned int max_ftid = adapter->tids.nftids +
+ adapter->tids.nsftids;
+
+ for (i = 0; i < max_ftid; i++, f++)
+ if (f->valid || f->pending)
+ clear_filter(adapter, f);
+ }
+}
+
+/* Fill up default masks for set match fields. */
+static void fill_default_mask(struct ch_filter_specification *fs)
+{
+ unsigned int lip = 0, lip_mask = 0;
+ unsigned int fip = 0, fip_mask = 0;
+ unsigned int i;
+
+ if (fs->val.iport && !fs->mask.iport)
+ fs->mask.iport |= ~0;
+ if (fs->val.fcoe && !fs->mask.fcoe)
+ fs->mask.fcoe |= ~0;
+ if (fs->val.matchtype && !fs->mask.matchtype)
+ fs->mask.matchtype |= ~0;
+ if (fs->val.macidx && !fs->mask.macidx)
+ fs->mask.macidx |= ~0;
+ if (fs->val.ethtype && !fs->mask.ethtype)
+ fs->mask.ethtype |= ~0;
+ if (fs->val.ivlan && !fs->mask.ivlan)
+ fs->mask.ivlan |= ~0;
+ if (fs->val.ovlan && !fs->mask.ovlan)
+ fs->mask.ovlan |= ~0;
+ if (fs->val.frag && !fs->mask.frag)
+ fs->mask.frag |= ~0;
+ if (fs->val.tos && !fs->mask.tos)
+ fs->mask.tos |= ~0;
+ if (fs->val.proto && !fs->mask.proto)
+ fs->mask.proto |= ~0;
+
+ for (i = 0; i < ARRAY_SIZE(fs->val.lip); i++) {
+ lip |= fs->val.lip[i];
+ lip_mask |= fs->mask.lip[i];
+ fip |= fs->val.fip[i];
+ fip_mask |= fs->mask.fip[i];
+ }
+
+ if (lip && !lip_mask)
+ memset(fs->mask.lip, ~0, sizeof(fs->mask.lip));
+
+ if (fip && !fip_mask)
+ memset(fs->mask.fip, ~0, sizeof(fs->mask.lip));
+
+ if (fs->val.lport && !fs->mask.lport)
+ fs->mask.lport = ~0;
+ if (fs->val.fport && !fs->mask.fport)
+ fs->mask.fport = ~0;
+}
+
+/* Check a Chelsio Filter Request for validity, convert it into our internal
+ * format and send it to the hardware. Return 0 on success, an error number
+ * otherwise. We attach any provided filter operation context to the internal
+ * filter specification in order to facilitate signaling completion of the
+ * operation.
+ */
+int __cxgb4_set_filter(struct net_device *dev, int filter_id,
+ struct ch_filter_specification *fs,
+ struct filter_ctx *ctx)
+{
+ struct adapter *adapter = netdev2adap(dev);
+ unsigned int max_fidx, fidx, iq;
+ struct filter_entry *f;
+ u32 iconf;
+ int ret;
+
+ max_fidx = adapter->tids.nftids;
+ if (filter_id != (max_fidx + adapter->tids.nsftids - 1) &&
+ filter_id >= max_fidx)
+ return -E2BIG;
+
+ fill_default_mask(fs);
+
+ ret = validate_filter(dev, fs);
+ if (ret)
+ return ret;
+
+ iq = get_filter_steerq(dev, fs);
+ if (iq < 0)
+ return iq;
+
+ /* IPv6 filters occupy four slots and must be aligned on
+ * four-slot boundaries. IPv4 filters only occupy a single
+ * slot and have no alignment requirements but writing a new
+ * IPv4 filter into the middle of an existing IPv6 filter
+ * requires clearing the old IPv6 filter and hence we prevent
+ * insertion.
+ */
+ if (fs->type == 0) { /* IPv4 */
+ /* If our IPv4 filter isn't being written to a
+ * multiple of four filter index and there's an IPv6
+ * filter at the multiple of 4 base slot, then we
+ * prevent insertion.
+ */
+ fidx = filter_id & ~0x3;
+ if (fidx != filter_id &&
+ adapter->tids.ftid_tab[fidx].fs.type) {
+ f = &adapter->tids.ftid_tab[fidx];
+ if (f->valid) {
+ dev_err(adapter->pdev_dev,
+ "Invalid location. IPv6 requires 4 slots and is occupying slots %u to %u\n",
+ fidx, fidx + 3);
+ return -EINVAL;
+ }
+ }
+ } else { /* IPv6 */
+ /* Ensure that the IPv6 filter is aligned on a
+ * multiple of 4 boundary.
+ */
+ if (filter_id & 0x3) {
+ dev_err(adapter->pdev_dev,
+ "Invalid location. IPv6 must be aligned on a 4-slot boundary\n");
+ return -EINVAL;
+ }
+
+ /* Check all except the base overlapping IPv4 filter slots. */
+ for (fidx = filter_id + 1; fidx < filter_id + 4; fidx++) {
+ f = &adapter->tids.ftid_tab[fidx];
+ if (f->valid) {
+ dev_err(adapter->pdev_dev,
+ "Invalid location. IPv6 requires 4 slots and an IPv4 filter exists at %u\n",
+ fidx);
+ return -EINVAL;
+ }
+ }
+ }
+
+ /* Check to make sure that provided filter index is not
+ * already in use by someone else
+ */
+ f = &adapter->tids.ftid_tab[filter_id];
+ if (f->valid)
+ return -EBUSY;
+
+ fidx = filter_id + adapter->tids.ftid_base;
+ ret = cxgb4_set_ftid(&adapter->tids, filter_id,
+ fs->type ? PF_INET6 : PF_INET);
+ if (ret)
+ return ret;
+
+ /* Check to make sure the filter requested is writable ... */
+ ret = writable_filter(f);
+ if (ret) {
+ /* Clear the bits we have set above */
+ cxgb4_clear_ftid(&adapter->tids, filter_id,
+ fs->type ? PF_INET6 : PF_INET);
+ return ret;
+ }
+
+ /* Clear out any old resources being used by the filter before
+ * we start constructing the new filter.
+ */
+ if (f->valid)
+ clear_filter(adapter, f);
+
+ /* Convert the filter specification into our internal format.
+ * We copy the PF/VF specification into the Outer VLAN field
+ * here so the rest of the code -- including the interface to
+ * the firmware -- doesn't have to constantly do these checks.
+ */
+ f->fs = *fs;
+ f->fs.iq = iq;
+ f->dev = dev;
+
+ iconf = adapter->params.tp.ingress_config;
+ if (iconf & VNIC_F) {
+ f->fs.val.ovlan = (fs->val.pf << 13) | fs->val.vf;
+ f->fs.mask.ovlan = (fs->mask.pf << 13) | fs->mask.vf;
+ f->fs.val.ovlan_vld = fs->val.pfvf_vld;
+ f->fs.mask.ovlan_vld = fs->mask.pfvf_vld;
+ }
+
+ /* Attempt to set the filter. If we don't succeed, we clear
+ * it and return the failure.
+ */
+ f->ctx = ctx;
+ f->tid = fidx; /* Save the actual tid */
+ ret = set_filter_wr(adapter, filter_id);
+ if (ret) {
+ cxgb4_clear_ftid(&adapter->tids, filter_id,
+ fs->type ? PF_INET6 : PF_INET);
+ clear_filter(adapter, f);
+ }
+
+ return ret;
+}
+
+/* Check a delete filter request for validity and send it to the hardware.
+ * Return 0 on success, an error number otherwise. We attach any provided
+ * filter operation context to the internal filter specification in order to
+ * facilitate signaling completion of the operation.
+ */
+int __cxgb4_del_filter(struct net_device *dev, int filter_id,
+ struct filter_ctx *ctx)
+{
+ struct adapter *adapter = netdev2adap(dev);
+ struct filter_entry *f;
+ unsigned int max_fidx;
+ int ret;
+
+ max_fidx = adapter->tids.nftids;
+ if (filter_id != (max_fidx + adapter->tids.nsftids - 1) &&
+ filter_id >= max_fidx)
+ return -E2BIG;
+
+ f = &adapter->tids.ftid_tab[filter_id];
+ ret = writable_filter(f);
+ if (ret)
+ return ret;
+
+ if (f->valid) {
+ f->ctx = ctx;
+ cxgb4_clear_ftid(&adapter->tids, filter_id,
+ f->fs.type ? PF_INET6 : PF_INET);
+ return del_filter_wr(adapter, filter_id);
+ }
+
+ /* If the caller has passed in a Completion Context then we need to
+ * mark it as a successful completion so they don't stall waiting
+ * for it.
+ */
+ if (ctx) {
+ ctx->result = 0;
+ complete(&ctx->completion);
+ }
+ return ret;
+}
+
+int cxgb4_set_filter(struct net_device *dev, int filter_id,
+ struct ch_filter_specification *fs)
+{
+ struct filter_ctx ctx;
+ int ret;
+
+ init_completion(&ctx.completion);
+
+ ret = __cxgb4_set_filter(dev, filter_id, fs, &ctx);
+ if (ret)
+ goto out;
+
+ /* Wait for reply */
+ ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ);
+ if (!ret)
+ return -ETIMEDOUT;
+
+ ret = ctx.result;
+out:
+ return ret;
+}
+
+int cxgb4_del_filter(struct net_device *dev, int filter_id)
+{
+ struct filter_ctx ctx;
+ int ret;
+
+ init_completion(&ctx.completion);
+
+ ret = __cxgb4_del_filter(dev, filter_id, &ctx);
+ if (ret)
+ goto out;
+
+ /* Wait for reply */
+ ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ);
+ if (!ret)
+ return -ETIMEDOUT;
+
+ ret = ctx.result;
+out:
+ return ret;
+}
+
+/* Handle a filter write/deletion reply. */
+void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
+{
+ unsigned int tid = GET_TID(rpl);
+ struct filter_entry *f = NULL;
+ unsigned int max_fidx;
+ int idx;
+
+ max_fidx = adap->tids.nftids + adap->tids.nsftids;
+ /* Get the corresponding filter entry for this tid */
+ if (adap->tids.ftid_tab) {
+ /* Check this in normal filter region */
+ idx = tid - adap->tids.ftid_base;
+ if (idx >= max_fidx)
+ return;
+ f = &adap->tids.ftid_tab[idx];
+ if (f->tid != tid)
+ return;
+ }
+
+ /* We found the filter entry for this tid */
+ if (f) {
+ unsigned int ret = TCB_COOKIE_G(rpl->cookie);
+ struct filter_ctx *ctx;
+
+ /* Pull off any filter operation context attached to the
+ * filter.
+ */
+ ctx = f->ctx;
+ f->ctx = NULL;
+
+ if (ret == FW_FILTER_WR_FLT_DELETED) {
+ /* Clear the filter when we get confirmation from the
+ * hardware that the filter has been deleted.
+ */
+ clear_filter(adap, f);
+ if (ctx)
+ ctx->result = 0;
+ } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) {
+ dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n",
+ idx);
+ clear_filter(adap, f);
+ if (ctx)
+ ctx->result = -ENOMEM;
+ } else if (ret == FW_FILTER_WR_FLT_ADDED) {
+ f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff;
+ f->pending = 0; /* asynchronous setup completed */
+ f->valid = 1;
+ if (ctx) {
+ ctx->result = 0;
+ ctx->tid = idx;
+ }
+ } else {
+ /* Something went wrong. Issue a warning about the
+ * problem and clear everything out.
+ */
+ dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n",
+ idx, ret);
+ clear_filter(adap, f);
+ if (ctx)
+ ctx->result = -EINVAL;
+ }
+ if (ctx)
+ complete(&ctx->completion);
+ }
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
new file mode 100644
index 000000000000..23742cb1c69f
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
@@ -0,0 +1,48 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_FILTER_H
+#define __CXGB4_FILTER_H
+
+#include "t4_msg.h"
+
+void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl);
+void clear_filter(struct adapter *adap, struct filter_entry *f);
+
+int set_filter_wr(struct adapter *adapter, int fidx);
+int delete_filter(struct adapter *adapter, unsigned int fidx);
+
+int writable_filter(struct filter_entry *f);
+void clear_all_filters(struct adapter *adapter);
+#endif /* __CXGB4_FILTER_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index d1ebb84c073e..1be4d235a576 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -67,6 +67,7 @@
#include <linux/crash_dump.h>
#include "cxgb4.h"
+#include "cxgb4_filter.h"
#include "t4_regs.h"
#include "t4_values.h"
#include "t4_msg.h"
@@ -77,6 +78,7 @@
#include "clip_tbl.h"
#include "l2t.h"
#include "sched.h"
+#include "cxgb4_tc_u32.h"
char cxgb4_driver_name[] = KBUILD_MODNAME;
@@ -87,30 +89,6 @@ char cxgb4_driver_name[] = KBUILD_MODNAME;
const char cxgb4_driver_version[] = DRV_VERSION;
#define DRV_DESC "Chelsio T4/T5/T6 Network Driver"
-/* Host shadow copy of ingress filter entry. This is in host native format
- * and doesn't match the ordering or bit order, etc. of the hardware of the
- * firmware command. The use of bit-field structure elements is purely to
- * remind ourselves of the field size limitations and save memory in the case
- * where the filter table is large.
- */
-struct filter_entry {
- /* Administrative fields for filter.
- */
- u32 valid:1; /* filter allocated and valid */
- u32 locked:1; /* filter is administratively locked */
-
- u32 pending:1; /* filter action is pending firmware reply */
- u32 smtidx:8; /* Source MAC Table index for smac */
- struct l2t_entry *l2t; /* Layer Two Table entry for dmac */
-
- /* The filter itself. Most of this is a straight copy of information
- * provided by the extended ioctl(). Some fields are translated to
- * internal forms -- for instance the Ingress Queue ID passed in from
- * the ioctl() is translated into the Absolute Ingress Queue ID.
- */
- struct ch_filter_specification fs;
-};
-
#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
@@ -527,66 +505,6 @@ static void dcb_rpl(struct adapter *adap, const struct fw_port_cmd *pcmd)
}
#endif /* CONFIG_CHELSIO_T4_DCB */
-/* Clear a filter and release any of its resources that we own. This also
- * clears the filter's "pending" status.
- */
-static void clear_filter(struct adapter *adap, struct filter_entry *f)
-{
- /* If the new or old filter have loopback rewriteing rules then we'll
- * need to free any existing Layer Two Table (L2T) entries of the old
- * filter rule. The firmware will handle freeing up any Source MAC
- * Table (SMT) entries used for rewriting Source MAC Addresses in
- * loopback rules.
- */
- if (f->l2t)
- cxgb4_l2t_release(f->l2t);
-
- /* The zeroing of the filter rule below clears the filter valid,
- * pending, locked flags, l2t pointer, etc. so it's all we need for
- * this operation.
- */
- memset(f, 0, sizeof(*f));
-}
-
-/* Handle a filter write/deletion reply.
- */
-static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
-{
- unsigned int idx = GET_TID(rpl);
- unsigned int nidx = idx - adap->tids.ftid_base;
- unsigned int ret;
- struct filter_entry *f;
-
- if (idx >= adap->tids.ftid_base && nidx <
- (adap->tids.nftids + adap->tids.nsftids)) {
- idx = nidx;
- ret = TCB_COOKIE_G(rpl->cookie);
- f = &adap->tids.ftid_tab[idx];
-
- if (ret == FW_FILTER_WR_FLT_DELETED) {
- /* Clear the filter when we get confirmation from the
- * hardware that the filter has been deleted.
- */
- clear_filter(adap, f);
- } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) {
- dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n",
- idx);
- clear_filter(adap, f);
- } else if (ret == FW_FILTER_WR_FLT_ADDED) {
- f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff;
- f->pending = 0; /* asynchronous setup completed */
- f->valid = 1;
- } else {
- /* Something went wrong. Issue a warning about the
- * problem and clear everything out.
- */
- dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n",
- idx, ret);
- clear_filter(adap, f);
- }
- }
-}
-
/* Response queue handler for the FW event queue.
*/
static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
@@ -1026,151 +944,6 @@ void t4_free_mem(void *addr)
kvfree(addr);
}
-/* Send a Work Request to write the filter at a specified index. We construct
- * a Firmware Filter Work Request to have the work done and put the indicated
- * filter into "pending" mode which will prevent any further actions against
- * it till we get a reply from the firmware on the completion status of the
- * request.
- */
-static int set_filter_wr(struct adapter *adapter, int fidx)
-{
- struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
- struct sk_buff *skb;
- struct fw_filter_wr *fwr;
- unsigned int ftid;
-
- skb = alloc_skb(sizeof(*fwr), GFP_KERNEL);
- if (!skb)
- return -ENOMEM;
-
- /* If the new filter requires loopback Destination MAC and/or VLAN
- * rewriting then we need to allocate a Layer 2 Table (L2T) entry for
- * the filter.
- */
- if (f->fs.newdmac || f->fs.newvlan) {
- /* allocate L2T entry for new filter */
- f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan,
- f->fs.eport, f->fs.dmac);
- if (f->l2t == NULL) {
- kfree_skb(skb);
- return -ENOMEM;
- }
- }
-
- ftid = adapter->tids.ftid_base + fidx;
-
- fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr));
- memset(fwr, 0, sizeof(*fwr));
-
- /* It would be nice to put most of the following in t4_hw.c but most
- * of the work is translating the cxgbtool ch_filter_specification
- * into the Work Request and the definition of that structure is
- * currently in cxgbtool.h which isn't appropriate to pull into the
- * common code. We may eventually try to come up with a more neutral
- * filter specification structure but for now it's easiest to simply
- * put this fairly direct code in line ...
- */
- fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR));
- fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16));
- fwr->tid_to_iq =
- htonl(FW_FILTER_WR_TID_V(ftid) |
- FW_FILTER_WR_RQTYPE_V(f->fs.type) |
- FW_FILTER_WR_NOREPLY_V(0) |
- FW_FILTER_WR_IQ_V(f->fs.iq));
- fwr->del_filter_to_l2tix =
- htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) |
- FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) |
- FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) |
- FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) |
- FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) |
- FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) |
- FW_FILTER_WR_DMAC_V(f->fs.newdmac) |
- FW_FILTER_WR_SMAC_V(f->fs.newsmac) |
- FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT ||
- f->fs.newvlan == VLAN_REWRITE) |
- FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE ||
- f->fs.newvlan == VLAN_REWRITE) |
- FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) |
- FW_FILTER_WR_TXCHAN_V(f->fs.eport) |
- FW_FILTER_WR_PRIO_V(f->fs.prio) |
- FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0));
- fwr->ethtype = htons(f->fs.val.ethtype);
- fwr->ethtypem = htons(f->fs.mask.ethtype);
- fwr->frag_to_ovlan_vldm =
- (FW_FILTER_WR_FRAG_V(f->fs.val.frag) |
- FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) |
- FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) |
- FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) |
- FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) |
- FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld));
- fwr->smac_sel = 0;
- fwr->rx_chan_rx_rpl_iq =
- htons(FW_FILTER_WR_RX_CHAN_V(0) |
- FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id));
- fwr->maci_to_matchtypem =
- htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) |
- FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) |
- FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) |
- FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) |
- FW_FILTER_WR_PORT_V(f->fs.val.iport) |
- FW_FILTER_WR_PORTM_V(f->fs.mask.iport) |
- FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) |
- FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype));
- fwr->ptcl = f->fs.val.proto;
- fwr->ptclm = f->fs.mask.proto;
- fwr->ttyp = f->fs.val.tos;
- fwr->ttypm = f->fs.mask.tos;
- fwr->ivlan = htons(f->fs.val.ivlan);
- fwr->ivlanm = htons(f->fs.mask.ivlan);
- fwr->ovlan = htons(f->fs.val.ovlan);
- fwr->ovlanm = htons(f->fs.mask.ovlan);
- memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip));
- memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm));
- memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip));
- memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm));
- fwr->lp = htons(f->fs.val.lport);
- fwr->lpm = htons(f->fs.mask.lport);
- fwr->fp = htons(f->fs.val.fport);
- fwr->fpm = htons(f->fs.mask.fport);
- if (f->fs.newsmac)
- memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma));
-
- /* Mark the filter as "pending" and ship off the Filter Work Request.
- * When we get the Work Request Reply we'll clear the pending status.
- */
- f->pending = 1;
- set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3);
- t4_ofld_send(adapter, skb);
- return 0;
-}
-
-/* Delete the filter at a specified index.
- */
-static int del_filter_wr(struct adapter *adapter, int fidx)
-{
- struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
- struct sk_buff *skb;
- struct fw_filter_wr *fwr;
- unsigned int len, ftid;
-
- len = sizeof(*fwr);
- ftid = adapter->tids.ftid_base + fidx;
-
- skb = alloc_skb(len, GFP_KERNEL);
- if (!skb)
- return -ENOMEM;
-
- fwr = (struct fw_filter_wr *)__skb_put(skb, len);
- t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id);
-
- /* Mark the filter as "pending" and ship off the Filter Work Request.
- * When we get the Work Request Reply we'll clear the pending status.
- */
- f->pending = 1;
- t4_mgmt_tx(adapter, skb);
- return 0;
-}
-
static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
void *accel_priv, select_queue_fallback_t fallback)
{
@@ -1552,19 +1325,22 @@ EXPORT_SYMBOL(cxgb4_remove_tid);
*/
static int tid_init(struct tid_info *t)
{
- size_t size;
- unsigned int stid_bmap_size;
- unsigned int natids = t->natids;
struct adapter *adap = container_of(t, struct adapter, tids);
+ unsigned int max_ftids = t->nftids + t->nsftids;
+ unsigned int natids = t->natids;
+ unsigned int stid_bmap_size;
+ unsigned int ftid_bmap_size;
+ size_t size;
stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids);
+ ftid_bmap_size = BITS_TO_LONGS(t->nftids);
size = t->ntids * sizeof(*t->tid_tab) +
natids * sizeof(*t->atid_tab) +
t->nstids * sizeof(*t->stid_tab) +
t->nsftids * sizeof(*t->stid_tab) +
stid_bmap_size * sizeof(long) +
- t->nftids * sizeof(*t->ftid_tab) +
- t->nsftids * sizeof(*t->ftid_tab);
+ max_ftids * sizeof(*t->ftid_tab) +
+ ftid_bmap_size * sizeof(long);
t->tid_tab = t4_alloc_mem(size);
if (!t->tid_tab)
@@ -1574,8 +1350,10 @@ static int tid_init(struct tid_info *t)
t->stid_tab = (struct serv_entry *)&t->atid_tab[natids];
t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids];
t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size];
+ t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids];
spin_lock_init(&t->stid_lock);
spin_lock_init(&t->atid_lock);
+ spin_lock_init(&t->ftid_lock);
t->stids_in_use = 0;
t->sftids_in_use = 0;
@@ -1590,12 +1368,16 @@ static int tid_init(struct tid_info *t)
t->atid_tab[natids - 1].next = &t->atid_tab[natids];
t->afree = t->atid_tab;
}
- bitmap_zero(t->stid_bmap, t->nstids + t->nsftids);
- /* Reserve stid 0 for T4/T5 adapters */
- if (!t->stid_base &&
- (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5))
- __set_bit(0, t->stid_bmap);
+ if (is_offload(adap)) {
+ bitmap_zero(t->stid_bmap, t->nstids + t->nsftids);
+ /* Reserve stid 0 for T4/T5 adapters */
+ if (!t->stid_base &&
+ CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
+ __set_bit(0, t->stid_bmap);
+ }
+
+ bitmap_zero(t->ftid_bmap, t->nftids);
return 0;
}
@@ -2514,40 +2296,6 @@ static int cxgb_close(struct net_device *dev)
return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false);
}
-/* Return an error number if the indicated filter isn't writable ...
- */
-static int writable_filter(struct filter_entry *f)
-{
- if (f->locked)
- return -EPERM;
- if (f->pending)
- return -EBUSY;
-
- return 0;
-}
-
-/* Delete the filter at the specified index (if valid). The checks for all
- * the common problems with doing this like the filter being locked, currently
- * pending in another operation, etc.
- */
-static int delete_filter(struct adapter *adapter, unsigned int fidx)
-{
- struct filter_entry *f;
- int ret;
-
- if (fidx >= adapter->tids.nftids + adapter->tids.nsftids)
- return -EINVAL;
-
- f = &adapter->tids.ftid_tab[fidx];
- ret = writable_filter(f);
- if (ret)
- return ret;
- if (f->valid)
- return del_filter_wr(adapter, fidx);
-
- return 0;
-}
-
int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid,
__be32 sip, __be16 sport, __be16 vlan,
unsigned int queue, unsigned char port, unsigned char mask)
@@ -2964,6 +2712,35 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
return err;
}
+int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
+ struct tc_to_netdev *tc)
+{
+ struct port_info *pi = netdev2pinfo(dev);
+ struct adapter *adap = netdev2adap(dev);
+
+ if (!(adap->flags & FULL_INIT_DONE)) {
+ dev_err(adap->pdev_dev,
+ "Failed to setup tc on port %d. Link Down?\n",
+ pi->port_id);
+ return -EINVAL;
+ }
+
+ if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) &&
+ tc->type == TC_SETUP_CLSU32) {
+ switch (tc->cls_u32->command) {
+ case TC_CLSU32_NEW_KNODE:
+ case TC_CLSU32_REPLACE_KNODE:
+ return cxgb4_config_knode(dev, proto, tc->cls_u32);
+ case TC_CLSU32_DELETE_KNODE:
+ return cxgb4_delete_knode(dev, proto, tc->cls_u32);
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return -EOPNOTSUPP;
+}
+
static const struct net_device_ops cxgb4_netdev_ops = {
.ndo_open = cxgb_open,
.ndo_stop = cxgb_close,
@@ -2987,6 +2764,7 @@ static const struct net_device_ops cxgb4_netdev_ops = {
.ndo_busy_poll = cxgb_busy_poll,
#endif
.ndo_set_tx_maxrate = cxgb_set_tx_maxrate,
+ .ndo_setup_tc = cxgb_setup_tc,
};
#ifdef CONFIG_PCI_IOV
@@ -4659,6 +4437,7 @@ static void free_some_resources(struct adapter *adapter)
t4_free_mem(adapter->l2t);
t4_cleanup_sched(adapter);
t4_free_mem(adapter->tids.tid_tab);
+ cxgb4_cleanup_tc_u32(adapter);
kfree(adapter->sge.egr_map);
kfree(adapter->sge.ingr_map);
kfree(adapter->sge.starving_fl);
@@ -5003,7 +4782,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
NETIF_F_RXCSUM | NETIF_F_RXHASH |
- NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
+ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
+ NETIF_F_HW_TC;
if (highdma)
netdev->hw_features |= NETIF_F_HIGHDMA;
netdev->features |= netdev->hw_features;
@@ -5087,10 +4867,16 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
i);
}
- if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
+ if (tid_init(&adapter->tids) < 0) {
dev_warn(&pdev->dev, "could not allocate TID table, "
"continuing\n");
adapter->params.offload = 0;
+ } else {
+ adapter->tc_u32 = cxgb4_init_tc_u32(adapter,
+ CXGB4_MAX_LINK_HANDLE);
+ if (!adapter->tc_u32)
+ dev_warn(&pdev->dev,
+ "could not offload tc u32, continuing\n");
}
if (is_offload(adapter)) {
@@ -5274,13 +5060,7 @@ static void remove_one(struct pci_dev *pdev)
/* If we allocated filters, free up state associated with any
* valid filters ...
*/
- if (adapter->tids.ftid_tab) {
- struct filter_entry *f = &adapter->tids.ftid_tab[0];
- for (i = 0; i < (adapter->tids.nftids +
- adapter->tids.nsftids); i++, f++)
- if (f->valid)
- clear_filter(adapter, f);
- }
+ clear_all_filters(adapter);
if (adapter->flags & FULL_INIT_DONE)
cxgb_down(adapter);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
new file mode 100644
index 000000000000..49d2debb334e
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -0,0 +1,483 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+
+#include "cxgb4.h"
+#include "cxgb4_tc_u32_parse.h"
+#include "cxgb4_tc_u32.h"
+
+/* Fill ch_filter_specification with parsed match value/mask pair. */
+static int fill_match_fields(struct adapter *adap,
+ struct ch_filter_specification *fs,
+ struct tc_cls_u32_offload *cls,
+ const struct cxgb4_match_field *entry,
+ bool next_header)
+{
+ unsigned int i, j;
+ u32 val, mask;
+ int off, err;
+ bool found;
+
+ for (i = 0; i < cls->knode.sel->nkeys; i++) {
+ off = cls->knode.sel->keys[i].off;
+ val = cls->knode.sel->keys[i].val;
+ mask = cls->knode.sel->keys[i].mask;
+
+ if (next_header) {
+ /* For next headers, parse only keys with offmask */
+ if (!cls->knode.sel->keys[i].offmask)
+ continue;
+ } else {
+ /* For the remaining, parse only keys without offmask */
+ if (cls->knode.sel->keys[i].offmask)
+ continue;
+ }
+
+ found = false;
+
+ for (j = 0; entry[j].val; j++) {
+ if (off == entry[j].off) {
+ found = true;
+ err = entry[j].val(fs, val, mask);
+ if (err)
+ return err;
+ break;
+ }
+ }
+
+ if (!found)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Fill ch_filter_specification with parsed action. */
+static int fill_action_fields(struct adapter *adap,
+ struct ch_filter_specification *fs,
+ struct tc_cls_u32_offload *cls)
+{
+ unsigned int num_actions = 0;
+ const struct tc_action *a;
+ struct tcf_exts *exts;
+ LIST_HEAD(actions);
+
+ exts = cls->knode.exts;
+ if (tc_no_actions(exts))
+ return -EINVAL;
+
+ tcf_exts_to_list(exts, &actions);
+ list_for_each_entry(a, &actions, list) {
+ /* Don't allow more than one action per rule. */
+ if (num_actions)
+ return -EINVAL;
+
+ /* Drop in hardware. */
+ if (is_tcf_gact_shot(a)) {
+ fs->action = FILTER_DROP;
+ num_actions++;
+ continue;
+ }
+
+ /* Re-direct to specified port in hardware. */
+ if (is_tcf_mirred_redirect(a)) {
+ struct net_device *n_dev;
+ unsigned int i, index;
+ bool found = false;
+
+ index = tcf_mirred_ifindex(a);
+ for_each_port(adap, i) {
+ n_dev = adap->port[i];
+ if (index == n_dev->ifindex) {
+ fs->action = FILTER_SWITCH;
+ fs->eport = i;
+ found = true;
+ break;
+ }
+ }
+
+ /* Interface doesn't belong to any port of
+ * the underlying hardware.
+ */
+ if (!found)
+ return -EINVAL;
+
+ num_actions++;
+ continue;
+ }
+
+ /* Un-supported action. */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int cxgb4_config_knode(struct net_device *dev, __be16 protocol,
+ struct tc_cls_u32_offload *cls)
+{
+ const struct cxgb4_match_field *start, *link_start = NULL;
+ struct adapter *adapter = netdev2adap(dev);
+ struct ch_filter_specification fs;
+ struct cxgb4_tc_u32_table *t;
+ struct cxgb4_link *link;
+ unsigned int filter_id;
+ u32 uhtid, link_uhtid;
+ bool is_ipv6 = false;
+ int ret;
+
+ if (!can_tc_u32_offload(dev))
+ return -EOPNOTSUPP;
+
+ if (protocol != htons(ETH_P_IP) && protocol != htons(ETH_P_IPV6))
+ return -EOPNOTSUPP;
+
+ /* Fetch the location to insert the filter. */
+ filter_id = cls->knode.handle & 0xFFFFF;
+
+ if (filter_id > adapter->tids.nftids) {
+ dev_err(adapter->pdev_dev,
+ "Location %d out of range for insertion. Max: %d\n",
+ filter_id, adapter->tids.nftids);
+ return -ERANGE;
+ }
+
+ t = adapter->tc_u32;
+ uhtid = TC_U32_USERHTID(cls->knode.handle);
+ link_uhtid = TC_U32_USERHTID(cls->knode.link_handle);
+
+ /* Ensure that uhtid is either root u32 (i.e. 0x800)
+ * or a a valid linked bucket.
+ */
+ if (uhtid != 0x800 && uhtid >= t->size)
+ return -EINVAL;
+
+ /* Ensure link handle uhtid is sane, if specified. */
+ if (link_uhtid >= t->size)
+ return -EINVAL;
+
+ memset(&fs, 0, sizeof(fs));
+
+ if (protocol == htons(ETH_P_IPV6)) {
+ start = cxgb4_ipv6_fields;
+ is_ipv6 = true;
+ } else {
+ start = cxgb4_ipv4_fields;
+ is_ipv6 = false;
+ }
+
+ if (uhtid != 0x800) {
+ /* Link must exist from root node before insertion. */
+ if (!t->table[uhtid - 1].link_handle)
+ return -EINVAL;
+
+ /* Link must have a valid supported next header. */
+ link_start = t->table[uhtid - 1].match_field;
+ if (!link_start)
+ return -EINVAL;
+ }
+
+ /* Parse links and record them for subsequent jumps to valid
+ * next headers.
+ */
+ if (link_uhtid) {
+ const struct cxgb4_next_header *next;
+ bool found = false;
+ unsigned int i, j;
+ u32 val, mask;
+ int off;
+
+ if (t->table[link_uhtid - 1].link_handle) {
+ dev_err(adapter->pdev_dev,
+ "Link handle exists for: 0x%x\n",
+ link_uhtid);
+ return -EINVAL;
+ }
+
+ next = is_ipv6 ? cxgb4_ipv6_jumps : cxgb4_ipv4_jumps;
+
+ /* Try to find matches that allow jumps to next header. */
+ for (i = 0; next[i].jump; i++) {
+ if (next[i].offoff != cls->knode.sel->offoff ||
+ next[i].shift != cls->knode.sel->offshift ||
+ next[i].mask != cls->knode.sel->offmask ||
+ next[i].offset != cls->knode.sel->off)
+ continue;
+
+ /* Found a possible candidate. Find a key that
+ * matches the corresponding offset, value, and
+ * mask to jump to next header.
+ */
+ for (j = 0; j < cls->knode.sel->nkeys; j++) {
+ off = cls->knode.sel->keys[j].off;
+ val = cls->knode.sel->keys[j].val;
+ mask = cls->knode.sel->keys[j].mask;
+
+ if (next[i].match_off == off &&
+ next[i].match_val == val &&
+ next[i].match_mask == mask) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ continue; /* Try next candidate. */
+
+ /* Candidate to jump to next header found.
+ * Translate all keys to internal specification
+ * and store them in jump table. This spec is copied
+ * later to set the actual filters.
+ */
+ ret = fill_match_fields(adapter, &fs, cls,
+ start, false);
+ if (ret)
+ goto out;
+
+ link = &t->table[link_uhtid - 1];
+ link->match_field = next[i].jump;
+ link->link_handle = cls->knode.handle;
+ memcpy(&link->fs, &fs, sizeof(fs));
+ break;
+ }
+
+ /* No candidate found to jump to next header. */
+ if (!found)
+ return -EINVAL;
+
+ return 0;
+ }
+
+ /* Fill ch_filter_specification match fields to be shipped to hardware.
+ * Copy the linked spec (if any) first. And then update the spec as
+ * needed.
+ */
+ if (uhtid != 0x800 && t->table[uhtid - 1].link_handle) {
+ /* Copy linked ch_filter_specification */
+ memcpy(&fs, &t->table[uhtid - 1].fs, sizeof(fs));
+ ret = fill_match_fields(adapter, &fs, cls,
+ link_start, true);
+ if (ret)
+ goto out;
+ }
+
+ ret = fill_match_fields(adapter, &fs, cls, start, false);
+ if (ret)
+ goto out;
+
+ /* Fill ch_filter_specification action fields to be shipped to
+ * hardware.
+ */
+ ret = fill_action_fields(adapter, &fs, cls);
+ if (ret)
+ goto out;
+
+ /* The filter spec has been completely built from the info
+ * provided from u32. We now set some default fields in the
+ * spec for sanity.
+ */
+
+ /* Match only packets coming from the ingress port where this
+ * filter will be created.
+ */
+ fs.val.iport = netdev2pinfo(dev)->port_id;
+ fs.mask.iport = ~0;
+
+ /* Enable filter hit counts. */
+ fs.hitcnts = 1;
+
+ /* Set type of filter - IPv6 or IPv4 */
+ fs.type = is_ipv6 ? 1 : 0;
+
+ /* Set the filter */
+ ret = cxgb4_set_filter(dev, filter_id, &fs);
+ if (ret)
+ goto out;
+
+ /* If this is a linked bucket, then set the corresponding
+ * entry in the bitmap to mark it as belonging to this linked
+ * bucket.
+ */
+ if (uhtid != 0x800 && t->table[uhtid - 1].link_handle)
+ set_bit(filter_id, t->table[uhtid - 1].tid_map);
+
+out:
+ return ret;
+}
+
+int cxgb4_delete_knode(struct net_device *dev, __be16 protocol,
+ struct tc_cls_u32_offload *cls)
+{
+ struct adapter *adapter = netdev2adap(dev);
+ unsigned int filter_id, max_tids, i, j;
+ struct cxgb4_link *link = NULL;
+ struct cxgb4_tc_u32_table *t;
+ u32 handle, uhtid;
+ int ret;
+
+ if (!can_tc_u32_offload(dev))
+ return -EOPNOTSUPP;
+
+ /* Fetch the location to delete the filter. */
+ filter_id = cls->knode.handle & 0xFFFFF;
+
+ if (filter_id > adapter->tids.nftids) {
+ dev_err(adapter->pdev_dev,
+ "Location %d out of range for deletion. Max: %d\n",
+ filter_id, adapter->tids.nftids);
+ return -ERANGE;
+ }
+
+ t = adapter->tc_u32;
+ handle = cls->knode.handle;
+ uhtid = TC_U32_USERHTID(cls->knode.handle);
+
+ /* Ensure that uhtid is either root u32 (i.e. 0x800)
+ * or a a valid linked bucket.
+ */
+ if (uhtid != 0x800 && uhtid >= t->size)
+ return -EINVAL;
+
+ /* Delete the specified filter */
+ if (uhtid != 0x800) {
+ link = &t->table[uhtid - 1];
+ if (!link->link_handle)
+ return -EINVAL;
+
+ if (!test_bit(filter_id, link->tid_map))
+ return -EINVAL;
+ }
+
+ ret = cxgb4_del_filter(dev, filter_id);
+ if (ret)
+ goto out;
+
+ if (link)
+ clear_bit(filter_id, link->tid_map);
+
+ /* If a link is being deleted, then delete all filters
+ * associated with the link.
+ */
+ max_tids = adapter->tids.nftids;
+ for (i = 0; i < t->size; i++) {
+ link = &t->table[i];
+
+ if (link->link_handle == handle) {
+ for (j = 0; j < max_tids; j++) {
+ if (!test_bit(j, link->tid_map))
+ continue;
+
+ ret = __cxgb4_del_filter(dev, j, NULL);
+ if (ret)
+ goto out;
+
+ clear_bit(j, link->tid_map);
+ }
+
+ /* Clear the link state */
+ link->match_field = NULL;
+ link->link_handle = 0;
+ memset(&link->fs, 0, sizeof(link->fs));
+ break;
+ }
+ }
+
+out:
+ return ret;
+}
+
+void cxgb4_cleanup_tc_u32(struct adapter *adap)
+{
+ struct cxgb4_tc_u32_table *t;
+ unsigned int i;
+
+ if (!adap->tc_u32)
+ return;
+
+ /* Free up all allocated memory. */
+ t = adap->tc_u32;
+ for (i = 0; i < t->size; i++) {
+ struct cxgb4_link *link = &t->table[i];
+
+ t4_free_mem(link->tid_map);
+ }
+ t4_free_mem(adap->tc_u32);
+}
+
+struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap,
+ unsigned int size)
+{
+ struct cxgb4_tc_u32_table *t;
+ unsigned int i;
+
+ if (!size)
+ return NULL;
+
+ t = t4_alloc_mem(sizeof(*t) +
+ (size * sizeof(struct cxgb4_link)));
+ if (!t)
+ return NULL;
+
+ t->size = size;
+
+ for (i = 0; i < t->size; i++) {
+ struct cxgb4_link *link = &t->table[i];
+ unsigned int bmap_size;
+ unsigned int max_tids;
+
+ max_tids = adap->tids.nftids;
+ bmap_size = BITS_TO_LONGS(max_tids);
+ link->tid_map = t4_alloc_mem(sizeof(unsigned long) * bmap_size);
+ if (!link->tid_map)
+ goto out_no_mem;
+ bitmap_zero(link->tid_map, max_tids);
+ }
+
+ return t;
+
+out_no_mem:
+ for (i = 0; i < t->size; i++) {
+ struct cxgb4_link *link = &t->table[i];
+
+ if (link->tid_map)
+ t4_free_mem(link->tid_map);
+ }
+
+ if (t)
+ t4_free_mem(t);
+
+ return NULL;
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h
new file mode 100644
index 000000000000..6bdc885eff22
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h
@@ -0,0 +1,57 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_TC_U32_H
+#define __CXGB4_TC_U32_H
+
+#include <net/pkt_cls.h>
+
+#define CXGB4_MAX_LINK_HANDLE 32
+
+static inline bool can_tc_u32_offload(struct net_device *dev)
+{
+ struct adapter *adap = netdev2adap(dev);
+
+ return (dev->features & NETIF_F_HW_TC) && adap->tc_u32 ? true : false;
+}
+
+int cxgb4_config_knode(struct net_device *dev, __be16 protocol,
+ struct tc_cls_u32_offload *cls);
+int cxgb4_delete_knode(struct net_device *dev, __be16 protocol,
+ struct tc_cls_u32_offload *cls);
+
+void cxgb4_cleanup_tc_u32(struct adapter *adapter);
+struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap,
+ unsigned int size);
+#endif /* __CXGB4_TC_U32_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h
new file mode 100644
index 000000000000..a4b99edcc339
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h
@@ -0,0 +1,294 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_TC_U32_PARSE_H
+#define __CXGB4_TC_U32_PARSE_H
+
+struct cxgb4_match_field {
+ int off; /* Offset from the beginning of the header to match */
+ /* Fill the value/mask pair in the spec if matched */
+ int (*val)(struct ch_filter_specification *f, u32 val, u32 mask);
+};
+
+/* IPv4 match fields */
+static inline int cxgb4_fill_ipv4_tos(struct ch_filter_specification *f,
+ u32 val, u32 mask)
+{
+ f->val.tos = (ntohl(val) >> 16) & 0x000000FF;
+ f->mask.tos = (ntohl(mask) >> 16) & 0x000000FF;
+
+ return 0;
+}
+
+static inline int cxgb4_fill_ipv4_frag(struct ch_filter_specification *f,
+ u32 val, u32 mask)
+{
+ u32 mask_val;
+ u8 frag_val;
+
+ frag_val = (ntohl(val) >> 13) & 0x00000007;
+ mask_val = ntohl(mask) & 0x0000FFFF;
+
+ if (frag_val == 0x1 && mask_val != 0x3FFF) { /* MF set */
+ f->val.frag = 1;
+ f->mask.frag = 1;
+ } else if (frag_val == 0x2 && mask_val != 0x3FFF) { /* DF set */
+ f->val.frag = 0;
+ f->mask.frag = 1;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static inline int cxgb4_fill_ipv4_proto(struct ch_filter_specification *f,
+ u32 val, u32 mask)
+{
+ f->val.proto = (ntohl(val) >> 16) & 0x000000FF;
+ f->mask.proto = (ntohl(mask) >> 16) & 0x000000FF;
+
+ return 0;
+}
+
+static inline int cxgb4_fill_ipv4_src_ip(struct ch_filter_specification *f,
+ u32 val, u32 mask)
+{
+ memcpy(&f->val.fip[0], &val, sizeof(u32));
+ memcpy(&f->mask.fip[0], &mask, sizeof(u32));
+
+ return 0;
+}
+
+static inline int cxgb4_fill_ipv4_dst_ip(struct ch_filter_specification *f,
+ u32 val, u32 mask)
+{
+ memcpy(&f->val.lip[0], &val, sizeof(u32));
+ memcpy(&f->mask.lip[0], &mask, sizeof(u32));
+
+ return 0;
+}
+
+static const struct cxgb4_match_field cxgb4_ipv4_fields[] = {
+ { .off = 0, .val = cxgb4_fill_ipv4_tos },
+ { .off = 4, .val = cxgb4_fill_ipv4_frag },
+ { .off = 8, .val = cxgb4_fill_ipv4_proto },
+ { .off = 12, .val = cxgb4_fill_ipv4_src_ip },
+ { .off = 16, .val = cxgb4_fill_ipv4_dst_ip },
+ { .val = NULL }
+};
+
+/* IPv6 match fields */
+static inline int cxgb4_fill_ipv6_tos(struct ch_filter_specification *f,
+ u32 val, u32 mask)
+{
+ f->val.tos = (ntohl(val) >> 20) & 0x000000FF;
+ f->mask.tos = (ntohl(mask) >> 20) & 0x000000FF;
+
+ return 0;
+}
+
+static inline int cxgb4_fill_ipv6_proto(struct ch_filter_specification *f,
+ u32 val, u32 mask)
+{
+ f->val.proto = (ntohl(val) >> 8) & 0x000000FF;
+ f->mask.proto = (ntohl(mask) >> 8) & 0x000000FF;
+
+ return 0;
+}
+
+static inline int cxgb4_fill_ipv6_src_ip0(struct ch_filter_specification *f,
+ u32 val, u32 mask)
+{
+ memcpy(&f->val.fip[0], &val, sizeof(u32));
+ memcpy(&f->mask.fip[0], &mask, sizeof(u32));
+
+ return 0;
+}
+
+static inline int cxgb4_fill_ipv6_src_ip1(struct ch_filter_specification *f,
+ u32 val, u32 mask)
+{
+ memcpy(&f->val.fip[4], &val, sizeof(u32));
+ memcpy(&f->mask.fip[4], &mask, sizeof(u32));
+
+ return 0;
+}
+
+static inline int cxgb4_fill_ipv6_src_ip2(struct ch_filter_specification *f,
+ u32 val, u32 mask)
+{
+ memcpy(&f->val.fip[8], &val, sizeof(u32));
+ memcpy(&f->mask.fip[8], &mask, sizeof(u32));
+
+ return 0;
+}
+
+static inline int cxgb4_fill_ipv6_src_ip3(struct ch_filter_specification *f,
+ u32 val, u32 mask)
+{
+ memcpy(&f->val.fip[12], &val, sizeof(u32));
+ memcpy(&f->mask.fip[12], &mask, sizeof(u32));
+
+ return 0;
+}
+
+static inline int cxgb4_fill_ipv6_dst_ip0(struct ch_filter_specification *f,
+ u32 val, u32 mask)
+{
+ memcpy(&f->val.lip[0], &val, sizeof(u32));
+ memcpy(&f->mask.lip[0], &mask, sizeof(u32));
+
+ return 0;
+}
+
+static inline int cxgb4_fill_ipv6_dst_ip1(struct ch_filter_specification *f,
+ u32 val, u32 mask)
+{
+ memcpy(&f->val.lip[4], &val, sizeof(u32));
+ memcpy(&f->mask.lip[4], &mask, sizeof(u32));
+
+ return 0;
+}
+
+static inline int cxgb4_fill_ipv6_dst_ip2(struct ch_filter_specification *f,
+ u32 val, u32 mask)
+{
+ memcpy(&f->val.lip[8], &val, sizeof(u32));
+ memcpy(&f->mask.lip[8], &mask, sizeof(u32));
+
+ return 0;
+}
+
+static inline int cxgb4_fill_ipv6_dst_ip3(struct ch_filter_specification *f,
+ u32 val, u32 mask)
+{
+ memcpy(&f->val.lip[12], &val, sizeof(u32));
+ memcpy(&f->mask.lip[12], &mask, sizeof(u32));
+
+ return 0;
+}
+
+static const struct cxgb4_match_field cxgb4_ipv6_fields[] = {
+ { .off = 0, .val = cxgb4_fill_ipv6_tos },
+ { .off = 4, .val = cxgb4_fill_ipv6_proto },
+ { .off = 8, .val = cxgb4_fill_ipv6_src_ip0 },
+ { .off = 12, .val = cxgb4_fill_ipv6_src_ip1 },
+ { .off = 16, .val = cxgb4_fill_ipv6_src_ip2 },
+ { .off = 20, .val = cxgb4_fill_ipv6_src_ip3 },
+ { .off = 24, .val = cxgb4_fill_ipv6_dst_ip0 },
+ { .off = 28, .val = cxgb4_fill_ipv6_dst_ip1 },
+ { .off = 32, .val = cxgb4_fill_ipv6_dst_ip2 },
+ { .off = 36, .val = cxgb4_fill_ipv6_dst_ip3 },
+ { .val = NULL }
+};
+
+/* TCP/UDP match */
+static inline int cxgb4_fill_l4_ports(struct ch_filter_specification *f,
+ u32 val, u32 mask)
+{
+ f->val.fport = ntohl(val) >> 16;
+ f->mask.fport = ntohl(mask) >> 16;
+ f->val.lport = ntohl(val) & 0x0000FFFF;
+ f->mask.lport = ntohl(mask) & 0x0000FFFF;
+
+ return 0;
+};
+
+static const struct cxgb4_match_field cxgb4_tcp_fields[] = {
+ { .off = 0, .val = cxgb4_fill_l4_ports },
+ { .val = NULL }
+};
+
+static const struct cxgb4_match_field cxgb4_udp_fields[] = {
+ { .off = 0, .val = cxgb4_fill_l4_ports },
+ { .val = NULL }
+};
+
+struct cxgb4_next_header {
+ unsigned int offset; /* Offset to next header */
+ /* offset, shift, and mask added to offset above
+ * to get to next header. Useful when using a header
+ * field's value to jump to next header such as IHL field
+ * in IPv4 header.
+ */
+ unsigned int offoff;
+ u32 shift;
+ u32 mask;
+ /* match criteria to make this jump */
+ unsigned int match_off;
+ u32 match_val;
+ u32 match_mask;
+ /* location of jump to make */
+ const struct cxgb4_match_field *jump;
+};
+
+/* Accept a rule with a jump to transport layer header based on IHL field in
+ * IPv4 header.
+ */
+static const struct cxgb4_next_header cxgb4_ipv4_jumps[] = {
+ { .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF,
+ .match_off = 8, .match_val = 0x600, .match_mask = 0xFF00,
+ .jump = cxgb4_tcp_fields },
+ { .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF,
+ .match_off = 8, .match_val = 0x1100, .match_mask = 0xFF00,
+ .jump = cxgb4_udp_fields },
+ { .jump = NULL }
+};
+
+/* Accept a rule with a jump directly past the 40 Bytes of IPv6 fixed header
+ * to get to transport layer header.
+ */
+static const struct cxgb4_next_header cxgb4_ipv6_jumps[] = {
+ { .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0,
+ .match_off = 4, .match_val = 0x60000, .match_mask = 0xFF0000,
+ .jump = cxgb4_tcp_fields },
+ { .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0,
+ .match_off = 4, .match_val = 0x110000, .match_mask = 0xFF0000,
+ .jump = cxgb4_udp_fields },
+ { .jump = NULL }
+};
+
+struct cxgb4_link {
+ const struct cxgb4_match_field *match_field; /* Next header */
+ struct ch_filter_specification fs; /* Match spec associated with link */
+ u32 link_handle; /* Knode handle associated with the link */
+ unsigned long *tid_map; /* Bitmap for filter tids */
+};
+
+struct cxgb4_tc_u32_table {
+ unsigned int size; /* number of entries in table */
+ struct cxgb4_link table[0]; /* Jump table */
+};
+#endif /* __CXGB4_TC_U32_PARSE_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index b3544f6b88ca..47bd14f602db 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -1,7 +1,7 @@
/*
* This file is part of the Chelsio T4 Ethernet driver for Linux.
*
- * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -106,6 +106,7 @@ struct tid_info {
unsigned int atid_base;
struct filter_entry *ftid_tab;
+ unsigned long *ftid_bmap;
unsigned int nftids;
unsigned int ftid_base;
unsigned int aftid_base;
@@ -126,6 +127,8 @@ struct tid_info {
atomic_t tids_in_use;
/* TIDs in the HASH */
atomic_t hash_tids_in_use;
+ /* lock for setting/clearing filter bitmap */
+ spinlock_t ftid_lock;
};
static inline void *lookup_tid(const struct tid_info *t, unsigned int tid)
@@ -185,6 +188,27 @@ int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid,
int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid,
unsigned int queue, bool ipv6);
+/* Filter operation context to allow callers of cxgb4_set_filter() and
+ * cxgb4_del_filter() to wait for an asynchronous completion.
+ */
+struct filter_ctx {
+ struct completion completion; /* completion rendezvous */
+ void *closure; /* caller's opaque information */
+ int result; /* result of operation */
+ u32 tid; /* to store tid */
+};
+
+struct ch_filter_specification;
+
+int __cxgb4_set_filter(struct net_device *dev, int filter_id,
+ struct ch_filter_specification *fs,
+ struct filter_ctx *ctx);
+int __cxgb4_del_filter(struct net_device *dev, int filter_id,
+ struct filter_ctx *ctx);
+int cxgb4_set_filter(struct net_device *dev, int filter_id,
+ struct ch_filter_specification *fs);
+int cxgb4_del_filter(struct net_device *dev, int filter_id);
+
static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue)
{
skb_set_queue_mapping(skb, (queue << 1) | prio);
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 36361f8bf894..90f9c5481290 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -60,6 +60,8 @@ struct ftgmac100 {
struct ftgmac100_descs *descs;
dma_addr_t descs_dma_addr;
+ struct page *rx_pages[RX_QUEUE_ENTRIES];
+
unsigned int rx_pointer;
unsigned int tx_clean_pointer;
unsigned int tx_pointer;
@@ -77,6 +79,9 @@ struct ftgmac100 {
int int_mask_all;
bool use_ncsi;
bool enabled;
+
+ u32 rxdes0_edorr_mask;
+ u32 txdes0_edotr_mask;
};
static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv,
@@ -257,10 +262,11 @@ static bool ftgmac100_rxdes_packet_ready(struct ftgmac100_rxdes *rxdes)
return rxdes->rxdes0 & cpu_to_le32(FTGMAC100_RXDES0_RXPKT_RDY);
}
-static void ftgmac100_rxdes_set_dma_own(struct ftgmac100_rxdes *rxdes)
+static void ftgmac100_rxdes_set_dma_own(const struct ftgmac100 *priv,
+ struct ftgmac100_rxdes *rxdes)
{
/* clear status bits */
- rxdes->rxdes0 &= cpu_to_le32(FTGMAC100_RXDES0_EDORR);
+ rxdes->rxdes0 &= cpu_to_le32(priv->rxdes0_edorr_mask);
}
static bool ftgmac100_rxdes_rx_error(struct ftgmac100_rxdes *rxdes)
@@ -298,9 +304,10 @@ static bool ftgmac100_rxdes_multicast(struct ftgmac100_rxdes *rxdes)
return rxdes->rxdes0 & cpu_to_le32(FTGMAC100_RXDES0_MULTICAST);
}
-static void ftgmac100_rxdes_set_end_of_ring(struct ftgmac100_rxdes *rxdes)
+static void ftgmac100_rxdes_set_end_of_ring(const struct ftgmac100 *priv,
+ struct ftgmac100_rxdes *rxdes)
{
- rxdes->rxdes0 |= cpu_to_le32(FTGMAC100_RXDES0_EDORR);
+ rxdes->rxdes0 |= cpu_to_le32(priv->rxdes0_edorr_mask);
}
static void ftgmac100_rxdes_set_dma_addr(struct ftgmac100_rxdes *rxdes,
@@ -341,18 +348,27 @@ static bool ftgmac100_rxdes_ipcs_err(struct ftgmac100_rxdes *rxdes)
return rxdes->rxdes1 & cpu_to_le32(FTGMAC100_RXDES1_IP_CHKSUM_ERR);
}
+static inline struct page **ftgmac100_rxdes_page_slot(struct ftgmac100 *priv,
+ struct ftgmac100_rxdes *rxdes)
+{
+ return &priv->rx_pages[rxdes - priv->descs->rxdes];
+}
+
/*
* rxdes2 is not used by hardware. We use it to keep track of page.
* Since hardware does not touch it, we can skip cpu_to_le32()/le32_to_cpu().
*/
-static void ftgmac100_rxdes_set_page(struct ftgmac100_rxdes *rxdes, struct page *page)
+static void ftgmac100_rxdes_set_page(struct ftgmac100 *priv,
+ struct ftgmac100_rxdes *rxdes,
+ struct page *page)
{
- rxdes->rxdes2 = (unsigned int)page;
+ *ftgmac100_rxdes_page_slot(priv, rxdes) = page;
}
-static struct page *ftgmac100_rxdes_get_page(struct ftgmac100_rxdes *rxdes)
+static struct page *ftgmac100_rxdes_get_page(struct ftgmac100 *priv,
+ struct ftgmac100_rxdes *rxdes)
{
- return (struct page *)rxdes->rxdes2;
+ return *ftgmac100_rxdes_page_slot(priv, rxdes);
}
/******************************************************************************
@@ -382,7 +398,7 @@ ftgmac100_rx_locate_first_segment(struct ftgmac100 *priv)
if (ftgmac100_rxdes_first_segment(rxdes))
return rxdes;
- ftgmac100_rxdes_set_dma_own(rxdes);
+ ftgmac100_rxdes_set_dma_own(priv, rxdes);
ftgmac100_rx_pointer_advance(priv);
rxdes = ftgmac100_current_rxdes(priv);
}
@@ -453,7 +469,7 @@ static void ftgmac100_rx_drop_packet(struct ftgmac100 *priv)
if (ftgmac100_rxdes_last_segment(rxdes))
done = true;
- ftgmac100_rxdes_set_dma_own(rxdes);
+ ftgmac100_rxdes_set_dma_own(priv, rxdes);
ftgmac100_rx_pointer_advance(priv);
rxdes = ftgmac100_current_rxdes(priv);
} while (!done && ftgmac100_rxdes_packet_ready(rxdes));
@@ -501,7 +517,7 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed)
do {
dma_addr_t map = ftgmac100_rxdes_get_dma_addr(rxdes);
- struct page *page = ftgmac100_rxdes_get_page(rxdes);
+ struct page *page = ftgmac100_rxdes_get_page(priv, rxdes);
unsigned int size;
dma_unmap_page(priv->dev, map, RX_BUF_SIZE, DMA_FROM_DEVICE);
@@ -545,10 +561,11 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed)
/******************************************************************************
* internal functions (transmit descriptor)
*****************************************************************************/
-static void ftgmac100_txdes_reset(struct ftgmac100_txdes *txdes)
+static void ftgmac100_txdes_reset(const struct ftgmac100 *priv,
+ struct ftgmac100_txdes *txdes)
{
/* clear all except end of ring bit */
- txdes->txdes0 &= cpu_to_le32(FTGMAC100_TXDES0_EDOTR);
+ txdes->txdes0 &= cpu_to_le32(priv->txdes0_edotr_mask);
txdes->txdes1 = 0;
txdes->txdes2 = 0;
txdes->txdes3 = 0;
@@ -569,9 +586,10 @@ static void ftgmac100_txdes_set_dma_own(struct ftgmac100_txdes *txdes)
txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_TXDMA_OWN);
}
-static void ftgmac100_txdes_set_end_of_ring(struct ftgmac100_txdes *txdes)
+static void ftgmac100_txdes_set_end_of_ring(const struct ftgmac100 *priv,
+ struct ftgmac100_txdes *txdes)
{
- txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_EDOTR);
+ txdes->txdes0 |= cpu_to_le32(priv->txdes0_edotr_mask);
}
static void ftgmac100_txdes_set_first_segment(struct ftgmac100_txdes *txdes)
@@ -690,7 +708,7 @@ static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv)
dev_kfree_skb(skb);
- ftgmac100_txdes_reset(txdes);
+ ftgmac100_txdes_reset(priv, txdes);
ftgmac100_tx_clean_pointer_advance(priv);
@@ -779,9 +797,9 @@ static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv,
return -ENOMEM;
}
- ftgmac100_rxdes_set_page(rxdes, page);
+ ftgmac100_rxdes_set_page(priv, rxdes, page);
ftgmac100_rxdes_set_dma_addr(rxdes, map);
- ftgmac100_rxdes_set_dma_own(rxdes);
+ ftgmac100_rxdes_set_dma_own(priv, rxdes);
return 0;
}
@@ -791,7 +809,7 @@ static void ftgmac100_free_buffers(struct ftgmac100 *priv)
for (i = 0; i < RX_QUEUE_ENTRIES; i++) {
struct ftgmac100_rxdes *rxdes = &priv->descs->rxdes[i];
- struct page *page = ftgmac100_rxdes_get_page(rxdes);
+ struct page *page = ftgmac100_rxdes_get_page(priv, rxdes);
dma_addr_t map = ftgmac100_rxdes_get_dma_addr(rxdes);
if (!page)
@@ -828,7 +846,8 @@ static int ftgmac100_alloc_buffers(struct ftgmac100 *priv)
return -ENOMEM;
/* initialize RX ring */
- ftgmac100_rxdes_set_end_of_ring(&priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]);
+ ftgmac100_rxdes_set_end_of_ring(priv,
+ &priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]);
for (i = 0; i < RX_QUEUE_ENTRIES; i++) {
struct ftgmac100_rxdes *rxdes = &priv->descs->rxdes[i];
@@ -838,7 +857,8 @@ static int ftgmac100_alloc_buffers(struct ftgmac100 *priv)
}
/* initialize TX ring */
- ftgmac100_txdes_set_end_of_ring(&priv->descs->txdes[TX_QUEUE_ENTRIES - 1]);
+ ftgmac100_txdes_set_end_of_ring(priv,
+ &priv->descs->txdes[TX_QUEUE_ENTRIES - 1]);
return 0;
err:
@@ -1055,14 +1075,12 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget)
}
if (status & priv->int_mask_all & (FTGMAC100_INT_NO_RXBUF |
- FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR |
- FTGMAC100_INT_PHYSTS_CHG)) {
+ FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR)) {
if (net_ratelimit())
- netdev_info(netdev, "[ISR] = 0x%x: %s%s%s%s\n", status,
+ netdev_info(netdev, "[ISR] = 0x%x: %s%s%s\n", status,
status & FTGMAC100_INT_NO_RXBUF ? "NO_RXBUF " : "",
status & FTGMAC100_INT_RPKT_LOST ? "RPKT_LOST " : "",
- status & FTGMAC100_INT_AHB_ERR ? "AHB_ERR " : "",
- status & FTGMAC100_INT_PHYSTS_CHG ? "PHYSTS_CHG" : "");
+ status & FTGMAC100_INT_AHB_ERR ? "AHB_ERR " : "");
if (status & FTGMAC100_INT_NO_RXBUF) {
/* RX buffer unavailable */
@@ -1092,6 +1110,7 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget)
static int ftgmac100_open(struct net_device *netdev)
{
struct ftgmac100 *priv = netdev_priv(netdev);
+ unsigned int status;
int err;
err = ftgmac100_alloc_buffers(priv);
@@ -1117,6 +1136,11 @@ static int ftgmac100_open(struct net_device *netdev)
ftgmac100_init_hw(priv);
ftgmac100_start_hw(priv, priv->use_ncsi ? 100 : 10);
+
+ /* Clear stale interrupts */
+ status = ioread32(priv->base + FTGMAC100_OFFSET_ISR);
+ iowrite32(status, priv->base + FTGMAC100_OFFSET_ISR);
+
if (netdev->phydev)
phy_start(netdev->phydev);
else if (priv->use_ncsi)
@@ -1226,12 +1250,21 @@ static int ftgmac100_setup_mdio(struct net_device *netdev)
struct ftgmac100 *priv = netdev_priv(netdev);
struct platform_device *pdev = to_platform_device(priv->dev);
int i, err = 0;
+ u32 reg;
/* initialize mdio bus */
priv->mii_bus = mdiobus_alloc();
if (!priv->mii_bus)
return -EIO;
+ if (of_machine_is_compatible("aspeed,ast2400") ||
+ of_machine_is_compatible("aspeed,ast2500")) {
+ /* This driver supports the old MDIO interface */
+ reg = ioread32(priv->base + FTGMAC100_OFFSET_REVR);
+ reg &= ~FTGMAC100_REVR_NEW_MDIO_INTERFACE;
+ iowrite32(reg, priv->base + FTGMAC100_OFFSET_REVR);
+ };
+
priv->mii_bus->name = "ftgmac100_mdio";
snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%d",
pdev->name, pdev->id);
@@ -1355,9 +1388,18 @@ static int ftgmac100_probe(struct platform_device *pdev)
FTGMAC100_INT_XPKT_ETH |
FTGMAC100_INT_XPKT_LOST |
FTGMAC100_INT_AHB_ERR |
- FTGMAC100_INT_PHYSTS_CHG |
FTGMAC100_INT_RPKT_BUF |
FTGMAC100_INT_NO_RXBUF);
+
+ if (of_machine_is_compatible("aspeed,ast2400") ||
+ of_machine_is_compatible("aspeed,ast2500")) {
+ priv->rxdes0_edorr_mask = BIT(30);
+ priv->txdes0_edotr_mask = BIT(30);
+ } else {
+ priv->rxdes0_edorr_mask = BIT(15);
+ priv->txdes0_edotr_mask = BIT(15);
+ }
+
if (pdev->dev.of_node &&
of_get_property(pdev->dev.of_node, "use-ncsi", NULL)) {
if (!IS_ENABLED(CONFIG_NET_NCSI)) {
@@ -1367,7 +1409,6 @@ static int ftgmac100_probe(struct platform_device *pdev)
dev_info(&pdev->dev, "Using NCSI interface\n");
priv->use_ncsi = true;
- priv->int_mask_all &= ~FTGMAC100_INT_PHYSTS_CHG;
priv->ndev = ncsi_register_dev(netdev, ftgmac100_ncsi_handler);
if (!priv->ndev)
goto err_ncsi_dev;
diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h
index 13408d448b05..a7ce0ac8858a 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.h
+++ b/drivers/net/ethernet/faraday/ftgmac100.h
@@ -134,6 +134,11 @@
#define FTGMAC100_DMAFIFOS_TXDMA_REQ (1 << 31)
/*
+ * Feature Register
+ */
+#define FTGMAC100_REVR_NEW_MDIO_INTERFACE BIT(31)
+
+/*
* Receive buffer size register
*/
#define FTGMAC100_RBSR_SIZE(x) ((x) & 0x3fff)
@@ -152,6 +157,7 @@
#define FTGMAC100_MACCR_FULLDUP (1 << 8)
#define FTGMAC100_MACCR_GIGA_MODE (1 << 9)
#define FTGMAC100_MACCR_CRC_APD (1 << 10)
+#define FTGMAC100_MACCR_PHY_LINK_LEVEL (1 << 11)
#define FTGMAC100_MACCR_RX_RUNT (1 << 12)
#define FTGMAC100_MACCR_JUMBO_LF (1 << 13)
#define FTGMAC100_MACCR_RX_ALL (1 << 14)
@@ -189,7 +195,6 @@ struct ftgmac100_txdes {
} __attribute__ ((aligned(16)));
#define FTGMAC100_TXDES0_TXBUF_SIZE(x) ((x) & 0x3fff)
-#define FTGMAC100_TXDES0_EDOTR (1 << 15)
#define FTGMAC100_TXDES0_CRC_ERR (1 << 19)
#define FTGMAC100_TXDES0_LTS (1 << 28)
#define FTGMAC100_TXDES0_FTS (1 << 29)
@@ -215,7 +220,6 @@ struct ftgmac100_rxdes {
} __attribute__ ((aligned(16)));
#define FTGMAC100_RXDES0_VDBC 0x3fff
-#define FTGMAC100_RXDES0_EDORR (1 << 15)
#define FTGMAC100_RXDES0_MULTICAST (1 << 16)
#define FTGMAC100_RXDES0_BROADCAST (1 << 17)
#define FTGMAC100_RXDES0_RX_ERR (1 << 18)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index d7e1f8c7ae92..059aaeda46b1 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -994,10 +994,10 @@ static void hns_nic_adjust_link(struct net_device *ndev)
struct hnae_handle *h = priv->ae_handle;
int state = 1;
- if (priv->phy) {
+ if (ndev->phydev) {
h->dev->ops->adjust_link(h, ndev->phydev->speed,
ndev->phydev->duplex);
- state = priv->phy->link;
+ state = ndev->phydev->link;
}
state = state && h->dev->ops->get_status(h);
@@ -1022,7 +1022,6 @@ static void hns_nic_adjust_link(struct net_device *ndev)
*/
int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h)
{
- struct hns_nic_priv *priv = netdev_priv(ndev);
struct phy_device *phy_dev = h->phy_dev;
int ret;
@@ -1046,8 +1045,6 @@ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h)
if (h->phy_if == PHY_INTERFACE_MODE_XGMII)
phy_dev->autoneg = false;
- priv->phy = phy_dev;
-
return 0;
}
@@ -1224,8 +1221,8 @@ static int hns_nic_net_up(struct net_device *ndev)
if (ret)
goto out_start_err;
- if (priv->phy)
- phy_start(priv->phy);
+ if (ndev->phydev)
+ phy_start(ndev->phydev);
clear_bit(NIC_STATE_DOWN, &priv->state);
(void)mod_timer(&priv->service_timer, jiffies + SERVICE_TIMER_HZ);
@@ -1259,8 +1256,8 @@ static void hns_nic_net_down(struct net_device *ndev)
netif_tx_disable(ndev);
priv->link = 0;
- if (priv->phy)
- phy_stop(priv->phy);
+ if (ndev->phydev)
+ phy_stop(ndev->phydev);
ops = priv->ae_handle->dev->ops;
@@ -1359,8 +1356,7 @@ static void hns_nic_net_timeout(struct net_device *ndev)
static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr,
int cmd)
{
- struct hns_nic_priv *priv = netdev_priv(netdev);
- struct phy_device *phy_dev = priv->phy;
+ struct phy_device *phy_dev = netdev->phydev;
if (!netif_running(netdev))
return -EINVAL;
@@ -2017,9 +2013,8 @@ static int hns_nic_dev_remove(struct platform_device *pdev)
hns_nic_uninit_ring_data(priv);
priv->ring_data = NULL;
- if (priv->phy)
- phy_disconnect(priv->phy);
- priv->phy = NULL;
+ if (ndev->phydev)
+ phy_disconnect(ndev->phydev);
if (!IS_ERR_OR_NULL(priv->ae_handle))
hnae_put_handle(priv->ae_handle);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
index 44bb3015eed3..5b412de350aa 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
@@ -59,7 +59,6 @@ struct hns_nic_priv {
u32 port_id;
int phy_mode;
int phy_led_val;
- struct phy_device *phy;
struct net_device *netdev;
struct device *dev;
struct hnae_handle *ae_handle;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 5eb3245bdf86..47e59bbfd061 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -48,9 +48,9 @@ static u32 hns_nic_get_link(struct net_device *net_dev)
h = priv->ae_handle;
- if (priv->phy) {
- if (!genphy_read_status(priv->phy))
- link_stat = priv->phy->link;
+ if (net_dev->phydev) {
+ if (!genphy_read_status(net_dev->phydev))
+ link_stat = net_dev->phydev->link;
else
link_stat = 0;
}
@@ -64,15 +64,14 @@ static u32 hns_nic_get_link(struct net_device *net_dev)
}
static void hns_get_mdix_mode(struct net_device *net_dev,
- struct ethtool_cmd *cmd)
+ struct ethtool_link_ksettings *cmd)
{
int mdix_ctrl, mdix, retval, is_resolved;
- struct hns_nic_priv *priv = netdev_priv(net_dev);
- struct phy_device *phy_dev = priv->phy;
+ struct phy_device *phy_dev = net_dev->phydev;
if (!phy_dev || !phy_dev->mdio.bus) {
- cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
- cmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+ cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
+ cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
return;
}
@@ -89,35 +88,35 @@ static void hns_get_mdix_mode(struct net_device *net_dev,
switch (mdix_ctrl) {
case 0x0:
- cmd->eth_tp_mdix_ctrl = ETH_TP_MDI;
+ cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI;
break;
case 0x1:
- cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_X;
+ cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_X;
break;
case 0x3:
- cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+ cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
break;
default:
- cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
+ cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
break;
}
if (!is_resolved)
- cmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+ cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
else if (mdix)
- cmd->eth_tp_mdix = ETH_TP_MDI_X;
+ cmd->base.eth_tp_mdix = ETH_TP_MDI_X;
else
- cmd->eth_tp_mdix = ETH_TP_MDI;
+ cmd->base.eth_tp_mdix = ETH_TP_MDI;
}
/**
- *hns_nic_get_settings - implement ethtool get settings
+ *hns_nic_get_link_ksettings - implement ethtool get link ksettings
*@net_dev: net_device
- *@cmd: ethtool_cmd
+ *@cmd: ethtool_link_ksettings
*retuen 0 - success , negative --fail
*/
-static int hns_nic_get_settings(struct net_device *net_dev,
- struct ethtool_cmd *cmd)
+static int hns_nic_get_link_ksettings(struct net_device *net_dev,
+ struct ethtool_link_ksettings *cmd)
{
struct hns_nic_priv *priv = netdev_priv(net_dev);
struct hnae_handle *h;
@@ -125,6 +124,7 @@ static int hns_nic_get_settings(struct net_device *net_dev,
int ret;
u8 duplex;
u16 speed;
+ u32 supported, advertising;
if (!priv || !priv->ae_handle)
return -ESRCH;
@@ -139,38 +139,43 @@ static int hns_nic_get_settings(struct net_device *net_dev,
return -EINVAL;
}
+ ethtool_convert_link_mode_to_legacy_u32(&supported,
+ cmd->link_modes.supported);
+ ethtool_convert_link_mode_to_legacy_u32(&advertising,
+ cmd->link_modes.advertising);
+
/* When there is no phy, autoneg is off. */
- cmd->autoneg = false;
- ethtool_cmd_speed_set(cmd, speed);
- cmd->duplex = duplex;
+ cmd->base.autoneg = false;
+ cmd->base.cmd = speed;
+ cmd->base.duplex = duplex;
- if (priv->phy)
- (void)phy_ethtool_gset(priv->phy, cmd);
+ if (net_dev->phydev)
+ (void)phy_ethtool_ksettings_get(net_dev->phydev, cmd);
link_stat = hns_nic_get_link(net_dev);
if (!link_stat) {
- ethtool_cmd_speed_set(cmd, (u32)SPEED_UNKNOWN);
- cmd->duplex = DUPLEX_UNKNOWN;
+ cmd->base.speed = (u32)SPEED_UNKNOWN;
+ cmd->base.duplex = DUPLEX_UNKNOWN;
}
- if (cmd->autoneg)
- cmd->advertising |= ADVERTISED_Autoneg;
+ if (cmd->base.autoneg)
+ advertising |= ADVERTISED_Autoneg;
- cmd->supported |= h->if_support;
+ supported |= h->if_support;
if (h->phy_if == PHY_INTERFACE_MODE_SGMII) {
- cmd->supported |= SUPPORTED_TP;
- cmd->advertising |= ADVERTISED_1000baseT_Full;
+ supported |= SUPPORTED_TP;
+ advertising |= ADVERTISED_1000baseT_Full;
} else if (h->phy_if == PHY_INTERFACE_MODE_XGMII) {
- cmd->supported |= SUPPORTED_FIBRE;
- cmd->advertising |= ADVERTISED_10000baseKR_Full;
+ supported |= SUPPORTED_FIBRE;
+ advertising |= ADVERTISED_10000baseKR_Full;
}
switch (h->media_type) {
case HNAE_MEDIA_TYPE_FIBER:
- cmd->port = PORT_FIBRE;
+ cmd->base.port = PORT_FIBRE;
break;
case HNAE_MEDIA_TYPE_COPPER:
- cmd->port = PORT_TP;
+ cmd->base.port = PORT_TP;
break;
case HNAE_MEDIA_TYPE_UNKNOWN:
default:
@@ -178,23 +183,27 @@ static int hns_nic_get_settings(struct net_device *net_dev,
}
if (!(AE_IS_VER1(priv->enet_ver) && h->port_type == HNAE_PORT_DEBUG))
- cmd->supported |= SUPPORTED_Pause;
+ supported |= SUPPORTED_Pause;
+
+ ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+ supported);
+ ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+ advertising);
- cmd->transceiver = XCVR_EXTERNAL;
- cmd->mdio_support = (ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22);
+ cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22;
hns_get_mdix_mode(net_dev, cmd);
return 0;
}
/**
- *hns_nic_set_settings - implement ethtool set settings
+ *hns_nic_set_link_settings - implement ethtool set link ksettings
*@net_dev: net_device
- *@cmd: ethtool_cmd
+ *@cmd: ethtool_link_ksettings
*retuen 0 - success , negative --fail
*/
-static int hns_nic_set_settings(struct net_device *net_dev,
- struct ethtool_cmd *cmd)
+static int hns_nic_set_link_ksettings(struct net_device *net_dev,
+ const struct ethtool_link_ksettings *cmd)
{
struct hns_nic_priv *priv = netdev_priv(net_dev);
struct hnae_handle *h;
@@ -208,24 +217,25 @@ static int hns_nic_set_settings(struct net_device *net_dev,
return -ENODEV;
h = priv->ae_handle;
- speed = ethtool_cmd_speed(cmd);
+ speed = cmd->base.speed;
if (h->phy_if == PHY_INTERFACE_MODE_XGMII) {
- if (cmd->autoneg == AUTONEG_ENABLE || speed != SPEED_10000 ||
- cmd->duplex != DUPLEX_FULL)
+ if (cmd->base.autoneg == AUTONEG_ENABLE ||
+ speed != SPEED_10000 ||
+ cmd->base.duplex != DUPLEX_FULL)
return -EINVAL;
} else if (h->phy_if == PHY_INTERFACE_MODE_SGMII) {
- if (!priv->phy && cmd->autoneg == AUTONEG_ENABLE)
+ if (!net_dev->phydev && cmd->base.autoneg == AUTONEG_ENABLE)
return -EINVAL;
- if (speed == SPEED_1000 && cmd->duplex == DUPLEX_HALF)
+ if (speed == SPEED_1000 && cmd->base.duplex == DUPLEX_HALF)
return -EINVAL;
- if (priv->phy)
- return phy_ethtool_sset(priv->phy, cmd);
+ if (net_dev->phydev)
+ return phy_ethtool_ksettings_set(net_dev->phydev, cmd);
if ((speed != SPEED_10 && speed != SPEED_100 &&
- speed != SPEED_1000) || (cmd->duplex != DUPLEX_HALF &&
- cmd->duplex != DUPLEX_FULL))
+ speed != SPEED_1000) || (cmd->base.duplex != DUPLEX_HALF &&
+ cmd->base.duplex != DUPLEX_FULL))
return -EINVAL;
} else {
netdev_err(net_dev, "Not supported!");
@@ -233,7 +243,7 @@ static int hns_nic_set_settings(struct net_device *net_dev,
}
if (h->dev->ops->adjust_link) {
- h->dev->ops->adjust_link(h, (int)speed, cmd->duplex);
+ h->dev->ops->adjust_link(h, (int)speed, cmd->base.duplex);
return 0;
}
@@ -305,7 +315,7 @@ static int __lb_setup(struct net_device *ndev,
{
int ret = 0;
struct hns_nic_priv *priv = netdev_priv(ndev);
- struct phy_device *phy_dev = priv->phy;
+ struct phy_device *phy_dev = ndev->phydev;
struct hnae_handle *h = priv->ae_handle;
switch (loop) {
@@ -910,7 +920,7 @@ void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_SERDES],
ETH_GSTRING_LEN);
buff += ETH_GSTRING_LEN;
- if ((priv->phy) && (!priv->phy->is_c45))
+ if ((netdev->phydev) && (!netdev->phydev->is_c45))
memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_PHY],
ETH_GSTRING_LEN);
@@ -996,7 +1006,7 @@ int hns_get_sset_count(struct net_device *netdev, int stringset)
if (priv->ae_handle->phy_if == PHY_INTERFACE_MODE_XGMII)
cnt--;
- if ((!priv->phy) || (priv->phy->is_c45))
+ if ((!netdev->phydev) || (netdev->phydev->is_c45))
cnt--;
return cnt;
@@ -1015,8 +1025,7 @@ int hns_get_sset_count(struct net_device *netdev, int stringset)
int hns_phy_led_set(struct net_device *netdev, int value)
{
int retval;
- struct hns_nic_priv *priv = netdev_priv(netdev);
- struct phy_device *phy_dev = priv->phy;
+ struct phy_device *phy_dev = netdev->phydev;
retval = phy_write(phy_dev, HNS_PHY_PAGE_REG, HNS_PHY_PAGE_LED);
retval |= phy_write(phy_dev, HNS_LED_FC_REG, value);
@@ -1039,7 +1048,7 @@ int hns_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
{
struct hns_nic_priv *priv = netdev_priv(netdev);
struct hnae_handle *h = priv->ae_handle;
- struct phy_device *phy_dev = priv->phy;
+ struct phy_device *phy_dev = netdev->phydev;
int ret;
if (phy_dev)
@@ -1159,8 +1168,7 @@ static int hns_get_regs_len(struct net_device *net_dev)
static int hns_nic_nway_reset(struct net_device *netdev)
{
int ret = 0;
- struct hns_nic_priv *priv = netdev_priv(netdev);
- struct phy_device *phy = priv->phy;
+ struct phy_device *phy = netdev->phydev;
if (netif_running(netdev)) {
if (phy)
@@ -1267,8 +1275,6 @@ static int hns_get_rxnfc(struct net_device *netdev,
static const struct ethtool_ops hns_ethtool_ops = {
.get_drvinfo = hns_nic_get_drvinfo,
.get_link = hns_nic_get_link,
- .get_settings = hns_nic_get_settings,
- .set_settings = hns_nic_set_settings,
.get_ringparam = hns_get_ringparam,
.get_pauseparam = hns_get_pauseparam,
.set_pauseparam = hns_set_pauseparam,
@@ -1288,6 +1294,8 @@ static const struct ethtool_ops hns_ethtool_ops = {
.get_rxfh = hns_get_rss,
.set_rxfh = hns_set_rss,
.get_rxnfc = hns_get_rxnfc,
+ .get_link_ksettings = hns_nic_get_link_ksettings,
+ .set_link_ksettings = hns_nic_set_link_ksettings,
};
void hns_ethtool_set_ops(struct net_device *ndev)
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c
index 2e1b17ad52a3..ad03763e009a 100644
--- a/drivers/net/ethernet/intel/e1000e/ptp.c
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c
@@ -334,7 +334,7 @@ void e1000e_ptp_init(struct e1000_adapter *adapter)
if (IS_ERR(adapter->ptp_clock)) {
adapter->ptp_clock = NULL;
e_err("ptp_clock_register failed\n");
- } else {
+ } else if (adapter->ptp_clock) {
e_info("registered PHC clock\n");
}
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index ed39cbad24bd..f1feceab758a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -669,7 +669,7 @@ void i40e_ptp_init(struct i40e_pf *pf)
pf->ptp_clock = NULL;
dev_err(&pf->pdev->dev, "%s: ptp_clock_register failed\n",
__func__);
- } else {
+ } else if (pf->ptp_clock) {
struct timespec64 ts;
u32 regval;
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 66dfa2085cc7..1dd14e166dc8 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -1159,7 +1159,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
if (IS_ERR(adapter->ptp_clock)) {
adapter->ptp_clock = NULL;
dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n");
- } else {
+ } else if (adapter->ptp_clock) {
dev_info(&adapter->pdev->dev, "added PHC on %s\n",
adapter->netdev->name);
adapter->ptp_flags |= IGB_PTP_ENABLED;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index e5431bfe3339..a92277683a64 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -1254,7 +1254,7 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter)
adapter->ptp_clock = NULL;
e_dev_err("ptp_clock_register failed\n");
return err;
- } else
+ } else if (adapter->ptp_clock)
e_dev_info("registered PHC device on %s\n", netdev->name);
/* set default timestamp mode to disabled here. We do this in
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index ca6b5013e275..2909372c4da0 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1894,11 +1894,8 @@ static void mtk_uninit(struct net_device *dev)
struct mtk_eth *eth = mac->hw;
phy_disconnect(mac->phy_dev);
- mtk_mdio_cleanup(eth);
mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0);
mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0);
- free_irq(eth->irq[1], dev);
- free_irq(eth->irq[2], dev);
}
static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
@@ -2454,6 +2451,7 @@ static int mtk_remove(struct platform_device *pdev)
netif_napi_del(&eth->tx_napi);
netif_napi_del(&eth->rx_napi);
mtk_cleanup(eth);
+ mtk_mdio_cleanup(eth);
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index f04a423ff79d..a58d96cf1ed1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -785,17 +785,23 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO);
if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) {
+ int ret;
+
if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
return mlx4_internal_err_ret_value(dev, op,
op_modifier);
+ down_read(&mlx4_priv(dev)->cmd.switch_sem);
if (mlx4_priv(dev)->cmd.use_events)
- return mlx4_cmd_wait(dev, in_param, out_param,
- out_is_imm, in_modifier,
- op_modifier, op, timeout);
+ ret = mlx4_cmd_wait(dev, in_param, out_param,
+ out_is_imm, in_modifier,
+ op_modifier, op, timeout);
else
- return mlx4_cmd_poll(dev, in_param, out_param,
- out_is_imm, in_modifier,
- op_modifier, op, timeout);
+ ret = mlx4_cmd_poll(dev, in_param, out_param,
+ out_is_imm, in_modifier,
+ op_modifier, op, timeout);
+
+ up_read(&mlx4_priv(dev)->cmd.switch_sem);
+ return ret;
}
return mlx4_slave_cmd(dev, in_param, out_param, out_is_imm,
in_modifier, op_modifier, op, timeout);
@@ -2454,6 +2460,7 @@ int mlx4_cmd_init(struct mlx4_dev *dev)
int flags = 0;
if (!priv->cmd.initialized) {
+ init_rwsem(&priv->cmd.switch_sem);
mutex_init(&priv->cmd.slave_cmd_mutex);
sema_init(&priv->cmd.poll_sem, 1);
priv->cmd.use_events = 0;
@@ -2583,6 +2590,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev)
if (!priv->cmd.context)
return -ENOMEM;
+ down_write(&priv->cmd.switch_sem);
for (i = 0; i < priv->cmd.max_cmds; ++i) {
priv->cmd.context[i].token = i;
priv->cmd.context[i].next = i + 1;
@@ -2606,6 +2614,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev)
down(&priv->cmd.poll_sem);
priv->cmd.use_events = 1;
+ up_write(&priv->cmd.switch_sem);
return err;
}
@@ -2618,6 +2627,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
int i;
+ down_write(&priv->cmd.switch_sem);
priv->cmd.use_events = 0;
for (i = 0; i < priv->cmd.max_cmds; ++i)
@@ -2626,6 +2636,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev)
kfree(priv->cmd.context);
up(&priv->cmd.poll_sem);
+ up_write(&priv->cmd.switch_sem);
}
struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index 1494997c4f7e..08fc5fc56d43 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -298,7 +298,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
if (IS_ERR(mdev->ptp_clock)) {
mdev->ptp_clock = NULL;
mlx4_err(mdev, "ptp_clock_register failed\n");
- } else {
+ } else if (mdev->ptp_clock) {
mlx4_info(mdev, "registered PHC clock\n");
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index c46355bce613..f2e8beddcf44 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -72,7 +72,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
}
dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
frag_info->dma_dir);
- if (dma_mapping_error(priv->ddev, dma)) {
+ if (unlikely(dma_mapping_error(priv->ddev, dma))) {
put_page(page);
return -ENOMEM;
}
@@ -108,7 +108,8 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
ring_alloc[i].page_size)
continue;
- if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp))
+ if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i],
+ frag_info, gfp)))
goto out;
}
@@ -585,7 +586,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
frag_info = &priv->frag_info[nr];
if (length <= frag_info->frag_prefix_size)
break;
- if (!frags[nr].page)
+ if (unlikely(!frags[nr].page))
goto fail;
dma = be64_to_cpu(rx_desc->data[nr].addr);
@@ -625,7 +626,7 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
dma_addr_t dma;
skb = netdev_alloc_skb(priv->dev, SMALL_PACKET_SIZE + NET_IP_ALIGN);
- if (!skb) {
+ if (unlikely(!skb)) {
en_dbg(RX_ERR, priv, "Failed allocating skb\n");
return NULL;
}
@@ -736,7 +737,8 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb,
{
__wsum csum_pseudo_hdr = 0;
- if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS)
+ if (unlikely(ipv6h->nexthdr == IPPROTO_FRAGMENT ||
+ ipv6h->nexthdr == IPPROTO_HOPOPTS))
return -1;
hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr));
@@ -769,7 +771,7 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
get_fixed_ipv4_csum(hw_checksum, skb, hdr);
#if IS_ENABLED(CONFIG_IPV6)
else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6))
- if (get_fixed_ipv6_csum(hw_checksum, skb, hdr))
+ if (unlikely(get_fixed_ipv6_csum(hw_checksum, skb, hdr)))
return -1;
#endif
return 0;
@@ -796,10 +798,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
u64 timestamp;
bool l2_tunnel;
- if (!priv->port_up)
+ if (unlikely(!priv->port_up))
return 0;
- if (budget <= 0)
+ if (unlikely(budget <= 0))
return polled;
/* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
@@ -902,9 +904,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
case XDP_PASS:
break;
case XDP_TX:
- if (!mlx4_en_xmit_frame(frags, dev,
+ if (likely(!mlx4_en_xmit_frame(frags, dev,
length, tx_index,
- &doorbell_pending))
+ &doorbell_pending)))
goto consumed;
goto xdp_drop; /* Drop on xmit failure */
default:
@@ -912,7 +914,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
case XDP_ABORTED:
case XDP_DROP:
xdp_drop:
- if (mlx4_en_rx_recycle(ring, frags))
+ if (likely(mlx4_en_rx_recycle(ring, frags)))
goto consumed;
goto next;
}
@@ -1016,12 +1018,12 @@ xdp_drop:
/* GRO not possible, complete processing here */
skb = mlx4_en_rx_skb(priv, rx_desc, frags, length);
- if (!skb) {
+ if (unlikely(!skb)) {
ring->dropped++;
goto next;
}
- if (unlikely(priv->validate_loopback)) {
+ if (unlikely(priv->validate_loopback)) {
validate_loopback(priv, skb);
goto next;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index c9d7fc5159f2..c128ba3ef014 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -46,6 +46,7 @@
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <net/devlink.h>
+#include <linux/rwsem.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/driver.h>
@@ -627,6 +628,7 @@ struct mlx4_cmd {
struct mutex slave_cmd_mutex;
struct semaphore poll_sem;
struct semaphore event_sem;
+ struct rw_semaphore switch_sem;
int max_cmds;
spinlock_t context_lock;
int free_head;
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index 67146624eb58..f44d089e2ca6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -45,15 +45,12 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
struct mlx4_srq *srq;
- spin_lock(&srq_table->lock);
-
+ rcu_read_lock();
srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1));
+ rcu_read_unlock();
if (srq)
atomic_inc(&srq->refcount);
-
- spin_unlock(&srq_table->lock);
-
- if (!srq) {
+ else {
mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
return;
}
@@ -301,12 +298,11 @@ struct mlx4_srq *mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
struct mlx4_srq *srq;
- unsigned long flags;
- spin_lock_irqsave(&srq_table->lock, flags);
+ rcu_read_lock();
srq = radix_tree_lookup(&srq_table->tree,
srqn & (dev->caps.num_srqs - 1));
- spin_unlock_irqrestore(&srq_table->lock, flags);
+ rcu_read_unlock();
return srq;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 7dd4763e726e..346015407b70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -65,6 +65,8 @@
#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x3
#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6
+#define MLX5_RX_HEADROOM NET_SKB_PAD
+
#define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */
#define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8 /* >= 6, HW restriction */
#define MLX5_MPWRQ_LOG_WQE_SZ 18
@@ -99,6 +101,18 @@
#define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */
#define MLX5E_SQ_BF_BUDGET 16
+#define MLX5E_ICOSQ_MAX_WQEBBS \
+ (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB))
+
+#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
+#define MLX5E_XDP_IHS_DS_COUNT \
+ DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS)
+#define MLX5E_XDP_TX_DS_COUNT \
+ (MLX5E_XDP_IHS_DS_COUNT + \
+ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
+#define MLX5E_XDP_TX_WQEBBS \
+ DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS)
+
#define MLX5E_NUM_MAIN_GROUPS 9
static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
@@ -302,10 +316,20 @@ struct mlx5e_page_cache {
struct mlx5e_rq {
/* data path */
struct mlx5_wq_ll wq;
- u32 wqe_sz;
- struct sk_buff **skb;
- struct mlx5e_mpw_info *wqe_info;
- void *mtt_no_align;
+
+ union {
+ struct mlx5e_dma_info *dma_info;
+ struct {
+ struct mlx5e_mpw_info *info;
+ void *mtt_no_align;
+ u32 mtt_offset;
+ } mpwqe;
+ };
+ struct {
+ u8 page_order;
+ u32 wqe_sz; /* wqe data buffer size */
+ u8 map_dir; /* dma map direction */
+ } buff;
__be32 mkey_be;
struct device *pdev;
@@ -321,9 +345,9 @@ struct mlx5e_rq {
unsigned long state;
int ix;
- u32 mpwqe_mtt_offset;
struct mlx5e_rx_am am; /* Adaptive Moderation */
+ struct bpf_prog *xdp_prog;
/* control */
struct mlx5_wq_ctrl wq_ctrl;
@@ -370,11 +394,17 @@ enum {
MLX5E_SQ_STATE_BF_ENABLE,
};
-struct mlx5e_ico_wqe_info {
+struct mlx5e_sq_wqe_info {
u8 opcode;
u8 num_wqebbs;
};
+enum mlx5e_sq_type {
+ MLX5E_SQ_TXQ,
+ MLX5E_SQ_ICO,
+ MLX5E_SQ_XDP
+};
+
struct mlx5e_sq {
/* data path */
@@ -392,10 +422,20 @@ struct mlx5e_sq {
struct mlx5e_cq cq;
- /* pointers to per packet info: write@xmit, read@completion */
- struct sk_buff **skb;
- struct mlx5e_sq_dma *dma_fifo;
- struct mlx5e_tx_wqe_info *wqe_info;
+ /* pointers to per tx element info: write@xmit, read@completion */
+ union {
+ struct {
+ struct sk_buff **skb;
+ struct mlx5e_sq_dma *dma_fifo;
+ struct mlx5e_tx_wqe_info *wqe_info;
+ } txq;
+ struct mlx5e_sq_wqe_info *ico_wqe;
+ struct {
+ struct mlx5e_sq_wqe_info *wqe_info;
+ struct mlx5e_dma_info *di;
+ bool doorbell;
+ } xdp;
+ } db;
/* read only */
struct mlx5_wq_cyc wq;
@@ -417,8 +457,8 @@ struct mlx5e_sq {
struct mlx5_uar uar;
struct mlx5e_channel *channel;
int tc;
- struct mlx5e_ico_wqe_info *ico_wqe_info;
u32 rate_limit;
+ u8 type;
} ____cacheline_aligned_in_smp;
static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n)
@@ -434,8 +474,10 @@ enum channel_flags {
struct mlx5e_channel {
/* data path */
struct mlx5e_rq rq;
+ struct mlx5e_sq xdp_sq;
struct mlx5e_sq sq[MLX5E_MAX_NUM_TC];
struct mlx5e_sq icosq; /* internal control operations */
+ bool xdp;
struct napi_struct napi;
struct device *pdev;
struct net_device *netdev;
@@ -617,6 +659,7 @@ struct mlx5e_priv {
/* priv data path fields - start */
struct mlx5e_sq **txq_to_sq_map;
int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
+ struct bpf_prog *xdp_prog;
/* priv data path fields - end */
unsigned long state;
@@ -663,7 +706,7 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
int mlx5e_napi_poll(struct napi_struct *napi, int budget);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
-void mlx5e_free_tx_descs(struct mlx5e_sq *sq);
+void mlx5e_free_sq_descs(struct mlx5e_sq *sq);
void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
bool recycle);
@@ -764,7 +807,7 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix)
{
- return rq->mpwqe_mtt_offset +
+ return rq->mpwqe.mtt_offset +
wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index 847a8f3ac2b2..13dc388667b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -273,7 +273,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
tstamp->ptp = ptp_clock_register(&tstamp->ptp_info,
&priv->mdev->pdev->dev);
- if (IS_ERR_OR_NULL(tstamp->ptp)) {
+ if (IS_ERR(tstamp->ptp)) {
mlx5_core_warn(priv->mdev, "ptp_clock_register failed %ld\n",
PTR_ERR(tstamp->ptp));
tstamp->ptp = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 8595b507e200..a9fc9d4c6af4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -34,6 +34,7 @@
#include <net/pkt_cls.h>
#include <linux/mlx5/fs.h>
#include <net/vxlan.h>
+#include <linux/bpf.h>
#include "en.h"
#include "en_tc.h"
#include "eswitch.h"
@@ -50,7 +51,7 @@ struct mlx5e_sq_param {
struct mlx5_wq_param wq;
u16 max_inline;
u8 min_inline_mode;
- bool icosq;
+ enum mlx5e_sq_type type;
};
struct mlx5e_cq_param {
@@ -63,12 +64,55 @@ struct mlx5e_cq_param {
struct mlx5e_channel_param {
struct mlx5e_rq_param rq;
struct mlx5e_sq_param sq;
+ struct mlx5e_sq_param xdp_sq;
struct mlx5e_sq_param icosq;
struct mlx5e_cq_param rx_cq;
struct mlx5e_cq_param tx_cq;
struct mlx5e_cq_param icosq_cq;
};
+static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
+{
+ return MLX5_CAP_GEN(mdev, striding_rq) &&
+ MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
+ MLX5_CAP_ETH(mdev, reg_umr_sq);
+}
+
+static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type)
+{
+ priv->params.rq_wq_type = rq_type;
+ switch (priv->params.rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
+ priv->params.mpwqe_log_stride_sz = priv->params.rx_cqe_compress ?
+ MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
+ MLX5_MPWRQ_LOG_STRIDE_SIZE;
+ priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
+ priv->params.mpwqe_log_stride_sz;
+ break;
+ default: /* MLX5_WQ_TYPE_LINKED_LIST */
+ priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
+ }
+ priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
+ BIT(priv->params.log_rq_size));
+
+ mlx5_core_info(priv->mdev,
+ "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
+ priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
+ BIT(priv->params.log_rq_size),
+ BIT(priv->params.mpwqe_log_stride_sz),
+ priv->params.rx_cqe_compress_admin);
+}
+
+static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv)
+{
+ u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) &&
+ !priv->xdp_prog ?
+ MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
+ MLX5_WQ_TYPE_LINKED_LIST;
+ mlx5e_set_rq_type_params(priv, rq_type);
+}
+
static void mlx5e_update_carrier(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
@@ -136,6 +180,9 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
s->rx_csum_none += rq_stats->csum_none;
s->rx_csum_complete += rq_stats->csum_complete;
s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
+ s->rx_xdp_drop += rq_stats->xdp_drop;
+ s->rx_xdp_tx += rq_stats->xdp_tx;
+ s->rx_xdp_tx_full += rq_stats->xdp_tx_full;
s->rx_wqe_err += rq_stats->wqe_err;
s->rx_mpwqe_filler += rq_stats->mpwqe_filler;
s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
@@ -314,7 +361,7 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq,
struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
struct mlx5_wqe_data_seg *dseg = &wqe->data;
- struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
+ struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS);
u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix);
@@ -342,21 +389,21 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
int i;
- rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info),
- GFP_KERNEL, cpu_to_node(c->cpu));
- if (!rq->wqe_info)
+ rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
+ GFP_KERNEL, cpu_to_node(c->cpu));
+ if (!rq->mpwqe.info)
goto err_out;
/* We allocate more than mtt_sz as we will align the pointer */
- rq->mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
+ rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
cpu_to_node(c->cpu));
- if (unlikely(!rq->mtt_no_align))
+ if (unlikely(!rq->mpwqe.mtt_no_align))
goto err_free_wqe_info;
for (i = 0; i < wq_sz; i++) {
- struct mlx5e_mpw_info *wi = &rq->wqe_info[i];
+ struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
- wi->umr.mtt = PTR_ALIGN(rq->mtt_no_align + i * mtt_alloc,
+ wi->umr.mtt = PTR_ALIGN(rq->mpwqe.mtt_no_align + i * mtt_alloc,
MLX5_UMR_ALIGN);
wi->umr.mtt_addr = dma_map_single(c->pdev, wi->umr.mtt, mtt_sz,
PCI_DMA_TODEVICE);
@@ -370,14 +417,14 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
err_unmap_mtts:
while (--i >= 0) {
- struct mlx5e_mpw_info *wi = &rq->wqe_info[i];
+ struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
dma_unmap_single(c->pdev, wi->umr.mtt_addr, mtt_sz,
PCI_DMA_TODEVICE);
}
- kfree(rq->mtt_no_align);
+ kfree(rq->mpwqe.mtt_no_align);
err_free_wqe_info:
- kfree(rq->wqe_info);
+ kfree(rq->mpwqe.info);
err_out:
return -ENOMEM;
@@ -390,13 +437,13 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq)
int i;
for (i = 0; i < wq_sz; i++) {
- struct mlx5e_mpw_info *wi = &rq->wqe_info[i];
+ struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz,
PCI_DMA_TODEVICE);
}
- kfree(rq->mtt_no_align);
- kfree(rq->wqe_info);
+ kfree(rq->mpwqe.mtt_no_align);
+ kfree(rq->mpwqe.info);
}
static int mlx5e_create_rq(struct mlx5e_channel *c,
@@ -408,6 +455,8 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
void *rqc = param->rqc;
void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
u32 byte_count;
+ u32 frag_sz;
+ int npages;
int wq_sz;
int err;
int i;
@@ -430,6 +479,11 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
rq->channel = c;
rq->ix = c->ix;
rq->priv = c->priv;
+ rq->xdp_prog = priv->xdp_prog;
+
+ rq->buff.map_dir = DMA_FROM_DEVICE;
+ if (rq->xdp_prog)
+ rq->buff.map_dir = DMA_BIDIRECTIONAL;
switch (priv->params.rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
@@ -437,34 +491,45 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
- rq->mpwqe_mtt_offset = c->ix *
+ rq->mpwqe.mtt_offset = c->ix *
MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size));
rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
- rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
- byte_count = rq->wqe_sz;
+
+ rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
+ byte_count = rq->buff.wqe_sz;
rq->mkey_be = cpu_to_be32(c->priv->umr_mkey.key);
err = mlx5e_rq_alloc_mpwqe_info(rq, c);
if (err)
goto err_rq_wq_destroy;
break;
default: /* MLX5_WQ_TYPE_LINKED_LIST */
- rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL,
- cpu_to_node(c->cpu));
- if (!rq->skb) {
+ rq->dma_info = kzalloc_node(wq_sz * sizeof(*rq->dma_info),
+ GFP_KERNEL, cpu_to_node(c->cpu));
+ if (!rq->dma_info) {
err = -ENOMEM;
goto err_rq_wq_destroy;
}
+
rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
rq->alloc_wqe = mlx5e_alloc_rx_wqe;
rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
- rq->wqe_sz = (priv->params.lro_en) ?
+ rq->buff.wqe_sz = (priv->params.lro_en) ?
priv->params.lro_wqe_sz :
MLX5E_SW2HW_MTU(priv->netdev->mtu);
- rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz);
- byte_count = rq->wqe_sz;
+ byte_count = rq->buff.wqe_sz;
+
+ /* calc the required page order */
+ frag_sz = MLX5_RX_HEADROOM +
+ byte_count /* packet data */ +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ frag_sz = SKB_DATA_ALIGN(frag_sz);
+
+ npages = DIV_ROUND_UP(frag_sz, PAGE_SIZE);
+ rq->buff.page_order = order_base_2(npages);
+
byte_count |= MLX5_HW_START_PADDING;
rq->mkey_be = c->mkey_be;
}
@@ -482,6 +547,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
rq->page_cache.head = 0;
rq->page_cache.tail = 0;
+ if (rq->xdp_prog)
+ bpf_prog_add(rq->xdp_prog, 1);
+
return 0;
err_rq_wq_destroy:
@@ -494,12 +562,15 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
{
int i;
+ if (rq->xdp_prog)
+ bpf_prog_put(rq->xdp_prog);
+
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
mlx5e_rq_free_mpwqe_info(rq);
break;
default: /* MLX5_WQ_TYPE_LINKED_LIST */
- kfree(rq->skb);
+ kfree(rq->dma_info);
}
for (i = rq->page_cache.head; i != rq->page_cache.tail;
@@ -641,7 +712,7 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
/* UMR WQE (if in progress) is always at wq->head */
if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
- mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]);
+ mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]);
while (!mlx5_wq_ll_is_empty(wq)) {
wqe_ix_be = *wq->tail_next;
@@ -676,8 +747,8 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
if (param->am_enabled)
set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
- sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP;
- sq->ico_wqe_info[pi].num_wqebbs = 1;
+ sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
+ sq->db.ico_wqe[pi].num_wqebbs = 1;
mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */
return 0;
@@ -701,26 +772,65 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq)
mlx5e_destroy_rq(rq);
}
-static void mlx5e_free_sq_db(struct mlx5e_sq *sq)
+static void mlx5e_free_sq_xdp_db(struct mlx5e_sq *sq)
{
- kfree(sq->wqe_info);
- kfree(sq->dma_fifo);
- kfree(sq->skb);
+ kfree(sq->db.xdp.di);
+ kfree(sq->db.xdp.wqe_info);
}
-static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa)
+static int mlx5e_alloc_sq_xdp_db(struct mlx5e_sq *sq, int numa)
{
int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
- int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
- sq->skb = kzalloc_node(wq_sz * sizeof(*sq->skb), GFP_KERNEL, numa);
- sq->dma_fifo = kzalloc_node(df_sz * sizeof(*sq->dma_fifo), GFP_KERNEL,
- numa);
- sq->wqe_info = kzalloc_node(wq_sz * sizeof(*sq->wqe_info), GFP_KERNEL,
- numa);
+ sq->db.xdp.di = kzalloc_node(sizeof(*sq->db.xdp.di) * wq_sz,
+ GFP_KERNEL, numa);
+ sq->db.xdp.wqe_info = kzalloc_node(sizeof(*sq->db.xdp.wqe_info) * wq_sz,
+ GFP_KERNEL, numa);
+ if (!sq->db.xdp.di || !sq->db.xdp.wqe_info) {
+ mlx5e_free_sq_xdp_db(sq);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void mlx5e_free_sq_ico_db(struct mlx5e_sq *sq)
+{
+ kfree(sq->db.ico_wqe);
+}
+
+static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa)
+{
+ u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
- if (!sq->skb || !sq->dma_fifo || !sq->wqe_info) {
- mlx5e_free_sq_db(sq);
+ sq->db.ico_wqe = kzalloc_node(sizeof(*sq->db.ico_wqe) * wq_sz,
+ GFP_KERNEL, numa);
+ if (!sq->db.ico_wqe)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void mlx5e_free_sq_txq_db(struct mlx5e_sq *sq)
+{
+ kfree(sq->db.txq.wqe_info);
+ kfree(sq->db.txq.dma_fifo);
+ kfree(sq->db.txq.skb);
+}
+
+static int mlx5e_alloc_sq_txq_db(struct mlx5e_sq *sq, int numa)
+{
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
+
+ sq->db.txq.skb = kzalloc_node(wq_sz * sizeof(*sq->db.txq.skb),
+ GFP_KERNEL, numa);
+ sq->db.txq.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.txq.dma_fifo),
+ GFP_KERNEL, numa);
+ sq->db.txq.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.txq.wqe_info),
+ GFP_KERNEL, numa);
+ if (!sq->db.txq.skb || !sq->db.txq.dma_fifo || !sq->db.txq.wqe_info) {
+ mlx5e_free_sq_txq_db(sq);
return -ENOMEM;
}
@@ -729,6 +839,46 @@ static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa)
return 0;
}
+static void mlx5e_free_sq_db(struct mlx5e_sq *sq)
+{
+ switch (sq->type) {
+ case MLX5E_SQ_TXQ:
+ mlx5e_free_sq_txq_db(sq);
+ break;
+ case MLX5E_SQ_ICO:
+ mlx5e_free_sq_ico_db(sq);
+ break;
+ case MLX5E_SQ_XDP:
+ mlx5e_free_sq_xdp_db(sq);
+ break;
+ }
+}
+
+static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa)
+{
+ switch (sq->type) {
+ case MLX5E_SQ_TXQ:
+ return mlx5e_alloc_sq_txq_db(sq, numa);
+ case MLX5E_SQ_ICO:
+ return mlx5e_alloc_sq_ico_db(sq, numa);
+ case MLX5E_SQ_XDP:
+ return mlx5e_alloc_sq_xdp_db(sq, numa);
+ }
+
+ return 0;
+}
+
+static int mlx5e_sq_get_max_wqebbs(u8 sq_type)
+{
+ switch (sq_type) {
+ case MLX5E_SQ_ICO:
+ return MLX5E_ICOSQ_MAX_WQEBBS;
+ case MLX5E_SQ_XDP:
+ return MLX5E_XDP_TX_WQEBBS;
+ }
+ return MLX5_SEND_WQE_MAX_WQEBBS;
+}
+
static int mlx5e_create_sq(struct mlx5e_channel *c,
int tc,
struct mlx5e_sq_param *param,
@@ -741,6 +891,13 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
int err;
+ sq->type = param->type;
+ sq->pdev = c->pdev;
+ sq->tstamp = &priv->tstamp;
+ sq->mkey_be = c->mkey_be;
+ sq->channel = c;
+ sq->tc = tc;
+
err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf));
if (err)
return err;
@@ -769,18 +926,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
if (err)
goto err_sq_wq_destroy;
- if (param->icosq) {
- u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
-
- sq->ico_wqe_info = kzalloc_node(sizeof(*sq->ico_wqe_info) *
- wq_sz,
- GFP_KERNEL,
- cpu_to_node(c->cpu));
- if (!sq->ico_wqe_info) {
- err = -ENOMEM;
- goto err_free_sq_db;
- }
- } else {
+ if (sq->type == MLX5E_SQ_TXQ) {
int txq_ix;
txq_ix = c->ix + tc * priv->params.num_channels;
@@ -788,19 +934,11 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
priv->txq_to_sq_map[txq_ix] = sq;
}
- sq->pdev = c->pdev;
- sq->tstamp = &priv->tstamp;
- sq->mkey_be = c->mkey_be;
- sq->channel = c;
- sq->tc = tc;
- sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS;
+ sq->edge = (sq->wq.sz_m1 + 1) - mlx5e_sq_get_max_wqebbs(sq->type);
sq->bf_budget = MLX5E_SQ_BF_BUDGET;
return 0;
-err_free_sq_db:
- mlx5e_free_sq_db(sq);
-
err_sq_wq_destroy:
mlx5_wq_destroy(&sq->wq_ctrl);
@@ -815,7 +953,6 @@ static void mlx5e_destroy_sq(struct mlx5e_sq *sq)
struct mlx5e_channel *c = sq->channel;
struct mlx5e_priv *priv = c->priv;
- kfree(sq->ico_wqe_info);
mlx5e_free_sq_db(sq);
mlx5_wq_destroy(&sq->wq_ctrl);
mlx5_unmap_free_uar(priv->mdev, &sq->uar);
@@ -844,11 +981,12 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param)
memcpy(sqc, param->sqc, sizeof(param->sqc));
- MLX5_SET(sqc, sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]);
+ MLX5_SET(sqc, sqc, tis_num_0, param->type == MLX5E_SQ_ICO ?
+ 0 : priv->tisn[sq->tc]);
MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode);
MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
- MLX5_SET(sqc, sqc, tis_lst_sz, param->icosq ? 0 : 1);
+ MLX5_SET(sqc, sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 0 : 1);
MLX5_SET(sqc, sqc, flush_in_error_en, 1);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
@@ -963,12 +1101,14 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq)
netif_tx_disable_queue(sq->txq);
/* last doorbell out, godspeed .. */
- if (mlx5e_sq_has_room_for(sq, 1))
+ if (mlx5e_sq_has_room_for(sq, 1)) {
+ sq->db.txq.skb[(sq->pc & sq->wq.sz_m1)] = NULL;
mlx5e_send_nop(sq, true);
+ }
}
mlx5e_disable_sq(sq);
- mlx5e_free_tx_descs(sq);
+ mlx5e_free_sq_descs(sq);
mlx5e_destroy_sq(sq);
}
@@ -1329,14 +1469,31 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
}
}
+ if (priv->xdp_prog) {
+ /* XDP SQ CQ params are same as normal TXQ sq CQ params */
+ err = mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq,
+ priv->params.tx_cq_moderation);
+ if (err)
+ goto err_close_sqs;
+
+ err = mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq);
+ if (err) {
+ mlx5e_close_cq(&c->xdp_sq.cq);
+ goto err_close_sqs;
+ }
+ }
+
+ c->xdp = !!priv->xdp_prog;
err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
if (err)
- goto err_close_sqs;
+ goto err_close_xdp_sq;
netif_set_xps_queue(netdev, get_cpu_mask(c->cpu), ix);
*cp = c;
return 0;
+err_close_xdp_sq:
+ mlx5e_close_sq(&c->xdp_sq);
err_close_sqs:
mlx5e_close_sqs(c);
@@ -1365,9 +1522,13 @@ err_napi_del:
static void mlx5e_close_channel(struct mlx5e_channel *c)
{
mlx5e_close_rq(&c->rq);
+ if (c->xdp)
+ mlx5e_close_sq(&c->xdp_sq);
mlx5e_close_sqs(c);
mlx5e_close_sq(&c->icosq);
napi_disable(&c->napi);
+ if (c->xdp)
+ mlx5e_close_cq(&c->xdp_sq.cq);
mlx5e_close_cq(&c->rq.cq);
mlx5e_close_tx_cqs(c);
mlx5e_close_cq(&c->icosq.cq);
@@ -1441,6 +1602,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
param->max_inline = priv->params.tx_max_inline;
param->min_inline_mode = priv->params.tx_min_inline_mode;
+ param->type = MLX5E_SQ_TXQ;
}
static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
@@ -1514,7 +1676,22 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
- param->icosq = true;
+ param->type = MLX5E_SQ_ICO;
+}
+
+static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(priv, param);
+ MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
+
+ param->max_inline = priv->params.tx_max_inline;
+ /* FOR XDP SQs will support only L2 inline mode */
+ param->min_inline_mode = MLX5_INLINE_MODE_NONE;
+ param->type = MLX5E_SQ_XDP;
}
static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam)
@@ -1523,6 +1700,7 @@ static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_chan
mlx5e_build_rq_param(priv, &cparam->rq);
mlx5e_build_sq_param(priv, &cparam->sq);
+ mlx5e_build_xdpsq_param(priv, &cparam->xdp_sq);
mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz);
mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
@@ -2901,6 +3079,92 @@ static void mlx5e_tx_timeout(struct net_device *dev)
schedule_work(&priv->tx_timeout_work);
}
+static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct bpf_prog *old_prog;
+ int err = 0;
+ bool reset, was_opened;
+ int i;
+
+ mutex_lock(&priv->state_lock);
+
+ if ((netdev->features & NETIF_F_LRO) && prog) {
+ netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n");
+ err = -EINVAL;
+ goto unlock;
+ }
+
+ was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ /* no need for full reset when exchanging programs */
+ reset = (!priv->xdp_prog || !prog);
+
+ if (was_opened && reset)
+ mlx5e_close_locked(netdev);
+
+ /* exchange programs */
+ old_prog = xchg(&priv->xdp_prog, prog);
+ if (prog)
+ bpf_prog_add(prog, 1);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ if (reset) /* change RQ type according to priv->xdp_prog */
+ mlx5e_set_rq_priv_params(priv);
+
+ if (was_opened && reset)
+ mlx5e_open_locked(netdev);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
+ goto unlock;
+
+ /* exchanging programs w/o reset, we update ref counts on behalf
+ * of the channels RQs here.
+ */
+ bpf_prog_add(prog, priv->params.num_channels);
+ for (i = 0; i < priv->params.num_channels; i++) {
+ struct mlx5e_channel *c = priv->channel[i];
+
+ set_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state);
+ napi_synchronize(&c->napi);
+ /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */
+
+ old_prog = xchg(&c->rq.xdp_prog, prog);
+
+ clear_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state);
+ /* napi_schedule in case we have missed anything */
+ set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags);
+ napi_schedule(&c->napi);
+
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ }
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static bool mlx5e_xdp_attached(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return !!priv->xdp_prog;
+}
+
+static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return mlx5e_xdp_set(dev, xdp->prog);
+ case XDP_QUERY_PROG:
+ xdp->prog_attached = mlx5e_xdp_attached(dev);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static const struct net_device_ops mlx5e_netdev_ops_basic = {
.ndo_open = mlx5e_open,
.ndo_stop = mlx5e_close,
@@ -2920,6 +3184,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = {
.ndo_rx_flow_steer = mlx5e_rx_flow_steer,
#endif
.ndo_tx_timeout = mlx5e_tx_timeout,
+ .ndo_xdp = mlx5e_xdp,
};
static const struct net_device_ops mlx5e_netdev_ops_sriov = {
@@ -2951,6 +3216,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = {
.ndo_set_vf_link_state = mlx5e_set_vf_link_state,
.ndo_get_vf_stats = mlx5e_get_vf_stats,
.ndo_tx_timeout = mlx5e_tx_timeout,
+ .ndo_xdp = mlx5e_xdp,
};
static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@ -3025,13 +3291,6 @@ void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev,
indirection_rqt[i] = i % num_channels;
}
-static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
-{
- return MLX5_CAP_GEN(mdev, striding_rq) &&
- MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
- MLX5_CAP_ETH(mdev, reg_umr_sq);
-}
-
static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw)
{
enum pcie_link_width width;
@@ -3111,11 +3370,13 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
- priv->params.log_sq_size =
- MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
- priv->params.rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) ?
- MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
- MLX5_WQ_TYPE_LINKED_LIST;
+ priv->mdev = mdev;
+ priv->netdev = netdev;
+ priv->params.num_channels = profile->max_nch(mdev);
+ priv->profile = profile;
+ priv->ppriv = ppriv;
+
+ priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
/* set CQE compression */
priv->params.rx_cqe_compress_admin = false;
@@ -3128,33 +3389,11 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
priv->params.rx_cqe_compress_admin =
cqe_compress_heuristic(link_speed, pci_bw);
}
-
priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin;
- switch (priv->params.rq_wq_type) {
- case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
- priv->params.mpwqe_log_stride_sz =
- priv->params.rx_cqe_compress ?
- MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
- MLX5_MPWRQ_LOG_STRIDE_SIZE;
- priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
- priv->params.mpwqe_log_stride_sz;
+ mlx5e_set_rq_priv_params(priv);
+ if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
priv->params.lro_en = true;
- break;
- default: /* MLX5_WQ_TYPE_LINKED_LIST */
- priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
- }
-
- mlx5_core_info(mdev,
- "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
- priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
- BIT(priv->params.log_rq_size),
- BIT(priv->params.mpwqe_log_stride_sz),
- priv->params.rx_cqe_compress_admin);
-
- priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
- BIT(priv->params.log_rq_size));
priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode);
@@ -3174,19 +3413,16 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt,
MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev));
- priv->params.lro_wqe_sz =
- MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+ priv->params.lro_wqe_sz =
+ MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ -
+ /* Extra room needed for build_skb */
+ MLX5_RX_HEADROOM -
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
/* Initialize pflags */
MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER,
priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
- priv->mdev = mdev;
- priv->netdev = netdev;
- priv->params.num_channels = profile->max_nch(mdev);
- priv->profile = profile;
- priv->ppriv = ppriv;
-
#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_ets_init(priv);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index dc8677933f76..0a81bd34abbe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -179,50 +179,99 @@ unlock:
mutex_unlock(&priv->state_lock);
}
-int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
+#define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT)
+
+static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
{
- struct sk_buff *skb;
- dma_addr_t dma_addr;
+ struct mlx5e_page_cache *cache = &rq->page_cache;
+ u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
- skb = napi_alloc_skb(rq->cq.napi, rq->wqe_sz);
- if (unlikely(!skb))
- return -ENOMEM;
+ if (tail_next == cache->head) {
+ rq->stats.cache_full++;
+ return false;
+ }
- dma_addr = dma_map_single(rq->pdev,
- /* hw start padding */
- skb->data,
- /* hw end padding */
- rq->wqe_sz,
- DMA_FROM_DEVICE);
+ cache->page_cache[cache->tail] = *dma_info;
+ cache->tail = tail_next;
+ return true;
+}
- if (unlikely(dma_mapping_error(rq->pdev, dma_addr)))
- goto err_free_skb;
+static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ struct mlx5e_page_cache *cache = &rq->page_cache;
- *((dma_addr_t *)skb->cb) = dma_addr;
- wqe->data.addr = cpu_to_be64(dma_addr);
+ if (unlikely(cache->head == cache->tail)) {
+ rq->stats.cache_empty++;
+ return false;
+ }
- rq->skb[ix] = skb;
+ if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
+ rq->stats.cache_busy++;
+ return false;
+ }
+
+ *dma_info = cache->page_cache[cache->head];
+ cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
+ rq->stats.cache_reuse++;
+
+ dma_sync_single_for_device(rq->pdev, dma_info->addr,
+ RQ_PAGE_SIZE(rq),
+ DMA_FROM_DEVICE);
+ return true;
+}
+
+static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ struct page *page;
+
+ if (mlx5e_rx_cache_get(rq, dma_info))
+ return 0;
+
+ page = dev_alloc_pages(rq->buff.page_order);
+ if (unlikely(!page))
+ return -ENOMEM;
+
+ dma_info->page = page;
+ dma_info->addr = dma_map_page(rq->pdev, page, 0,
+ RQ_PAGE_SIZE(rq), rq->buff.map_dir);
+ if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
+ put_page(page);
+ return -ENOMEM;
+ }
return 0;
+}
-err_free_skb:
- dev_kfree_skb(skb);
+void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
+ bool recycle)
+{
+ if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info))
+ return;
- return -ENOMEM;
+ dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq),
+ rq->buff.map_dir);
+ put_page(dma_info->page);
+}
+
+int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
+{
+ struct mlx5e_dma_info *di = &rq->dma_info[ix];
+
+ if (unlikely(mlx5e_page_alloc_mapped(rq, di)))
+ return -ENOMEM;
+
+ wqe->data.addr = cpu_to_be64(di->addr + MLX5_RX_HEADROOM);
+ return 0;
}
void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
{
- struct sk_buff *skb = rq->skb[ix];
+ struct mlx5e_dma_info *di = &rq->dma_info[ix];
- if (skb) {
- rq->skb[ix] = NULL;
- dma_unmap_single(rq->pdev,
- *((dma_addr_t *)skb->cb),
- rq->wqe_sz,
- DMA_FROM_DEVICE);
- dev_kfree_skb(skb);
- }
+ mlx5e_page_release(rq, di, true);
}
static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
@@ -279,7 +328,7 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev,
static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
{
- struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
+ struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
struct mlx5e_sq *sq = &rq->channel->icosq;
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5e_umr_wqe *wqe;
@@ -288,8 +337,8 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
/* fill sq edge with nops to avoid wqe wrap around */
while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
- sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP;
- sq->ico_wqe_info[pi].num_wqebbs = 1;
+ sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
+ sq->db.ico_wqe[pi].num_wqebbs = 1;
mlx5e_send_nop(sq, true);
}
@@ -299,90 +348,17 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
MLX5_OPCODE_UMR);
- sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR;
- sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs;
+ sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
+ sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs;
sq->pc += num_wqebbs;
mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
}
-static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
-{
- struct mlx5e_page_cache *cache = &rq->page_cache;
- u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
-
- if (tail_next == cache->head) {
- rq->stats.cache_full++;
- return false;
- }
-
- cache->page_cache[cache->tail] = *dma_info;
- cache->tail = tail_next;
- return true;
-}
-
-static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
-{
- struct mlx5e_page_cache *cache = &rq->page_cache;
-
- if (unlikely(cache->head == cache->tail)) {
- rq->stats.cache_empty++;
- return false;
- }
-
- if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
- rq->stats.cache_busy++;
- return false;
- }
-
- *dma_info = cache->page_cache[cache->head];
- cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
- rq->stats.cache_reuse++;
-
- dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
- DMA_FROM_DEVICE);
- return true;
-}
-
-static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
-{
- struct page *page;
-
- if (mlx5e_rx_cache_get(rq, dma_info))
- return 0;
-
- page = dev_alloc_page();
- if (unlikely(!page))
- return -ENOMEM;
-
- dma_info->page = page;
- dma_info->addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE,
- DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
- put_page(page);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
- bool recycle)
-{
- if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info))
- return;
-
- dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, DMA_FROM_DEVICE);
- put_page(dma_info->page);
-}
-
static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq,
struct mlx5e_rx_wqe *wqe,
u16 ix)
{
- struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
+ struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT;
int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
int err;
@@ -436,7 +412,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);
if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) {
- mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]);
+ mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]);
return;
}
@@ -448,7 +424,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
mlx5_wq_ll_update_db_record(wq);
}
-int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
+int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
{
int err;
@@ -462,7 +438,7 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
- struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
+ struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
mlx5e_free_rx_mpwqe(rq, wi);
}
@@ -656,33 +632,154 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
napi_gro_receive(rq->cq.napi, skb);
}
+static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi = (sq->pc - MLX5E_XDP_TX_WQEBBS) & wq->sz_m1; /* last pi */
+
+ wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+
+ wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
+}
+
+static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *di,
+ unsigned int data_offset,
+ int len)
+{
+ struct mlx5e_sq *sq = &rq->channel->xdp_sq;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi = sq->pc & wq->sz_m1;
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ struct mlx5e_sq_wqe_info *wi = &sq->db.xdp.wqe_info[pi];
+
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
+ struct mlx5_wqe_data_seg *dseg;
+
+ dma_addr_t dma_addr = di->addr + data_offset + MLX5E_XDP_MIN_INLINE;
+ unsigned int dma_len = len - MLX5E_XDP_MIN_INLINE;
+ void *data = page_address(di->page) + data_offset;
+
+ if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) {
+ if (sq->db.xdp.doorbell) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ sq->db.xdp.doorbell = false;
+ }
+ rq->stats.xdp_tx_full++;
+ mlx5e_page_release(rq, di, true);
+ return;
+ }
+
+ dma_sync_single_for_device(sq->pdev, dma_addr, dma_len,
+ PCI_DMA_TODEVICE);
+
+ memset(wqe, 0, sizeof(*wqe));
+
+ /* copy the inline part */
+ memcpy(eseg->inline_hdr_start, data, MLX5E_XDP_MIN_INLINE);
+ eseg->inline_hdr_sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
+
+ dseg = (struct mlx5_wqe_data_seg *)cseg + (MLX5E_XDP_TX_DS_COUNT - 1);
+
+ /* write the dma part */
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->byte_count = cpu_to_be32(dma_len);
+ dseg->lkey = sq->mkey_be;
+
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | MLX5E_XDP_TX_DS_COUNT);
+
+ sq->db.xdp.di[pi] = *di;
+ wi->opcode = MLX5_OPCODE_SEND;
+ wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS;
+ sq->pc += MLX5E_XDP_TX_WQEBBS;
+
+ sq->db.xdp.doorbell = true;
+ rq->stats.xdp_tx++;
+}
+
+/* returns true if packet was consumed by xdp */
+static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
+ const struct bpf_prog *prog,
+ struct mlx5e_dma_info *di,
+ void *data, u16 len)
+{
+ struct xdp_buff xdp;
+ u32 act;
+
+ if (!prog)
+ return false;
+
+ xdp.data = data;
+ xdp.data_end = xdp.data + len;
+ act = bpf_prog_run_xdp(prog, &xdp);
+ switch (act) {
+ case XDP_PASS:
+ return false;
+ case XDP_TX:
+ mlx5e_xmit_xdp_frame(rq, di, MLX5_RX_HEADROOM, len);
+ return true;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ case XDP_ABORTED:
+ case XDP_DROP:
+ rq->stats.xdp_drop++;
+ mlx5e_page_release(rq, di, true);
+ return true;
+ }
+}
+
void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
+ struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog);
+ struct mlx5e_dma_info *di;
struct mlx5e_rx_wqe *wqe;
- struct sk_buff *skb;
__be16 wqe_counter_be;
+ struct sk_buff *skb;
u16 wqe_counter;
+ void *va, *data;
u32 cqe_bcnt;
wqe_counter_be = cqe->wqe_counter;
wqe_counter = be16_to_cpu(wqe_counter_be);
wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
- skb = rq->skb[wqe_counter];
- prefetch(skb->data);
- rq->skb[wqe_counter] = NULL;
-
- dma_unmap_single(rq->pdev,
- *((dma_addr_t *)skb->cb),
- rq->wqe_sz,
- DMA_FROM_DEVICE);
+ di = &rq->dma_info[wqe_counter];
+ va = page_address(di->page);
+ data = va + MLX5_RX_HEADROOM;
+
+ dma_sync_single_range_for_cpu(rq->pdev,
+ di->addr,
+ MLX5_RX_HEADROOM,
+ rq->buff.wqe_sz,
+ DMA_FROM_DEVICE);
+ prefetch(data);
+ cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
rq->stats.wqe_err++;
- dev_kfree_skb(skb);
+ mlx5e_page_release(rq, di, true);
goto wq_ll_pop;
}
- cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
+ if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt))
+ goto wq_ll_pop; /* page/packet was consumed by XDP */
+
+ skb = build_skb(va, RQ_PAGE_SIZE(rq));
+ if (unlikely(!skb)) {
+ rq->stats.buff_alloc_err++;
+ mlx5e_page_release(rq, di, true);
+ goto wq_ll_pop;
+ }
+
+ /* queue up for recycling ..*/
+ page_ref_inc(di->page);
+ mlx5e_page_release(rq, di, true);
+
+ skb_reserve(skb, MLX5_RX_HEADROOM);
skb_put(skb, cqe_bcnt);
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
@@ -734,7 +831,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
u16 wqe_id = be16_to_cpu(cqe->wqe_id);
- struct mlx5e_mpw_info *wi = &rq->wqe_info[wqe_id];
+ struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id];
struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id);
struct sk_buff *skb;
u16 cqe_bcnt;
@@ -776,6 +873,7 @@ mpwrq_cqe_out:
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
{
struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
+ struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq;
int work_done = 0;
if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state)))
@@ -802,6 +900,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
rq->handle_rx_cqe(rq, cqe);
}
+ if (xdp_sq->db.xdp.doorbell) {
+ mlx5e_xmit_xdp_doorbell(xdp_sq);
+ xdp_sq->db.xdp.doorbell = false;
+ }
+
mlx5_cqwq_update_db_record(&cq->wq);
/* ensure cq space is freed before enabling more cqes */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 6af8d79e8c2a..57452fdc5154 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -65,6 +65,9 @@ struct mlx5e_sw_stats {
u64 rx_csum_none;
u64 rx_csum_complete;
u64 rx_csum_unnecessary_inner;
+ u64 rx_xdp_drop;
+ u64 rx_xdp_tx;
+ u64 rx_xdp_tx_full;
u64 tx_csum_partial;
u64 tx_csum_partial_inner;
u64 tx_queue_stopped;
@@ -100,6 +103,9 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) },
@@ -278,6 +284,9 @@ struct mlx5e_rq_stats {
u64 csum_none;
u64 lro_packets;
u64 lro_bytes;
+ u64 xdp_drop;
+ u64 xdp_tx;
+ u64 xdp_tx_full;
u64 wqe_err;
u64 mpwqe_filler;
u64 buff_alloc_err;
@@ -295,6 +304,9 @@ static const struct counter_desc rq_stats_desc[] = {
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index eb0e72537f10..70a717382357 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -52,7 +52,6 @@ void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw)
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | 0x01);
- sq->skb[pi] = NULL;
sq->pc++;
sq->stats.nop++;
@@ -82,15 +81,17 @@ static inline void mlx5e_dma_push(struct mlx5e_sq *sq,
u32 size,
enum mlx5e_dma_map_type map_type)
{
- sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr;
- sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size;
- sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].type = map_type;
+ u32 i = sq->dma_fifo_pc & sq->dma_fifo_mask;
+
+ sq->db.txq.dma_fifo[i].addr = addr;
+ sq->db.txq.dma_fifo[i].size = size;
+ sq->db.txq.dma_fifo[i].type = map_type;
sq->dma_fifo_pc++;
}
static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i)
{
- return &sq->dma_fifo[i & sq->dma_fifo_mask];
+ return &sq->db.txq.dma_fifo[i & sq->dma_fifo_mask];
}
static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, u8 num_dma)
@@ -221,7 +222,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
u16 pi = sq->pc & wq->sz_m1;
struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
- struct mlx5e_tx_wqe_info *wi = &sq->wqe_info[pi];
+ struct mlx5e_tx_wqe_info *wi = &sq->db.txq.wqe_info[pi];
struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
@@ -341,7 +342,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
- sq->skb[pi] = skb;
+ sq->db.txq.skb[pi] = skb;
wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
sq->pc += wi->num_wqebbs;
@@ -368,8 +369,10 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
}
/* fill sq edge with nops to avoid wqe wrap around */
- while ((sq->pc & wq->sz_m1) > sq->edge)
+ while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
+ sq->db.txq.skb[pi] = NULL;
mlx5e_send_nop(sq, false);
+ }
if (bf)
sq->bf_budget--;
@@ -442,8 +445,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
last_wqe = (sqcc == wqe_counter);
ci = sqcc & sq->wq.sz_m1;
- skb = sq->skb[ci];
- wi = &sq->wqe_info[ci];
+ skb = sq->db.txq.skb[ci];
+ wi = &sq->db.txq.wqe_info[ci];
if (unlikely(!skb)) { /* nop */
sqcc++;
@@ -492,7 +495,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
-void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
+static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq)
{
struct mlx5e_tx_wqe_info *wi;
struct sk_buff *skb;
@@ -501,8 +504,8 @@ void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
while (sq->cc != sq->pc) {
ci = sq->cc & sq->wq.sz_m1;
- skb = sq->skb[ci];
- wi = &sq->wqe_info[ci];
+ skb = sq->db.txq.skb[ci];
+ wi = &sq->db.txq.wqe_info[ci];
if (!skb) { /* nop */
sq->cc++;
@@ -520,3 +523,37 @@ void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
sq->cc += wi->num_wqebbs;
}
}
+
+static void mlx5e_free_xdp_sq_descs(struct mlx5e_sq *sq)
+{
+ struct mlx5e_sq_wqe_info *wi;
+ struct mlx5e_dma_info *di;
+ u16 ci;
+
+ while (sq->cc != sq->pc) {
+ ci = sq->cc & sq->wq.sz_m1;
+ di = &sq->db.xdp.di[ci];
+ wi = &sq->db.xdp.wqe_info[ci];
+
+ if (wi->opcode == MLX5_OPCODE_NOP) {
+ sq->cc++;
+ continue;
+ }
+
+ sq->cc += wi->num_wqebbs;
+
+ mlx5e_page_release(&sq->channel->rq, di, false);
+ }
+}
+
+void mlx5e_free_sq_descs(struct mlx5e_sq *sq)
+{
+ switch (sq->type) {
+ case MLX5E_SQ_TXQ:
+ mlx5e_free_txq_sq_descs(sq);
+ break;
+ case MLX5E_SQ_XDP:
+ mlx5e_free_xdp_sq_descs(sq);
+ break;
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 08d8b0c91f07..5703f19a6a24 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -72,7 +72,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
do {
u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1;
- struct mlx5e_ico_wqe_info *icowi = &sq->ico_wqe_info[ci];
+ struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci];
mlx5_cqwq_pop(&cq->wq);
sqcc += icowi->num_wqebbs;
@@ -105,6 +105,66 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
sq->cc = sqcc;
}
+static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq)
+{
+ struct mlx5e_sq *sq;
+ u16 sqcc;
+ int i;
+
+ sq = container_of(cq, struct mlx5e_sq, cq);
+
+ if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state)))
+ return false;
+
+ /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+ * otherwise a cq overrun may occur
+ */
+ sqcc = sq->cc;
+
+ for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) {
+ struct mlx5_cqe64 *cqe;
+ u16 wqe_counter;
+ bool last_wqe;
+
+ cqe = mlx5e_get_cqe(cq);
+ if (!cqe)
+ break;
+
+ mlx5_cqwq_pop(&cq->wq);
+
+ wqe_counter = be16_to_cpu(cqe->wqe_counter);
+
+ do {
+ struct mlx5e_sq_wqe_info *wi;
+ struct mlx5e_dma_info *di;
+ u16 ci;
+
+ last_wqe = (sqcc == wqe_counter);
+
+ ci = sqcc & sq->wq.sz_m1;
+ di = &sq->db.xdp.di[ci];
+ wi = &sq->db.xdp.wqe_info[ci];
+
+ if (unlikely(wi->opcode == MLX5_OPCODE_NOP)) {
+ sqcc++;
+ continue;
+ }
+
+ sqcc += wi->num_wqebbs;
+ /* Recycle RX page */
+ mlx5e_page_release(&sq->channel->rq, di, true);
+ } while (!last_wqe);
+ }
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ sq->cc = sqcc;
+ return (i == MLX5E_TX_CQ_POLL_BUDGET);
+}
+
int mlx5e_napi_poll(struct napi_struct *napi, int budget)
{
struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
@@ -121,6 +181,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget);
busy |= work_done == budget;
+ if (c->xdp)
+ busy |= mlx5e_poll_xdp_tx_cq(&c->xdp_sq.cq);
+
mlx5e_poll_ico_cq(&c->icosq.cq);
busy |= mlx5e_post_rx_wqes(&c->rq);
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 68178819ff12..0efb2ba9a558 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -3,6 +3,13 @@ obj-$(CONFIG_NFP_NETVF) += nfp_netvf.o
nfp_netvf-objs := \
nfp_net_common.o \
nfp_net_ethtool.o \
+ nfp_net_offload.o \
nfp_netvf_main.o
+ifeq ($(CONFIG_BPF_SYSCALL),y)
+nfp_netvf-objs += \
+ nfp_bpf_verifier.o \
+ nfp_bpf_jit.o
+endif
+
nfp_netvf-$(CONFIG_NFP_NET_DEBUG) += nfp_net_debugfs.o
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
new file mode 100644
index 000000000000..22484b6fd3e8
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NFP_ASM_H__
+#define __NFP_ASM_H__ 1
+
+#include "nfp_bpf.h"
+
+#define REG_NONE 0
+
+#define RE_REG_NO_DST 0x020
+#define RE_REG_IMM 0x020
+#define RE_REG_IMM_encode(x) \
+ (RE_REG_IMM | ((x) & 0x1f) | (((x) & 0x60) << 1))
+#define RE_REG_IMM_MAX 0x07fULL
+#define RE_REG_XFR 0x080
+
+#define UR_REG_XFR 0x180
+#define UR_REG_NN 0x280
+#define UR_REG_NO_DST 0x300
+#define UR_REG_IMM UR_REG_NO_DST
+#define UR_REG_IMM_encode(x) (UR_REG_IMM | (x))
+#define UR_REG_IMM_MAX 0x0ffULL
+
+#define OP_BR_BASE 0x0d800000020ULL
+#define OP_BR_BASE_MASK 0x0f8000c3ce0ULL
+#define OP_BR_MASK 0x0000000001fULL
+#define OP_BR_EV_PIP 0x00000000300ULL
+#define OP_BR_CSS 0x0000003c000ULL
+#define OP_BR_DEFBR 0x00000300000ULL
+#define OP_BR_ADDR_LO 0x007ffc00000ULL
+#define OP_BR_ADDR_HI 0x10000000000ULL
+
+#define nfp_is_br(_insn) \
+ (((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE)
+
+enum br_mask {
+ BR_BEQ = 0x00,
+ BR_BNE = 0x01,
+ BR_BHS = 0x04,
+ BR_BLO = 0x05,
+ BR_BGE = 0x08,
+ BR_UNC = 0x18,
+};
+
+enum br_ev_pip {
+ BR_EV_PIP_UNCOND = 0,
+ BR_EV_PIP_COND = 1,
+};
+
+enum br_ctx_signal_state {
+ BR_CSS_NONE = 2,
+};
+
+#define OP_BBYTE_BASE 0x0c800000000ULL
+#define OP_BB_A_SRC 0x000000000ffULL
+#define OP_BB_BYTE 0x00000000300ULL
+#define OP_BB_B_SRC 0x0000003fc00ULL
+#define OP_BB_I8 0x00000040000ULL
+#define OP_BB_EQ 0x00000080000ULL
+#define OP_BB_DEFBR 0x00000300000ULL
+#define OP_BB_ADDR_LO 0x007ffc00000ULL
+#define OP_BB_ADDR_HI 0x10000000000ULL
+
+#define OP_BALU_BASE 0x0e800000000ULL
+#define OP_BA_A_SRC 0x000000003ffULL
+#define OP_BA_B_SRC 0x000000ffc00ULL
+#define OP_BA_DEFBR 0x00000300000ULL
+#define OP_BA_ADDR_HI 0x0007fc00000ULL
+
+#define OP_IMMED_A_SRC 0x000000003ffULL
+#define OP_IMMED_B_SRC 0x000000ffc00ULL
+#define OP_IMMED_IMM 0x0000ff00000ULL
+#define OP_IMMED_WIDTH 0x00060000000ULL
+#define OP_IMMED_INV 0x00080000000ULL
+#define OP_IMMED_SHIFT 0x00600000000ULL
+#define OP_IMMED_BASE 0x0f000000000ULL
+#define OP_IMMED_WR_AB 0x20000000000ULL
+
+enum immed_width {
+ IMMED_WIDTH_ALL = 0,
+ IMMED_WIDTH_BYTE = 1,
+ IMMED_WIDTH_WORD = 2,
+};
+
+enum immed_shift {
+ IMMED_SHIFT_0B = 0,
+ IMMED_SHIFT_1B = 1,
+ IMMED_SHIFT_2B = 2,
+};
+
+#define OP_SHF_BASE 0x08000000000ULL
+#define OP_SHF_A_SRC 0x000000000ffULL
+#define OP_SHF_SC 0x00000000300ULL
+#define OP_SHF_B_SRC 0x0000003fc00ULL
+#define OP_SHF_I8 0x00000040000ULL
+#define OP_SHF_SW 0x00000080000ULL
+#define OP_SHF_DST 0x0000ff00000ULL
+#define OP_SHF_SHIFT 0x001f0000000ULL
+#define OP_SHF_OP 0x00e00000000ULL
+#define OP_SHF_DST_AB 0x01000000000ULL
+#define OP_SHF_WR_AB 0x20000000000ULL
+
+enum shf_op {
+ SHF_OP_NONE = 0,
+ SHF_OP_AND = 2,
+ SHF_OP_OR = 5,
+};
+
+enum shf_sc {
+ SHF_SC_R_ROT = 0,
+ SHF_SC_R_SHF = 1,
+ SHF_SC_L_SHF = 2,
+ SHF_SC_R_DSHF = 3,
+};
+
+#define OP_ALU_A_SRC 0x000000003ffULL
+#define OP_ALU_B_SRC 0x000000ffc00ULL
+#define OP_ALU_DST 0x0003ff00000ULL
+#define OP_ALU_SW 0x00040000000ULL
+#define OP_ALU_OP 0x00f80000000ULL
+#define OP_ALU_DST_AB 0x01000000000ULL
+#define OP_ALU_BASE 0x0a000000000ULL
+#define OP_ALU_WR_AB 0x20000000000ULL
+
+enum alu_op {
+ ALU_OP_NONE = 0x00,
+ ALU_OP_ADD = 0x01,
+ ALU_OP_NEG = 0x04,
+ ALU_OP_AND = 0x08,
+ ALU_OP_SUB_C = 0x0d,
+ ALU_OP_ADD_C = 0x11,
+ ALU_OP_OR = 0x14,
+ ALU_OP_SUB = 0x15,
+ ALU_OP_XOR = 0x18,
+};
+
+enum alu_dst_ab {
+ ALU_DST_A = 0,
+ ALU_DST_B = 1,
+};
+
+#define OP_LDF_BASE 0x0c000000000ULL
+#define OP_LDF_A_SRC 0x000000000ffULL
+#define OP_LDF_SC 0x00000000300ULL
+#define OP_LDF_B_SRC 0x0000003fc00ULL
+#define OP_LDF_I8 0x00000040000ULL
+#define OP_LDF_SW 0x00000080000ULL
+#define OP_LDF_ZF 0x00000100000ULL
+#define OP_LDF_BMASK 0x0000f000000ULL
+#define OP_LDF_SHF 0x001f0000000ULL
+#define OP_LDF_WR_AB 0x20000000000ULL
+
+#define OP_CMD_A_SRC 0x000000000ffULL
+#define OP_CMD_CTX 0x00000000300ULL
+#define OP_CMD_B_SRC 0x0000003fc00ULL
+#define OP_CMD_TOKEN 0x000000c0000ULL
+#define OP_CMD_XFER 0x00001f00000ULL
+#define OP_CMD_CNT 0x0000e000000ULL
+#define OP_CMD_SIG 0x000f0000000ULL
+#define OP_CMD_TGT_CMD 0x07f00000000ULL
+#define OP_CMD_MODE 0x1c0000000000ULL
+
+struct cmd_tgt_act {
+ u8 token;
+ u8 tgt_cmd;
+};
+
+enum cmd_tgt_map {
+ CMD_TGT_READ8,
+ CMD_TGT_WRITE8,
+ CMD_TGT_READ_LE,
+ CMD_TGT_READ_SWAP_LE,
+ __CMD_TGT_MAP_SIZE,
+};
+
+enum cmd_mode {
+ CMD_MODE_40b_AB = 0,
+ CMD_MODE_40b_BA = 1,
+ CMD_MODE_32b = 4,
+};
+
+enum cmd_ctx_swap {
+ CMD_CTX_SWAP = 0,
+ CMD_CTX_NO_SWAP = 3,
+};
+
+#define OP_LCSR_BASE 0x0fc00000000ULL
+#define OP_LCSR_A_SRC 0x000000003ffULL
+#define OP_LCSR_B_SRC 0x000000ffc00ULL
+#define OP_LCSR_WRITE 0x00000200000ULL
+#define OP_LCSR_ADDR 0x001ffc00000ULL
+
+enum lcsr_wr_src {
+ LCSR_WR_AREG,
+ LCSR_WR_BREG,
+ LCSR_WR_IMM,
+};
+
+#define OP_CARB_BASE 0x0e000000000ULL
+#define OP_CARB_OR 0x00000010000ULL
+
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
new file mode 100644
index 000000000000..fc220cd04115
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NFP_BPF_H__
+#define __NFP_BPF_H__ 1
+
+#include <linux/bitfield.h>
+#include <linux/bpf.h>
+#include <linux/list.h>
+#include <linux/types.h>
+
+#define FIELD_FIT(mask, val) (!((((u64)val) << __bf_shf(mask)) & ~(mask)))
+
+/* For branch fixup logic use up-most byte of branch instruction as scratch
+ * area. Remember to clear this before sending instructions to HW!
+ */
+#define OP_BR_SPECIAL 0xff00000000000000ULL
+
+enum br_special {
+ OP_BR_NORMAL = 0,
+ OP_BR_GO_OUT,
+ OP_BR_GO_ABORT,
+};
+
+enum static_regs {
+ STATIC_REG_PKT = 1,
+#define REG_PKT_BANK ALU_DST_A
+ STATIC_REG_IMM = 2, /* Bank AB */
+};
+
+enum nfp_bpf_action_type {
+ NN_ACT_TC_DROP,
+ NN_ACT_TC_REDIR,
+ NN_ACT_DIRECT,
+};
+
+/* Software register representation, hardware encoding in asm.h */
+#define NN_REG_TYPE GENMASK(31, 24)
+#define NN_REG_VAL GENMASK(7, 0)
+
+enum nfp_bpf_reg_type {
+ NN_REG_GPR_A = BIT(0),
+ NN_REG_GPR_B = BIT(1),
+ NN_REG_NNR = BIT(2),
+ NN_REG_XFER = BIT(3),
+ NN_REG_IMM = BIT(4),
+ NN_REG_NONE = BIT(5),
+};
+
+#define NN_REG_GPR_BOTH (NN_REG_GPR_A | NN_REG_GPR_B)
+
+#define reg_both(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_BOTH))
+#define reg_a(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_A))
+#define reg_b(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_B))
+#define reg_nnr(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_NNR))
+#define reg_xfer(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_XFER))
+#define reg_imm(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_IMM))
+#define reg_none() (FIELD_PREP(NN_REG_TYPE, NN_REG_NONE))
+
+#define pkt_reg(np) reg_a((np)->regs_per_thread - STATIC_REG_PKT)
+#define imm_a(np) reg_a((np)->regs_per_thread - STATIC_REG_IMM)
+#define imm_b(np) reg_b((np)->regs_per_thread - STATIC_REG_IMM)
+#define imm_both(np) reg_both((np)->regs_per_thread - STATIC_REG_IMM)
+
+#define NFP_BPF_ABI_FLAGS reg_nnr(0)
+#define NFP_BPF_ABI_FLAG_MARK 1
+#define NFP_BPF_ABI_MARK reg_nnr(1)
+#define NFP_BPF_ABI_PKT reg_nnr(2)
+#define NFP_BPF_ABI_LEN reg_nnr(3)
+
+struct nfp_prog;
+struct nfp_insn_meta;
+typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
+
+#define nfp_prog_first_meta(nfp_prog) \
+ list_first_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l)
+#define nfp_prog_last_meta(nfp_prog) \
+ list_last_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l)
+#define nfp_meta_next(meta) list_next_entry(meta, l)
+#define nfp_meta_prev(meta) list_prev_entry(meta, l)
+
+/**
+ * struct nfp_insn_meta - BPF instruction wrapper
+ * @insn: BPF instruction
+ * @off: index of first generated machine instruction (in nfp_prog.prog)
+ * @n: eBPF instruction number
+ * @skip: skip this instruction (optimized out)
+ * @double_cb: callback for second part of the instruction
+ * @l: link on nfp_prog->insns list
+ */
+struct nfp_insn_meta {
+ struct bpf_insn insn;
+ unsigned int off;
+ unsigned short n;
+ bool skip;
+ instr_cb_t double_cb;
+
+ struct list_head l;
+};
+
+#define BPF_SIZE_MASK 0x18
+
+static inline u8 mbpf_class(const struct nfp_insn_meta *meta)
+{
+ return BPF_CLASS(meta->insn.code);
+}
+
+static inline u8 mbpf_src(const struct nfp_insn_meta *meta)
+{
+ return BPF_SRC(meta->insn.code);
+}
+
+static inline u8 mbpf_op(const struct nfp_insn_meta *meta)
+{
+ return BPF_OP(meta->insn.code);
+}
+
+static inline u8 mbpf_mode(const struct nfp_insn_meta *meta)
+{
+ return BPF_MODE(meta->insn.code);
+}
+
+/**
+ * struct nfp_prog - nfp BPF program
+ * @prog: machine code
+ * @prog_len: number of valid instructions in @prog array
+ * @__prog_alloc_len: alloc size of @prog array
+ * @act: BPF program/action type (TC DA, TC with action, XDP etc.)
+ * @num_regs: number of registers used by this program
+ * @regs_per_thread: number of basic registers allocated per thread
+ * @start_off: address of the first instruction in the memory
+ * @tgt_out: jump target for normal exit
+ * @tgt_abort: jump target for abort (e.g. access outside of packet buffer)
+ * @tgt_done: jump target to get the next packet
+ * @n_translated: number of successfully translated instructions (for errors)
+ * @error: error code if something went wrong
+ * @insns: list of BPF instruction wrappers (struct nfp_insn_meta)
+ */
+struct nfp_prog {
+ u64 *prog;
+ unsigned int prog_len;
+ unsigned int __prog_alloc_len;
+
+ enum nfp_bpf_action_type act;
+
+ unsigned int num_regs;
+ unsigned int regs_per_thread;
+
+ unsigned int start_off;
+ unsigned int tgt_out;
+ unsigned int tgt_abort;
+ unsigned int tgt_done;
+
+ unsigned int n_translated;
+ int error;
+
+ struct list_head insns;
+};
+
+struct nfp_bpf_result {
+ unsigned int n_instr;
+ bool dense_mode;
+};
+
+#ifdef CONFIG_BPF_SYSCALL
+int
+nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act,
+ unsigned int prog_start, unsigned int prog_done,
+ unsigned int prog_sz, struct nfp_bpf_result *res);
+#else
+int
+nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act,
+ unsigned int prog_start, unsigned int prog_done,
+ unsigned int prog_sz, struct nfp_bpf_result *res)
+{
+ return -ENOTSUPP;
+}
+#endif
+
+int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog);
+
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
new file mode 100644
index 000000000000..3de819acd68c
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
@@ -0,0 +1,1811 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "NFP net bpf: " fmt
+
+#include <linux/kernel.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/pkt_cls.h>
+#include <linux/unistd.h>
+
+#include "nfp_asm.h"
+#include "nfp_bpf.h"
+
+/* --- NFP prog --- */
+/* Foreach "multiple" entries macros provide pos and next<n> pointers.
+ * It's safe to modify the next pointers (but not pos).
+ */
+#define nfp_for_each_insn_walk2(nfp_prog, pos, next) \
+ for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
+ next = list_next_entry(pos, l); \
+ &(nfp_prog)->insns != &pos->l && \
+ &(nfp_prog)->insns != &next->l; \
+ pos = nfp_meta_next(pos), \
+ next = nfp_meta_next(pos))
+
+#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2) \
+ for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
+ next = list_next_entry(pos, l), \
+ next2 = list_next_entry(next, l); \
+ &(nfp_prog)->insns != &pos->l && \
+ &(nfp_prog)->insns != &next->l && \
+ &(nfp_prog)->insns != &next2->l; \
+ pos = nfp_meta_next(pos), \
+ next = nfp_meta_next(pos), \
+ next2 = nfp_meta_next(next))
+
+static bool
+nfp_meta_has_next(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return meta->l.next != &nfp_prog->insns;
+}
+
+static bool
+nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return meta->l.prev != &nfp_prog->insns;
+}
+
+static void nfp_prog_free(struct nfp_prog *nfp_prog)
+{
+ struct nfp_insn_meta *meta, *tmp;
+
+ list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) {
+ list_del(&meta->l);
+ kfree(meta);
+ }
+ kfree(nfp_prog);
+}
+
+static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
+{
+ if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) {
+ nfp_prog->error = -ENOSPC;
+ return;
+ }
+
+ nfp_prog->prog[nfp_prog->prog_len] = insn;
+ nfp_prog->prog_len++;
+}
+
+static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
+{
+ return nfp_prog->start_off + nfp_prog->prog_len;
+}
+
+static unsigned int
+nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset)
+{
+ return offset - nfp_prog->start_off;
+}
+
+/* --- SW reg --- */
+struct nfp_insn_ur_regs {
+ enum alu_dst_ab dst_ab;
+ u16 dst;
+ u16 areg, breg;
+ bool swap;
+ bool wr_both;
+};
+
+struct nfp_insn_re_regs {
+ enum alu_dst_ab dst_ab;
+ u8 dst;
+ u8 areg, breg;
+ bool swap;
+ bool wr_both;
+ bool i8;
+};
+
+static u16 nfp_swreg_to_unreg(u32 swreg, bool is_dst)
+{
+ u16 val = FIELD_GET(NN_REG_VAL, swreg);
+
+ switch (FIELD_GET(NN_REG_TYPE, swreg)) {
+ case NN_REG_GPR_A:
+ case NN_REG_GPR_B:
+ case NN_REG_GPR_BOTH:
+ return val;
+ case NN_REG_NNR:
+ return UR_REG_NN | val;
+ case NN_REG_XFER:
+ return UR_REG_XFR | val;
+ case NN_REG_IMM:
+ if (val & ~0xff) {
+ pr_err("immediate too large\n");
+ return 0;
+ }
+ return UR_REG_IMM_encode(val);
+ case NN_REG_NONE:
+ return is_dst ? UR_REG_NO_DST : REG_NONE;
+ default:
+ pr_err("unrecognized reg encoding %08x\n", swreg);
+ return 0;
+ }
+}
+
+static int
+swreg_to_unrestricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_ur_regs *reg)
+{
+ memset(reg, 0, sizeof(*reg));
+
+ /* Decode destination */
+ if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM)
+ return -EFAULT;
+
+ if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B)
+ reg->dst_ab = ALU_DST_B;
+ if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH)
+ reg->wr_both = true;
+ reg->dst = nfp_swreg_to_unreg(dst, true);
+
+ /* Decode source operands */
+ if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg))
+ return -EFAULT;
+
+ if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B ||
+ FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) {
+ reg->areg = nfp_swreg_to_unreg(rreg, false);
+ reg->breg = nfp_swreg_to_unreg(lreg, false);
+ reg->swap = true;
+ } else {
+ reg->areg = nfp_swreg_to_unreg(lreg, false);
+ reg->breg = nfp_swreg_to_unreg(rreg, false);
+ }
+
+ return 0;
+}
+
+static u16 nfp_swreg_to_rereg(u32 swreg, bool is_dst, bool has_imm8, bool *i8)
+{
+ u16 val = FIELD_GET(NN_REG_VAL, swreg);
+
+ switch (FIELD_GET(NN_REG_TYPE, swreg)) {
+ case NN_REG_GPR_A:
+ case NN_REG_GPR_B:
+ case NN_REG_GPR_BOTH:
+ return val;
+ case NN_REG_XFER:
+ return RE_REG_XFR | val;
+ case NN_REG_IMM:
+ if (val & ~(0x7f | has_imm8 << 7)) {
+ pr_err("immediate too large\n");
+ return 0;
+ }
+ *i8 = val & 0x80;
+ return RE_REG_IMM_encode(val & 0x7f);
+ case NN_REG_NONE:
+ return is_dst ? RE_REG_NO_DST : REG_NONE;
+ default:
+ pr_err("unrecognized reg encoding\n");
+ return 0;
+ }
+}
+
+static int
+swreg_to_restricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_re_regs *reg,
+ bool has_imm8)
+{
+ memset(reg, 0, sizeof(*reg));
+
+ /* Decode destination */
+ if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM)
+ return -EFAULT;
+
+ if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B)
+ reg->dst_ab = ALU_DST_B;
+ if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH)
+ reg->wr_both = true;
+ reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL);
+
+ /* Decode source operands */
+ if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg))
+ return -EFAULT;
+
+ if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B ||
+ FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) {
+ reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8);
+ reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8);
+ reg->swap = true;
+ } else {
+ reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8);
+ reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8);
+ }
+
+ return 0;
+}
+
+/* --- Emitters --- */
+static const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
+ [CMD_TGT_WRITE8] = { 0x00, 0x42 },
+ [CMD_TGT_READ8] = { 0x01, 0x43 },
+ [CMD_TGT_READ_LE] = { 0x01, 0x40 },
+ [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 },
+};
+
+static void
+__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
+ u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync)
+{
+ enum cmd_ctx_swap ctx;
+ u64 insn;
+
+ if (sync)
+ ctx = CMD_CTX_SWAP;
+ else
+ ctx = CMD_CTX_NO_SWAP;
+
+ insn = FIELD_PREP(OP_CMD_A_SRC, areg) |
+ FIELD_PREP(OP_CMD_CTX, ctx) |
+ FIELD_PREP(OP_CMD_B_SRC, breg) |
+ FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
+ FIELD_PREP(OP_CMD_XFER, xfer) |
+ FIELD_PREP(OP_CMD_CNT, size) |
+ FIELD_PREP(OP_CMD_SIG, sync) |
+ FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
+ FIELD_PREP(OP_CMD_MODE, mode);
+
+ nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
+ u8 mode, u8 xfer, u32 lreg, u32 rreg, u8 size, bool sync)
+{
+ struct nfp_insn_re_regs reg;
+ int err;
+
+ err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
+ if (err) {
+ nfp_prog->error = err;
+ return;
+ }
+ if (reg.swap) {
+ pr_err("cmd can't swap arguments\n");
+ nfp_prog->error = -EFAULT;
+ return;
+ }
+
+ __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync);
+}
+
+static void
+__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
+ enum br_ctx_signal_state css, u16 addr, u8 defer)
+{
+ u16 addr_lo, addr_hi;
+ u64 insn;
+
+ addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
+ addr_hi = addr != addr_lo;
+
+ insn = OP_BR_BASE |
+ FIELD_PREP(OP_BR_MASK, mask) |
+ FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
+ FIELD_PREP(OP_BR_CSS, css) |
+ FIELD_PREP(OP_BR_DEFBR, defer) |
+ FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
+ FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
+
+ nfp_prog_push(nfp_prog, insn);
+}
+
+static void emit_br_def(struct nfp_prog *nfp_prog, u16 addr, u8 defer)
+{
+ if (defer > 2) {
+ pr_err("BUG: branch defer out of bounds %d\n", defer);
+ nfp_prog->error = -EFAULT;
+ return;
+ }
+ __emit_br(nfp_prog, BR_UNC, BR_EV_PIP_UNCOND, BR_CSS_NONE, addr, defer);
+}
+
+static void
+emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
+{
+ __emit_br(nfp_prog, mask,
+ mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
+ BR_CSS_NONE, addr, defer);
+}
+
+static void
+__emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8,
+ u8 byte, bool equal, u16 addr, u8 defer)
+{
+ u16 addr_lo, addr_hi;
+ u64 insn;
+
+ addr_lo = addr & (OP_BB_ADDR_LO >> __bf_shf(OP_BB_ADDR_LO));
+ addr_hi = addr != addr_lo;
+
+ insn = OP_BBYTE_BASE |
+ FIELD_PREP(OP_BB_A_SRC, areg) |
+ FIELD_PREP(OP_BB_BYTE, byte) |
+ FIELD_PREP(OP_BB_B_SRC, breg) |
+ FIELD_PREP(OP_BB_I8, imm8) |
+ FIELD_PREP(OP_BB_EQ, equal) |
+ FIELD_PREP(OP_BB_DEFBR, defer) |
+ FIELD_PREP(OP_BB_ADDR_LO, addr_lo) |
+ FIELD_PREP(OP_BB_ADDR_HI, addr_hi);
+
+ nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_br_byte_neq(struct nfp_prog *nfp_prog,
+ u32 dst, u8 imm, u8 byte, u16 addr, u8 defer)
+{
+ struct nfp_insn_re_regs reg;
+ int err;
+
+ err = swreg_to_restricted(reg_none(), dst, reg_imm(imm), &reg, true);
+ if (err) {
+ nfp_prog->error = err;
+ return;
+ }
+
+ __emit_br_byte(nfp_prog, reg.areg, reg.breg, reg.i8, byte, false, addr,
+ defer);
+}
+
+static void
+__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
+ enum immed_width width, bool invert,
+ enum immed_shift shift, bool wr_both)
+{
+ u64 insn;
+
+ insn = OP_IMMED_BASE |
+ FIELD_PREP(OP_IMMED_A_SRC, areg) |
+ FIELD_PREP(OP_IMMED_B_SRC, breg) |
+ FIELD_PREP(OP_IMMED_IMM, imm_hi) |
+ FIELD_PREP(OP_IMMED_WIDTH, width) |
+ FIELD_PREP(OP_IMMED_INV, invert) |
+ FIELD_PREP(OP_IMMED_SHIFT, shift) |
+ FIELD_PREP(OP_IMMED_WR_AB, wr_both);
+
+ nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_immed(struct nfp_prog *nfp_prog, u32 dst, u16 imm,
+ enum immed_width width, bool invert, enum immed_shift shift)
+{
+ struct nfp_insn_ur_regs reg;
+ int err;
+
+ if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) {
+ nfp_prog->error = -EFAULT;
+ return;
+ }
+
+ err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
+ if (err) {
+ nfp_prog->error = err;
+ return;
+ }
+
+ __emit_immed(nfp_prog, reg.areg, reg.breg, imm >> 8, width,
+ invert, shift, reg.wr_both);
+}
+
+static void
+__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
+ enum shf_sc sc, u8 shift,
+ u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both)
+{
+ u64 insn;
+
+ if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
+ nfp_prog->error = -EFAULT;
+ return;
+ }
+
+ if (sc == SHF_SC_L_SHF)
+ shift = 32 - shift;
+
+ insn = OP_SHF_BASE |
+ FIELD_PREP(OP_SHF_A_SRC, areg) |
+ FIELD_PREP(OP_SHF_SC, sc) |
+ FIELD_PREP(OP_SHF_B_SRC, breg) |
+ FIELD_PREP(OP_SHF_I8, i8) |
+ FIELD_PREP(OP_SHF_SW, sw) |
+ FIELD_PREP(OP_SHF_DST, dst) |
+ FIELD_PREP(OP_SHF_SHIFT, shift) |
+ FIELD_PREP(OP_SHF_OP, op) |
+ FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
+ FIELD_PREP(OP_SHF_WR_AB, wr_both);
+
+ nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_shf(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum shf_op op, u32 rreg,
+ enum shf_sc sc, u8 shift)
+{
+ struct nfp_insn_re_regs reg;
+ int err;
+
+ err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
+ if (err) {
+ nfp_prog->error = err;
+ return;
+ }
+
+ __emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
+ reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both);
+}
+
+static void
+__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
+ u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both)
+{
+ u64 insn;
+
+ insn = OP_ALU_BASE |
+ FIELD_PREP(OP_ALU_A_SRC, areg) |
+ FIELD_PREP(OP_ALU_B_SRC, breg) |
+ FIELD_PREP(OP_ALU_DST, dst) |
+ FIELD_PREP(OP_ALU_SW, swap) |
+ FIELD_PREP(OP_ALU_OP, op) |
+ FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
+ FIELD_PREP(OP_ALU_WR_AB, wr_both);
+
+ nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_alu(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum alu_op op, u32 rreg)
+{
+ struct nfp_insn_ur_regs reg;
+ int err;
+
+ err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
+ if (err) {
+ nfp_prog->error = err;
+ return;
+ }
+
+ __emit_alu(nfp_prog, reg.dst, reg.dst_ab,
+ reg.areg, op, reg.breg, reg.swap, reg.wr_both);
+}
+
+static void
+__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
+ u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
+ bool zero, bool swap, bool wr_both)
+{
+ u64 insn;
+
+ insn = OP_LDF_BASE |
+ FIELD_PREP(OP_LDF_A_SRC, areg) |
+ FIELD_PREP(OP_LDF_SC, sc) |
+ FIELD_PREP(OP_LDF_B_SRC, breg) |
+ FIELD_PREP(OP_LDF_I8, imm8) |
+ FIELD_PREP(OP_LDF_SW, swap) |
+ FIELD_PREP(OP_LDF_ZF, zero) |
+ FIELD_PREP(OP_LDF_BMASK, bmask) |
+ FIELD_PREP(OP_LDF_SHF, shift) |
+ FIELD_PREP(OP_LDF_WR_AB, wr_both);
+
+ nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift,
+ u32 dst, u8 bmask, u32 src, bool zero)
+{
+ struct nfp_insn_re_regs reg;
+ int err;
+
+ err = swreg_to_restricted(reg_none(), dst, src, &reg, true);
+ if (err) {
+ nfp_prog->error = err;
+ return;
+ }
+
+ __emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
+ reg.i8, zero, reg.swap, reg.wr_both);
+}
+
+static void
+emit_ld_field(struct nfp_prog *nfp_prog, u32 dst, u8 bmask, u32 src,
+ enum shf_sc sc, u8 shift)
+{
+ emit_ld_field_any(nfp_prog, sc, shift, dst, bmask, src, false);
+}
+
+/* --- Wrappers --- */
+static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
+{
+ if (!(imm & 0xffff0000)) {
+ *val = imm;
+ *shift = IMMED_SHIFT_0B;
+ } else if (!(imm & 0xff0000ff)) {
+ *val = imm >> 8;
+ *shift = IMMED_SHIFT_1B;
+ } else if (!(imm & 0x0000ffff)) {
+ *val = imm >> 16;
+ *shift = IMMED_SHIFT_2B;
+ } else {
+ return false;
+ }
+
+ return true;
+}
+
+static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm)
+{
+ enum immed_shift shift;
+ u16 val;
+
+ if (pack_immed(imm, &val, &shift)) {
+ emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
+ } else if (pack_immed(~imm, &val, &shift)) {
+ emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
+ } else {
+ emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
+ false, IMMED_SHIFT_0B);
+ emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
+ false, IMMED_SHIFT_2B);
+ }
+}
+
+/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
+ * If the @imm is small enough encode it directly in operand and return
+ * otherwise load @imm to a spare register and return its encoding.
+ */
+static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg)
+{
+ if (FIELD_FIT(UR_REG_IMM_MAX, imm))
+ return reg_imm(imm);
+
+ wrp_immed(nfp_prog, tmp_reg, imm);
+ return tmp_reg;
+}
+
+/* re_load_imm_any() - encode immediate or use tmp register (restricted)
+ * If the @imm is small enough encode it directly in operand and return
+ * otherwise load @imm to a spare register and return its encoding.
+ */
+static u32 re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg)
+{
+ if (FIELD_FIT(RE_REG_IMM_MAX, imm))
+ return reg_imm(imm);
+
+ wrp_immed(nfp_prog, tmp_reg, imm);
+ return tmp_reg;
+}
+
+static void
+wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask,
+ enum br_special special)
+{
+ emit_br(nfp_prog, mask, 0, 0);
+
+ nfp_prog->prog[nfp_prog->prog_len - 1] |=
+ FIELD_PREP(OP_BR_SPECIAL, special);
+}
+
+static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
+{
+ emit_alu(nfp_prog, reg_both(dst), reg_none(), ALU_OP_NONE, reg_b(src));
+}
+
+static int
+construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset,
+ u16 src, bool src_valid, u8 size)
+{
+ unsigned int i;
+ u16 shift, sz;
+ u32 tmp_reg;
+
+ /* We load the value from the address indicated in @offset and then
+ * shift out the data we don't need. Note: this is big endian!
+ */
+ sz = size < 4 ? 4 : size;
+ shift = size < 4 ? 4 - size : 0;
+
+ if (src_valid) {
+ /* Calculate the true offset (src_reg + imm) */
+ tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
+ emit_alu(nfp_prog, imm_both(nfp_prog),
+ reg_a(src), ALU_OP_ADD, tmp_reg);
+ /* Check packet length (size guaranteed to fit b/c it's u8) */
+ emit_alu(nfp_prog, imm_a(nfp_prog),
+ imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
+ emit_alu(nfp_prog, reg_none(),
+ NFP_BPF_ABI_LEN, ALU_OP_SUB, imm_a(nfp_prog));
+ wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
+ /* Load data */
+ emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
+ pkt_reg(nfp_prog), imm_b(nfp_prog), sz - 1, true);
+ } else {
+ /* Check packet length */
+ tmp_reg = ur_load_imm_any(nfp_prog, offset + size,
+ imm_a(nfp_prog));
+ emit_alu(nfp_prog, reg_none(),
+ NFP_BPF_ABI_LEN, ALU_OP_SUB, tmp_reg);
+ wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
+ /* Load data */
+ tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
+ emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
+ pkt_reg(nfp_prog), tmp_reg, sz - 1, true);
+ }
+
+ i = 0;
+ if (shift)
+ emit_shf(nfp_prog, reg_both(0), reg_none(), SHF_OP_NONE,
+ reg_xfer(0), SHF_SC_R_SHF, shift * 8);
+ else
+ for (; i * 4 < size; i++)
+ emit_alu(nfp_prog, reg_both(i),
+ reg_none(), ALU_OP_NONE, reg_xfer(i));
+
+ if (i < 2)
+ wrp_immed(nfp_prog, reg_both(1), 0);
+
+ return 0;
+}
+
+static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
+{
+ return construct_data_ind_ld(nfp_prog, offset, 0, false, size);
+}
+
+static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src)
+{
+ emit_alu(nfp_prog, NFP_BPF_ABI_MARK,
+ reg_none(), ALU_OP_NONE, reg_b(src));
+ emit_alu(nfp_prog, NFP_BPF_ABI_FLAGS,
+ NFP_BPF_ABI_FLAGS, ALU_OP_OR, reg_imm(NFP_BPF_ABI_FLAG_MARK));
+
+ return 0;
+}
+
+static void
+wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
+{
+ u32 tmp_reg;
+
+ if (alu_op == ALU_OP_AND) {
+ if (!imm)
+ wrp_immed(nfp_prog, reg_both(dst), 0);
+ if (!imm || !~imm)
+ return;
+ }
+ if (alu_op == ALU_OP_OR) {
+ if (!~imm)
+ wrp_immed(nfp_prog, reg_both(dst), ~0U);
+ if (!imm || !~imm)
+ return;
+ }
+ if (alu_op == ALU_OP_XOR) {
+ if (!~imm)
+ emit_alu(nfp_prog, reg_both(dst), reg_none(),
+ ALU_OP_NEG, reg_b(dst));
+ if (!imm || !~imm)
+ return;
+ }
+
+ tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
+ emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
+}
+
+static int
+wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ enum alu_op alu_op, bool skip)
+{
+ const struct bpf_insn *insn = &meta->insn;
+ u64 imm = insn->imm; /* sign extend */
+
+ if (skip) {
+ meta->skip = true;
+ return 0;
+ }
+
+ wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
+ wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);
+
+ return 0;
+}
+
+static int
+wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ enum alu_op alu_op)
+{
+ u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
+
+ emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
+ emit_alu(nfp_prog, reg_both(dst + 1),
+ reg_a(dst + 1), alu_op, reg_b(src + 1));
+
+ return 0;
+}
+
+static int
+wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ enum alu_op alu_op, bool skip)
+{
+ const struct bpf_insn *insn = &meta->insn;
+
+ if (skip) {
+ meta->skip = true;
+ return 0;
+ }
+
+ wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
+ wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+ return 0;
+}
+
+static int
+wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ enum alu_op alu_op)
+{
+ u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
+
+ emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
+ wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
+
+ return 0;
+}
+
+static void
+wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
+ enum br_mask br_mask, u16 off)
+{
+ emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
+ emit_br(nfp_prog, br_mask, off, 0);
+}
+
+static int
+wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ enum alu_op alu_op, enum br_mask br_mask)
+{
+ const struct bpf_insn *insn = &meta->insn;
+
+ if (insn->off < 0) /* TODO */
+ return -ENOTSUPP;
+
+ wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
+ insn->src_reg * 2, br_mask, insn->off);
+ wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
+ insn->src_reg * 2 + 1, br_mask, insn->off);
+
+ return 0;
+}
+
+static int
+wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ enum br_mask br_mask, bool swap)
+{
+ const struct bpf_insn *insn = &meta->insn;
+ u64 imm = insn->imm; /* sign extend */
+ u8 reg = insn->dst_reg * 2;
+ u32 tmp_reg;
+
+ if (insn->off < 0) /* TODO */
+ return -ENOTSUPP;
+
+ tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+ if (!swap)
+ emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
+ else
+ emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg));
+
+ tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+ if (!swap)
+ emit_alu(nfp_prog, reg_none(),
+ reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg);
+ else
+ emit_alu(nfp_prog, reg_none(),
+ tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1));
+
+ emit_br(nfp_prog, br_mask, insn->off, 0);
+
+ return 0;
+}
+
+static int
+wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ enum br_mask br_mask, bool swap)
+{
+ const struct bpf_insn *insn = &meta->insn;
+ u8 areg = insn->src_reg * 2, breg = insn->dst_reg * 2;
+
+ if (insn->off < 0) /* TODO */
+ return -ENOTSUPP;
+
+ if (swap) {
+ areg ^= breg;
+ breg ^= areg;
+ areg ^= breg;
+ }
+
+ emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
+ emit_alu(nfp_prog, reg_none(),
+ reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
+ emit_br(nfp_prog, br_mask, insn->off, 0);
+
+ return 0;
+}
+
+/* --- Callbacks --- */
+static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+
+ wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2);
+ wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->src_reg * 2 + 1);
+
+ return 0;
+}
+
+static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ u64 imm = meta->insn.imm; /* sign extend */
+
+ wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
+ wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);
+
+ return 0;
+}
+
+static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
+}
+
+static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
+}
+
+static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
+}
+
+static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
+}
+
+static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
+}
+
+static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
+}
+
+static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+
+ emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
+ reg_a(insn->dst_reg * 2), ALU_OP_ADD,
+ reg_b(insn->src_reg * 2));
+ emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
+ reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
+ reg_b(insn->src_reg * 2 + 1));
+
+ return 0;
+}
+
+static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+ u64 imm = insn->imm; /* sign extend */
+
+ wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
+ wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);
+
+ return 0;
+}
+
+static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+
+ emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
+ reg_a(insn->dst_reg * 2), ALU_OP_SUB,
+ reg_b(insn->src_reg * 2));
+ emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
+ reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
+ reg_b(insn->src_reg * 2 + 1));
+
+ return 0;
+}
+
+static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+ u64 imm = insn->imm; /* sign extend */
+
+ wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
+ wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);
+
+ return 0;
+}
+
+static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+
+ if (insn->imm != 32)
+ return 1; /* TODO */
+
+ wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->dst_reg * 2);
+ wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), 0);
+
+ return 0;
+}
+
+static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+
+ if (insn->imm != 32)
+ return 1; /* TODO */
+
+ wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->dst_reg * 2 + 1);
+ wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+ return 0;
+}
+
+static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+
+ wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2);
+ wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+ return 0;
+}
+
+static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+
+ wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
+ wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+ return 0;
+}
+
+static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
+}
+
+static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm);
+}
+
+static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
+}
+
+static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
+}
+
+static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
+}
+
+static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
+}
+
+static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
+}
+
+static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm);
+}
+
+static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
+}
+
+static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
+}
+
+static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+
+ if (!insn->imm)
+ return 1; /* TODO: zero shift means indirect */
+
+ emit_shf(nfp_prog, reg_both(insn->dst_reg * 2),
+ reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2),
+ SHF_SC_L_SHF, insn->imm);
+ wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+ return 0;
+}
+
+static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ wrp_immed(nfp_prog, reg_both(nfp_meta_prev(meta)->insn.dst_reg * 2 + 1),
+ meta->insn.imm);
+
+ return 0;
+}
+
+static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+
+ meta->double_cb = imm_ld8_part2;
+ wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
+
+ return 0;
+}
+
+static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return construct_data_ld(nfp_prog, meta->insn.imm, 1);
+}
+
+static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return construct_data_ld(nfp_prog, meta->insn.imm, 2);
+}
+
+static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return construct_data_ld(nfp_prog, meta->insn.imm, 4);
+}
+
+static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return construct_data_ind_ld(nfp_prog, meta->insn.imm,
+ meta->insn.src_reg * 2, true, 1);
+}
+
+static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return construct_data_ind_ld(nfp_prog, meta->insn.imm,
+ meta->insn.src_reg * 2, true, 2);
+}
+
+static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return construct_data_ind_ld(nfp_prog, meta->insn.imm,
+ meta->insn.src_reg * 2, true, 4);
+}
+
+static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ if (meta->insn.off == offsetof(struct sk_buff, len))
+ emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2),
+ reg_none(), ALU_OP_NONE, NFP_BPF_ABI_LEN);
+ else
+ return -ENOTSUPP;
+
+ return 0;
+}
+
+static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ if (meta->insn.off == offsetof(struct sk_buff, mark))
+ return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2);
+
+ return -ENOTSUPP;
+}
+
+static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ if (meta->insn.off < 0) /* TODO */
+ return -ENOTSUPP;
+ emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
+
+ return 0;
+}
+
+static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+ u64 imm = insn->imm; /* sign extend */
+ u32 or1 = reg_a(insn->dst_reg * 2), or2 = reg_b(insn->dst_reg * 2 + 1);
+ u32 tmp_reg;
+
+ if (insn->off < 0) /* TODO */
+ return -ENOTSUPP;
+
+ if (imm & ~0U) {
+ tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+ emit_alu(nfp_prog, imm_a(nfp_prog),
+ reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
+ or1 = imm_a(nfp_prog);
+ }
+
+ if (imm >> 32) {
+ tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+ emit_alu(nfp_prog, imm_b(nfp_prog),
+ reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
+ or2 = imm_b(nfp_prog);
+ }
+
+ emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
+ emit_br(nfp_prog, BR_BEQ, insn->off, 0);
+
+ return 0;
+}
+
+static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
+}
+
+static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
+}
+
+static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+ u64 imm = insn->imm; /* sign extend */
+ u32 tmp_reg;
+
+ if (insn->off < 0) /* TODO */
+ return -ENOTSUPP;
+
+ if (!imm) {
+ meta->skip = true;
+ return 0;
+ }
+
+ if (imm & ~0U) {
+ tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+ emit_alu(nfp_prog, reg_none(),
+ reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg);
+ emit_br(nfp_prog, BR_BNE, insn->off, 0);
+ }
+
+ if (imm >> 32) {
+ tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+ emit_alu(nfp_prog, reg_none(),
+ reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg);
+ emit_br(nfp_prog, BR_BNE, insn->off, 0);
+ }
+
+ return 0;
+}
+
+static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+ u64 imm = insn->imm; /* sign extend */
+ u32 tmp_reg;
+
+ if (insn->off < 0) /* TODO */
+ return -ENOTSUPP;
+
+ if (!imm) {
+ emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
+ ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
+ emit_br(nfp_prog, BR_BNE, insn->off, 0);
+ }
+
+ tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+ emit_alu(nfp_prog, reg_none(),
+ reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
+ emit_br(nfp_prog, BR_BNE, insn->off, 0);
+
+ tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+ emit_alu(nfp_prog, reg_none(),
+ reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
+ emit_br(nfp_prog, BR_BNE, insn->off, 0);
+
+ return 0;
+}
+
+static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+
+ if (insn->off < 0) /* TODO */
+ return -ENOTSUPP;
+
+ emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
+ ALU_OP_XOR, reg_b(insn->src_reg * 2));
+ emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1),
+ ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1));
+ emit_alu(nfp_prog, reg_none(),
+ imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog));
+ emit_br(nfp_prog, BR_BEQ, insn->off, 0);
+
+ return 0;
+}
+
+static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
+}
+
+static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
+}
+
+static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
+}
+
+static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
+}
+
+static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT);
+
+ return 0;
+}
+
+static const instr_cb_t instr_cb[256] = {
+ [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64,
+ [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64,
+ [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64,
+ [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64,
+ [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64,
+ [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64,
+ [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64,
+ [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64,
+ [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64,
+ [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64,
+ [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64,
+ [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64,
+ [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64,
+ [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64,
+ [BPF_ALU | BPF_MOV | BPF_X] = mov_reg,
+ [BPF_ALU | BPF_MOV | BPF_K] = mov_imm,
+ [BPF_ALU | BPF_XOR | BPF_X] = xor_reg,
+ [BPF_ALU | BPF_XOR | BPF_K] = xor_imm,
+ [BPF_ALU | BPF_AND | BPF_X] = and_reg,
+ [BPF_ALU | BPF_AND | BPF_K] = and_imm,
+ [BPF_ALU | BPF_OR | BPF_X] = or_reg,
+ [BPF_ALU | BPF_OR | BPF_K] = or_imm,
+ [BPF_ALU | BPF_ADD | BPF_X] = add_reg,
+ [BPF_ALU | BPF_ADD | BPF_K] = add_imm,
+ [BPF_ALU | BPF_SUB | BPF_X] = sub_reg,
+ [BPF_ALU | BPF_SUB | BPF_K] = sub_imm,
+ [BPF_ALU | BPF_LSH | BPF_K] = shl_imm,
+ [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8,
+ [BPF_LD | BPF_ABS | BPF_B] = data_ld1,
+ [BPF_LD | BPF_ABS | BPF_H] = data_ld2,
+ [BPF_LD | BPF_ABS | BPF_W] = data_ld4,
+ [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1,
+ [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2,
+ [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4,
+ [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4,
+ [BPF_STX | BPF_MEM | BPF_W] = mem_stx4,
+ [BPF_JMP | BPF_JA | BPF_K] = jump,
+ [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm,
+ [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm,
+ [BPF_JMP | BPF_JGE | BPF_K] = jge_imm,
+ [BPF_JMP | BPF_JSET | BPF_K] = jset_imm,
+ [BPF_JMP | BPF_JNE | BPF_K] = jne_imm,
+ [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg,
+ [BPF_JMP | BPF_JGT | BPF_X] = jgt_reg,
+ [BPF_JMP | BPF_JGE | BPF_X] = jge_reg,
+ [BPF_JMP | BPF_JSET | BPF_X] = jset_reg,
+ [BPF_JMP | BPF_JNE | BPF_X] = jne_reg,
+ [BPF_JMP | BPF_EXIT] = goto_out,
+};
+
+/* --- Misc code --- */
+static void br_set_offset(u64 *instr, u16 offset)
+{
+ u16 addr_lo, addr_hi;
+
+ addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
+ addr_hi = offset != addr_lo;
+ *instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO);
+ *instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
+ *instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo);
+}
+
+/* --- Assembler logic --- */
+static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
+{
+ struct nfp_insn_meta *meta, *next;
+ u32 off, br_idx;
+ u32 idx;
+
+ nfp_for_each_insn_walk2(nfp_prog, meta, next) {
+ if (meta->skip)
+ continue;
+ if (BPF_CLASS(meta->insn.code) != BPF_JMP)
+ continue;
+
+ br_idx = nfp_prog_offset_to_index(nfp_prog, next->off) - 1;
+ if (!nfp_is_br(nfp_prog->prog[br_idx])) {
+ pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
+ br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
+ return -ELOOP;
+ }
+ /* Leave special branches for later */
+ if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]))
+ continue;
+
+ /* Find the target offset in assembler realm */
+ off = meta->insn.off;
+ if (!off) {
+ pr_err("Fixup found zero offset!!\n");
+ return -ELOOP;
+ }
+
+ while (off && nfp_meta_has_next(nfp_prog, next)) {
+ next = nfp_meta_next(next);
+ off--;
+ }
+ if (off) {
+ pr_err("Fixup found too large jump!! %d\n", off);
+ return -ELOOP;
+ }
+
+ if (next->skip) {
+ pr_err("Branch landing on removed instruction!!\n");
+ return -ELOOP;
+ }
+
+ for (idx = nfp_prog_offset_to_index(nfp_prog, meta->off);
+ idx <= br_idx; idx++) {
+ if (!nfp_is_br(nfp_prog->prog[idx]))
+ continue;
+ br_set_offset(&nfp_prog->prog[idx], next->off);
+ }
+ }
+
+ /* Fixup 'goto out's separately, they can be scattered around */
+ for (br_idx = 0; br_idx < nfp_prog->prog_len; br_idx++) {
+ enum br_special special;
+
+ if ((nfp_prog->prog[br_idx] & OP_BR_BASE_MASK) != OP_BR_BASE)
+ continue;
+
+ special = FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]);
+ switch (special) {
+ case OP_BR_NORMAL:
+ break;
+ case OP_BR_GO_OUT:
+ br_set_offset(&nfp_prog->prog[br_idx],
+ nfp_prog->tgt_out);
+ break;
+ case OP_BR_GO_ABORT:
+ br_set_offset(&nfp_prog->prog[br_idx],
+ nfp_prog->tgt_abort);
+ break;
+ }
+
+ nfp_prog->prog[br_idx] &= ~OP_BR_SPECIAL;
+ }
+
+ return 0;
+}
+
+static void nfp_intro(struct nfp_prog *nfp_prog)
+{
+ emit_alu(nfp_prog, pkt_reg(nfp_prog),
+ reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT);
+}
+
+static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog)
+{
+ const u8 act2code[] = {
+ [NN_ACT_TC_DROP] = 0x22,
+ [NN_ACT_TC_REDIR] = 0x24
+ };
+ /* Target for aborts */
+ nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
+ wrp_immed(nfp_prog, reg_both(0), 0);
+
+ /* Target for normal exits */
+ nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
+ /* Legacy TC mode:
+ * 0 0x11 -> pass, count as stat0
+ * -1 drop 0x22 -> drop, count as stat1
+ * redir 0x24 -> redir, count as stat1
+ * ife mark 0x21 -> pass, count as stat1
+ * ife + tx 0x24 -> redir, count as stat1
+ */
+ emit_br_byte_neq(nfp_prog, reg_b(0), 0xff, 0, nfp_prog->tgt_done, 2);
+ emit_alu(nfp_prog, reg_a(0),
+ reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+ emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
+
+ emit_br(nfp_prog, BR_UNC, nfp_prog->tgt_done, 1);
+ emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(act2code[nfp_prog->act]),
+ SHF_SC_L_SHF, 16);
+}
+
+static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
+{
+ /* TC direct-action mode:
+ * 0,1 ok NOT SUPPORTED[1]
+ * 2 drop 0x22 -> drop, count as stat1
+ * 4,5 nuke 0x02 -> drop
+ * 7 redir 0x44 -> redir, count as stat2
+ * * unspec 0x11 -> pass, count as stat0
+ *
+ * [1] We can't support OK and RECLASSIFY because we can't tell TC
+ * the exact decision made. We are forced to support UNSPEC
+ * to handle aborts so that's the only one we handle for passing
+ * packets up the stack.
+ */
+ /* Target for aborts */
+ nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
+
+ emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
+
+ emit_alu(nfp_prog, reg_a(0),
+ reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+ emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
+
+ /* Target for normal exits */
+ nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
+
+ /* if R0 > 7 jump to abort */
+ emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
+ emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
+ emit_alu(nfp_prog, reg_a(0),
+ reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+
+ wrp_immed(nfp_prog, reg_b(2), 0x41221211);
+ wrp_immed(nfp_prog, reg_b(3), 0x41001211);
+
+ emit_shf(nfp_prog, reg_a(1),
+ reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);
+
+ emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
+ emit_shf(nfp_prog, reg_a(2),
+ reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
+
+ emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
+ emit_shf(nfp_prog, reg_b(2),
+ reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);
+
+ emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
+
+ emit_shf(nfp_prog, reg_b(2),
+ reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
+ emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
+}
+
+static void nfp_outro(struct nfp_prog *nfp_prog)
+{
+ switch (nfp_prog->act) {
+ case NN_ACT_DIRECT:
+ nfp_outro_tc_da(nfp_prog);
+ break;
+ case NN_ACT_TC_DROP:
+ case NN_ACT_TC_REDIR:
+ nfp_outro_tc_legacy(nfp_prog);
+ break;
+ }
+}
+
+static int nfp_translate(struct nfp_prog *nfp_prog)
+{
+ struct nfp_insn_meta *meta;
+ int err;
+
+ nfp_intro(nfp_prog);
+ if (nfp_prog->error)
+ return nfp_prog->error;
+
+ list_for_each_entry(meta, &nfp_prog->insns, l) {
+ instr_cb_t cb = instr_cb[meta->insn.code];
+
+ meta->off = nfp_prog_current_offset(nfp_prog);
+
+ if (meta->skip) {
+ nfp_prog->n_translated++;
+ continue;
+ }
+
+ if (nfp_meta_has_prev(nfp_prog, meta) &&
+ nfp_meta_prev(meta)->double_cb)
+ cb = nfp_meta_prev(meta)->double_cb;
+ if (!cb)
+ return -ENOENT;
+ err = cb(nfp_prog, meta);
+ if (err)
+ return err;
+
+ nfp_prog->n_translated++;
+ }
+
+ nfp_outro(nfp_prog);
+ if (nfp_prog->error)
+ return nfp_prog->error;
+
+ return nfp_fixup_branches(nfp_prog);
+}
+
+static int
+nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
+ unsigned int cnt)
+{
+ unsigned int i;
+
+ for (i = 0; i < cnt; i++) {
+ struct nfp_insn_meta *meta;
+
+ meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+ if (!meta)
+ return -ENOMEM;
+
+ meta->insn = prog[i];
+ meta->n = i;
+
+ list_add_tail(&meta->l, &nfp_prog->insns);
+ }
+
+ return 0;
+}
+
+/* --- Optimizations --- */
+static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
+{
+ struct nfp_insn_meta *meta;
+
+ list_for_each_entry(meta, &nfp_prog->insns, l) {
+ struct bpf_insn insn = meta->insn;
+
+ /* Programs converted from cBPF start with register xoring */
+ if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
+ insn.src_reg == insn.dst_reg)
+ continue;
+
+ /* Programs start with R6 = R1 but we ignore the skb pointer */
+ if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
+ insn.src_reg == 1 && insn.dst_reg == 6)
+ meta->skip = true;
+
+ /* Return as soon as something doesn't match */
+ if (!meta->skip)
+ return;
+ }
+}
+
+/* Try to rename registers so that program uses only low ones */
+static int nfp_bpf_opt_reg_rename(struct nfp_prog *nfp_prog)
+{
+ bool reg_used[MAX_BPF_REG] = {};
+ u8 tgt_reg[MAX_BPF_REG] = {};
+ struct nfp_insn_meta *meta;
+ unsigned int i, j;
+
+ list_for_each_entry(meta, &nfp_prog->insns, l) {
+ if (meta->skip)
+ continue;
+
+ reg_used[meta->insn.src_reg] = true;
+ reg_used[meta->insn.dst_reg] = true;
+ }
+
+ for (i = 0, j = 0; i < ARRAY_SIZE(tgt_reg); i++) {
+ if (!reg_used[i])
+ continue;
+
+ tgt_reg[i] = j++;
+ }
+ nfp_prog->num_regs = j;
+
+ list_for_each_entry(meta, &nfp_prog->insns, l) {
+ meta->insn.src_reg = tgt_reg[meta->insn.src_reg];
+ meta->insn.dst_reg = tgt_reg[meta->insn.dst_reg];
+ }
+
+ return 0;
+}
+
+/* Remove masking after load since our load guarantees this is not needed */
+static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
+{
+ struct nfp_insn_meta *meta1, *meta2;
+ const s32 exp_mask[] = {
+ [BPF_B] = 0x000000ffU,
+ [BPF_H] = 0x0000ffffU,
+ [BPF_W] = 0xffffffffU,
+ };
+
+ nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
+ struct bpf_insn insn, next;
+
+ insn = meta1->insn;
+ next = meta2->insn;
+
+ if (BPF_CLASS(insn.code) != BPF_LD)
+ continue;
+ if (BPF_MODE(insn.code) != BPF_ABS &&
+ BPF_MODE(insn.code) != BPF_IND)
+ continue;
+
+ if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
+ continue;
+
+ if (!exp_mask[BPF_SIZE(insn.code)])
+ continue;
+ if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
+ continue;
+
+ if (next.src_reg || next.dst_reg)
+ continue;
+
+ meta2->skip = true;
+ }
+}
+
+static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
+{
+ struct nfp_insn_meta *meta1, *meta2, *meta3;
+
+ nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
+ struct bpf_insn insn, next1, next2;
+
+ insn = meta1->insn;
+ next1 = meta2->insn;
+ next2 = meta3->insn;
+
+ if (BPF_CLASS(insn.code) != BPF_LD)
+ continue;
+ if (BPF_MODE(insn.code) != BPF_ABS &&
+ BPF_MODE(insn.code) != BPF_IND)
+ continue;
+ if (BPF_SIZE(insn.code) != BPF_W)
+ continue;
+
+ if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
+ next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
+ !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
+ next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
+ continue;
+
+ if (next1.src_reg || next1.dst_reg ||
+ next2.src_reg || next2.dst_reg)
+ continue;
+
+ if (next1.imm != 0x20 || next2.imm != 0x20)
+ continue;
+
+ meta2->skip = true;
+ meta3->skip = true;
+ }
+}
+
+static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
+{
+ int ret;
+
+ nfp_bpf_opt_reg_init(nfp_prog);
+
+ ret = nfp_bpf_opt_reg_rename(nfp_prog);
+ if (ret)
+ return ret;
+
+ nfp_bpf_opt_ld_mask(nfp_prog);
+ nfp_bpf_opt_ld_shift(nfp_prog);
+
+ return 0;
+}
+
+/**
+ * nfp_bpf_jit() - translate BPF code into NFP assembly
+ * @filter: kernel BPF filter struct
+ * @prog_mem: memory to store assembler instructions
+ * @act: action attached to this eBPF program
+ * @prog_start: offset of the first instruction when loaded
+ * @prog_done: where to jump on exit
+ * @prog_sz: size of @prog_mem in instructions
+ * @res: achieved parameters of translation results
+ */
+int
+nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem,
+ enum nfp_bpf_action_type act,
+ unsigned int prog_start, unsigned int prog_done,
+ unsigned int prog_sz, struct nfp_bpf_result *res)
+{
+ struct nfp_prog *nfp_prog;
+ int ret;
+
+ nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL);
+ if (!nfp_prog)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&nfp_prog->insns);
+ nfp_prog->act = act;
+ nfp_prog->start_off = prog_start;
+ nfp_prog->tgt_done = prog_done;
+
+ ret = nfp_prog_prepare(nfp_prog, filter->insnsi, filter->len);
+ if (ret)
+ goto out;
+
+ ret = nfp_prog_verify(nfp_prog, filter);
+ if (ret)
+ goto out;
+
+ ret = nfp_bpf_optimize(nfp_prog);
+ if (ret)
+ goto out;
+
+ if (nfp_prog->num_regs <= 7)
+ nfp_prog->regs_per_thread = 16;
+ else
+ nfp_prog->regs_per_thread = 32;
+
+ nfp_prog->prog = prog_mem;
+ nfp_prog->__prog_alloc_len = prog_sz;
+
+ ret = nfp_translate(nfp_prog);
+ if (ret) {
+ pr_err("Translation failed with error %d (translated: %u)\n",
+ ret, nfp_prog->n_translated);
+ ret = -EINVAL;
+ }
+
+ res->n_instr = nfp_prog->prog_len;
+ res->dense_mode = nfp_prog->num_regs <= 7;
+out:
+ nfp_prog_free(nfp_prog);
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c
new file mode 100644
index 000000000000..144cae87f63a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "NFP net bpf: " fmt
+
+#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/kernel.h>
+#include <linux/pkt_cls.h>
+
+#include "nfp_bpf.h"
+
+/* Analyzer/verifier definitions */
+struct nfp_bpf_analyzer_priv {
+ struct nfp_prog *prog;
+ struct nfp_insn_meta *meta;
+};
+
+static struct nfp_insn_meta *
+nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ unsigned int insn_idx, unsigned int n_insns)
+{
+ unsigned int forward, backward, i;
+
+ backward = meta->n - insn_idx;
+ forward = insn_idx - meta->n;
+
+ if (min(forward, backward) > n_insns - insn_idx - 1) {
+ backward = n_insns - insn_idx - 1;
+ meta = nfp_prog_last_meta(nfp_prog);
+ }
+ if (min(forward, backward) > insn_idx && backward > insn_idx) {
+ forward = insn_idx;
+ meta = nfp_prog_first_meta(nfp_prog);
+ }
+
+ if (forward < backward)
+ for (i = 0; i < forward; i++)
+ meta = nfp_meta_next(meta);
+ else
+ for (i = 0; i < backward; i++)
+ meta = nfp_meta_prev(meta);
+
+ return meta;
+}
+
+static int
+nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
+ const struct bpf_verifier_env *env)
+{
+ const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
+
+ if (reg0->type != CONST_IMM) {
+ pr_info("unsupported exit state: %d, imm: %llx\n",
+ reg0->type, reg0->imm);
+ return -EINVAL;
+ }
+
+ if (nfp_prog->act != NN_ACT_DIRECT &&
+ reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) {
+ pr_info("unsupported exit state: %d, imm: %llx\n",
+ reg0->type, reg0->imm);
+ return -EINVAL;
+ }
+
+ if (nfp_prog->act == NN_ACT_DIRECT && reg0->imm <= TC_ACT_REDIRECT &&
+ reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN &&
+ reg0->imm != TC_ACT_QUEUED) {
+ pr_info("unsupported exit state: %d, imm: %llx\n",
+ reg0->type, reg0->imm);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog,
+ const struct bpf_verifier_env *env, u8 reg)
+{
+ if (env->cur_state.regs[reg].type != PTR_TO_CTX)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int
+nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
+{
+ struct nfp_bpf_analyzer_priv *priv = env->analyzer_priv;
+ struct nfp_insn_meta *meta = priv->meta;
+
+ meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len);
+ priv->meta = meta;
+
+ if (meta->insn.src_reg == BPF_REG_10 ||
+ meta->insn.dst_reg == BPF_REG_10) {
+ pr_err("stack not yet supported\n");
+ return -EINVAL;
+ }
+ if (meta->insn.src_reg >= MAX_BPF_REG ||
+ meta->insn.dst_reg >= MAX_BPF_REG) {
+ pr_err("program uses extended registers - jit hardening?\n");
+ return -EINVAL;
+ }
+
+ if (meta->insn.code == (BPF_JMP | BPF_EXIT))
+ return nfp_bpf_check_exit(priv->prog, env);
+
+ if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM))
+ return nfp_bpf_check_ctx_ptr(priv->prog, env,
+ meta->insn.src_reg);
+ if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM))
+ return nfp_bpf_check_ctx_ptr(priv->prog, env,
+ meta->insn.dst_reg);
+
+ return 0;
+}
+
+static const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = {
+ .insn_hook = nfp_verify_insn,
+};
+
+int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog)
+{
+ struct nfp_bpf_analyzer_priv *priv;
+ int ret;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->prog = nfp_prog;
+ priv->meta = nfp_prog_first_meta(nfp_prog);
+
+ ret = bpf_analyzer(prog, &nfp_bpf_analyzer_ops, priv);
+
+ kfree(priv);
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 690635660195..ed824e11a1e3 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -62,6 +62,9 @@
/* Max time to wait for NFP to respond on updates (in seconds) */
#define NFP_NET_POLL_TIMEOUT 5
+/* Interval for reading offloaded filter stats */
+#define NFP_NET_STAT_POLL_IVL msecs_to_jiffies(100)
+
/* Bar allocation */
#define NFP_NET_CTRL_BAR 0
#define NFP_NET_Q0_BAR 2
@@ -220,7 +223,7 @@ struct nfp_net_tx_ring {
#define PCIE_DESC_RX_I_TCP_CSUM_OK cpu_to_le16(BIT(11))
#define PCIE_DESC_RX_I_UDP_CSUM cpu_to_le16(BIT(10))
#define PCIE_DESC_RX_I_UDP_CSUM_OK cpu_to_le16(BIT(9))
-#define PCIE_DESC_RX_SPARE cpu_to_le16(BIT(8))
+#define PCIE_DESC_RX_BPF cpu_to_le16(BIT(8))
#define PCIE_DESC_RX_EOP cpu_to_le16(BIT(7))
#define PCIE_DESC_RX_IP4_CSUM cpu_to_le16(BIT(6))
#define PCIE_DESC_RX_IP4_CSUM_OK cpu_to_le16(BIT(5))
@@ -266,6 +269,8 @@ struct nfp_net_rx_desc {
};
};
+#define NFP_NET_META_FIELD_MASK GENMASK(NFP_NET_META_FIELD_SIZE - 1, 0)
+
struct nfp_net_rx_hash {
__be32 hash_type;
__be32 hash;
@@ -405,6 +410,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver,
fw_ver->minor == minor;
}
+struct nfp_stat_pair {
+ u64 pkts;
+ u64 bytes;
+};
+
/**
* struct nfp_net - NFP network device structure
* @pdev: Backpointer to PCI device
@@ -413,6 +423,7 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver,
* @is_vf: Is the driver attached to a VF?
* @is_nfp3200: Is the driver for a NFP-3200 card?
* @fw_loaded: Is the firmware loaded?
+ * @bpf_offload_skip_sw: Offloaded BPF program will not be rerun by cls_bpf
* @ctrl: Local copy of the control register/word.
* @fl_bufsz: Currently configured size of the freelist buffers
* @rx_offset: Offset in the RX buffers where packet data starts
@@ -427,6 +438,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver,
* @rss_cfg: RSS configuration
* @rss_key: RSS secret key
* @rss_itbl: RSS indirection table
+ * @rx_filter: Filter offload statistics - dropped packets/bytes
+ * @rx_filter_prev: Filter offload statistics - values from previous update
+ * @rx_filter_change: Jiffies when statistics last changed
+ * @rx_filter_stats_timer: Timer for polling filter offload statistics
+ * @rx_filter_lock: Lock protecting timer state changes (teardown)
* @max_tx_rings: Maximum number of TX rings supported by the Firmware
* @max_rx_rings: Maximum number of RX rings supported by the Firmware
* @num_tx_rings: Currently configured number of TX rings
@@ -473,6 +489,7 @@ struct nfp_net {
unsigned is_vf:1;
unsigned is_nfp3200:1;
unsigned fw_loaded:1;
+ unsigned bpf_offload_skip_sw:1;
u32 ctrl;
u32 fl_bufsz;
@@ -502,6 +519,11 @@ struct nfp_net {
u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ];
u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ];
+ struct nfp_stat_pair rx_filter, rx_filter_prev;
+ unsigned long rx_filter_change;
+ struct timer_list rx_filter_stats_timer;
+ spinlock_t rx_filter_lock;
+
int max_tx_rings;
int max_rx_rings;
@@ -561,12 +583,28 @@ struct nfp_net {
/* Functions to read/write from/to a BAR
* Performs any endian conversion necessary.
*/
+static inline u16 nn_readb(struct nfp_net *nn, int off)
+{
+ return readb(nn->ctrl_bar + off);
+}
+
static inline void nn_writeb(struct nfp_net *nn, int off, u8 val)
{
writeb(val, nn->ctrl_bar + off);
}
-/* NFP-3200 can't handle 16-bit accesses too well - hence no readw/writew */
+/* NFP-3200 can't handle 16-bit accesses too well */
+static inline u16 nn_readw(struct nfp_net *nn, int off)
+{
+ WARN_ON_ONCE(nn->is_nfp3200);
+ return readw(nn->ctrl_bar + off);
+}
+
+static inline void nn_writew(struct nfp_net *nn, int off, u16 val)
+{
+ WARN_ON_ONCE(nn->is_nfp3200);
+ writew(val, nn->ctrl_bar + off);
+}
static inline u32 nn_readl(struct nfp_net *nn, int off)
{
@@ -757,4 +795,9 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn)
}
#endif /* CONFIG_NFP_NET_DEBUG */
+void nfp_net_filter_stats_timer(unsigned long data);
+int
+nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
+ struct tc_cls_bpf_offload *cls_bpf);
+
#endif /* _NFP_NET_H_ */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 252e4924de0f..415691edcaa5 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -60,6 +60,7 @@
#include <linux/ktime.h>
+#include <net/pkt_cls.h>
#include <net/vxlan.h>
#include "nfp_net_ctrl.h"
@@ -1292,38 +1293,72 @@ static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
}
}
-/**
- * nfp_net_set_hash() - Set SKB hash data
- * @netdev: adapter's net_device structure
- * @skb: SKB to set the hash data on
- * @rxd: RX descriptor
- *
- * The RSS hash and hash-type are pre-pended to the packet data.
- * Extract and decode it and set the skb fields.
- */
static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb,
- struct nfp_net_rx_desc *rxd)
+ unsigned int type, __be32 *hash)
{
- struct nfp_net_rx_hash *rx_hash;
-
- if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) ||
- !(netdev->features & NETIF_F_RXHASH))
+ if (!(netdev->features & NETIF_F_RXHASH))
return;
- rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));
-
- switch (be32_to_cpu(rx_hash->hash_type)) {
+ switch (type) {
case NFP_NET_RSS_IPV4:
case NFP_NET_RSS_IPV6:
case NFP_NET_RSS_IPV6_EX:
- skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3);
+ skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L3);
break;
default:
- skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4);
+ skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L4);
break;
}
}
+static void
+nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb,
+ struct nfp_net_rx_desc *rxd)
+{
+ struct nfp_net_rx_hash *rx_hash;
+
+ if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
+ return;
+
+ rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));
+
+ nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type),
+ &rx_hash->hash);
+}
+
+static void *
+nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
+ int meta_len)
+{
+ u8 *data = skb->data - meta_len;
+ u32 meta_info;
+
+ meta_info = get_unaligned_be32(data);
+ data += 4;
+
+ while (meta_info) {
+ switch (meta_info & NFP_NET_META_FIELD_MASK) {
+ case NFP_NET_META_HASH:
+ meta_info >>= NFP_NET_META_FIELD_SIZE;
+ nfp_net_set_hash(netdev, skb,
+ meta_info & NFP_NET_META_FIELD_MASK,
+ (__be32 *)data);
+ data += 4;
+ break;
+ case NFP_NET_META_MARK:
+ skb->mark = get_unaligned_be32(data);
+ data += 4;
+ break;
+ default:
+ return NULL;
+ }
+
+ meta_info >>= NFP_NET_META_FIELD_SIZE;
+ }
+
+ return data;
+}
+
/**
* nfp_net_rx() - receive up to @budget packets on @rx_ring
* @rx_ring: RX ring to receive from
@@ -1438,14 +1473,29 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
skb_reserve(skb, nn->rx_offset);
skb_put(skb, data_len - meta_len);
- nfp_net_set_hash(nn->netdev, skb, rxd);
-
/* Stats update */
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_pkts++;
r_vec->rx_bytes += skb->len;
u64_stats_update_end(&r_vec->rx_sync);
+ if (nn->fw_ver.major <= 3) {
+ nfp_net_set_hash_desc(nn->netdev, skb, rxd);
+ } else if (meta_len) {
+ void *end;
+
+ end = nfp_net_parse_meta(nn->netdev, skb, meta_len);
+ if (unlikely(end != skb->data)) {
+ u64_stats_update_begin(&r_vec->rx_sync);
+ r_vec->rx_drops++;
+ u64_stats_update_end(&r_vec->rx_sync);
+
+ dev_kfree_skb_any(skb);
+ nn_warn_ratelimit(nn, "invalid RX packet metadata\n");
+ continue;
+ }
+ }
+
skb_record_rx_queue(skb, rx_ring->idx);
skb->protocol = eth_type_trans(skb, nn->netdev);
@@ -2382,6 +2432,31 @@ static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev,
return stats;
}
+static bool nfp_net_ebpf_capable(struct nfp_net *nn)
+{
+ if (nn->cap & NFP_NET_CFG_CTRL_BPF &&
+ nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI)
+ return true;
+ return false;
+}
+
+static int
+nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
+ struct tc_to_netdev *tc)
+{
+ struct nfp_net *nn = netdev_priv(netdev);
+
+ if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+ return -ENOTSUPP;
+ if (proto != htons(ETH_P_ALL))
+ return -ENOTSUPP;
+
+ if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn))
+ return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf);
+
+ return -EINVAL;
+}
+
static int nfp_net_set_features(struct net_device *netdev,
netdev_features_t features)
{
@@ -2436,6 +2511,11 @@ static int nfp_net_set_features(struct net_device *netdev,
new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
}
+ if (changed & NETIF_F_HW_TC && nn->ctrl & NFP_NET_CFG_CTRL_BPF) {
+ nn_err(nn, "Cannot disable HW TC offload while in use\n");
+ return -EBUSY;
+ }
+
nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
netdev->features, features, changed);
@@ -2585,6 +2665,7 @@ static const struct net_device_ops nfp_net_netdev_ops = {
.ndo_stop = nfp_net_netdev_close,
.ndo_start_xmit = nfp_net_tx,
.ndo_get_stats64 = nfp_net_stat64,
+ .ndo_setup_tc = nfp_net_setup_tc,
.ndo_tx_timeout = nfp_net_tx_timeout,
.ndo_set_rx_mode = nfp_net_set_rx_mode,
.ndo_change_mtu = nfp_net_change_mtu,
@@ -2610,7 +2691,7 @@ void nfp_net_info(struct nfp_net *nn)
nn->fw_ver.resv, nn->fw_ver.class,
nn->fw_ver.major, nn->fw_ver.minor,
nn->max_mtu);
- nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+ nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
nn->cap,
nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "",
nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "",
@@ -2627,7 +2708,8 @@ void nfp_net_info(struct nfp_net *nn)
nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "",
nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "",
- nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "");
+ nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "",
+ nfp_net_ebpf_capable(nn) ? "BPF " : "");
}
/**
@@ -2670,10 +2752,13 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
spin_lock_init(&nn->reconfig_lock);
+ spin_lock_init(&nn->rx_filter_lock);
spin_lock_init(&nn->link_status_lock);
setup_timer(&nn->reconfig_timer,
nfp_net_reconfig_timer, (unsigned long)nn);
+ setup_timer(&nn->rx_filter_stats_timer,
+ nfp_net_filter_stats_timer, (unsigned long)nn);
return nn;
}
@@ -2795,6 +2880,9 @@ int nfp_net_netdev_init(struct net_device *netdev)
netdev->features = netdev->hw_features;
+ if (nfp_net_ebpf_capable(nn))
+ netdev->hw_features |= NETIF_F_HW_TC;
+
/* Advertise but disable TSO by default. */
netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index ad6c4e31cedd..93b10b441acb 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -66,6 +66,13 @@
#define NFP_NET_LSO_MAX_HDR_SZ 255
/**
+ * Prepend field types
+ */
+#define NFP_NET_META_FIELD_SIZE 4
+#define NFP_NET_META_HASH 1 /* next field carries hash type */
+#define NFP_NET_META_MARK 2
+
+/**
* Hash type pre-pended when a RSS hash was computed
*/
#define NFP_NET_RSS_NONE 0
@@ -123,6 +130,7 @@
#define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */
#define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */
#define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */
+#define NFP_NET_CFG_CTRL_BPF (0x1 << 27) /* BPF offload capable */
#define NFP_NET_CFG_UPDATE 0x0004
#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */
#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */
@@ -134,6 +142,7 @@
#define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */
#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */
#define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */
+#define NFP_NET_CFG_UPDATE_BPF (0x1 << 10) /* BPF program load */
#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */
#define NFP_NET_CFG_TXRS_ENABLE 0x0008
#define NFP_NET_CFG_RXRS_ENABLE 0x0010
@@ -196,10 +205,37 @@
#define NFP_NET_CFG_VXLAN_SZ 0x0008
/**
- * 64B reserved for future use (0x0080 - 0x00c0)
+ * NFP6000 - BPF section
+ * @NFP_NET_CFG_BPF_ABI: BPF ABI version
+ * @NFP_NET_CFG_BPF_CAP: BPF capabilities
+ * @NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes
+ * @NFP_NET_CFG_BPF_START: Offset at which BPF will be loaded
+ * @NFP_NET_CFG_BPF_DONE: Offset to jump to on exit
+ * @NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks
+ * @NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks
+ * @NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions
+ * @NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code
*/
-#define NFP_NET_CFG_RESERVED 0x0080
-#define NFP_NET_CFG_RESERVED_SZ 0x0040
+#define NFP_NET_CFG_BPF_ABI 0x0080
+#define NFP_NET_BPF_ABI 1
+#define NFP_NET_CFG_BPF_CAP 0x0081
+#define NFP_NET_BPF_CAP_RELO (1 << 0) /* seamless reload */
+#define NFP_NET_CFG_BPF_MAX_LEN 0x0082
+#define NFP_NET_CFG_BPF_START 0x0084
+#define NFP_NET_CFG_BPF_DONE 0x0086
+#define NFP_NET_CFG_BPF_STACK_SZ 0x0088
+#define NFP_NET_CFG_BPF_INL_MTU 0x0089
+#define NFP_NET_CFG_BPF_SIZE 0x008e
+#define NFP_NET_CFG_BPF_ADDR 0x0090
+#define NFP_NET_CFG_BPF_CFG_8CTX (1 << 0) /* 8ctx mode */
+#define NFP_NET_CFG_BPF_CFG_MASK 7ULL
+#define NFP_NET_CFG_BPF_ADDR_MASK (~NFP_NET_CFG_BPF_CFG_MASK)
+
+/**
+ * 40B reserved for future use (0x0098 - 0x00c0)
+ */
+#define NFP_NET_CFG_RESERVED 0x0098
+#define NFP_NET_CFG_RESERVED_SZ 0x0028
/**
* RSS configuration (0x0100 - 0x01ac):
@@ -303,6 +339,15 @@
#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80)
#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88)
+#define NFP_NET_CFG_STATS_APP0_FRAMES (NFP_NET_CFG_STATS_BASE + 0x90)
+#define NFP_NET_CFG_STATS_APP0_BYTES (NFP_NET_CFG_STATS_BASE + 0x98)
+#define NFP_NET_CFG_STATS_APP1_FRAMES (NFP_NET_CFG_STATS_BASE + 0xa0)
+#define NFP_NET_CFG_STATS_APP1_BYTES (NFP_NET_CFG_STATS_BASE + 0xa8)
+#define NFP_NET_CFG_STATS_APP2_FRAMES (NFP_NET_CFG_STATS_BASE + 0xb0)
+#define NFP_NET_CFG_STATS_APP2_BYTES (NFP_NET_CFG_STATS_BASE + 0xb8)
+#define NFP_NET_CFG_STATS_APP3_FRAMES (NFP_NET_CFG_STATS_BASE + 0xc0)
+#define NFP_NET_CFG_STATS_APP3_BYTES (NFP_NET_CFG_STATS_BASE + 0xc8)
+
/**
* Per ring stats (0x1000 - 0x1800)
* options, 64bit per entry
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 4c9897220969..3418f2277e9d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -106,6 +106,18 @@ static const struct _nfp_net_et_stats nfp_net_et_stats[] = {
{"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)},
{"dev_tx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)},
{"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)},
+
+ {"bpf_pass_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_FRAMES)},
+ {"bpf_pass_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_BYTES)},
+ /* see comments in outro functions in nfp_bpf_jit.c to find out
+ * how different BPF modes use app-specific counters
+ */
+ {"bpf_app1_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_FRAMES)},
+ {"bpf_app1_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_BYTES)},
+ {"bpf_app2_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_FRAMES)},
+ {"bpf_app2_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_BYTES)},
+ {"bpf_app3_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_FRAMES)},
+ {"bpf_app3_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_BYTES)},
};
#define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
new file mode 100644
index 000000000000..43f42f842eda
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
@@ -0,0 +1,291 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * nfp_net_offload.c
+ * Netronome network device driver: TC offload functions for PF and VF
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+
+#include "nfp_bpf.h"
+#include "nfp_net_ctrl.h"
+#include "nfp_net.h"
+
+void nfp_net_filter_stats_timer(unsigned long data)
+{
+ struct nfp_net *nn = (void *)data;
+ struct nfp_stat_pair latest;
+
+ spin_lock_bh(&nn->rx_filter_lock);
+
+ if (nn->ctrl & NFP_NET_CFG_CTRL_BPF)
+ mod_timer(&nn->rx_filter_stats_timer,
+ jiffies + NFP_NET_STAT_POLL_IVL);
+
+ spin_unlock_bh(&nn->rx_filter_lock);
+
+ latest.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES);
+ latest.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES);
+
+ if (latest.pkts != nn->rx_filter.pkts)
+ nn->rx_filter_change = jiffies;
+
+ nn->rx_filter = latest;
+}
+
+static void nfp_net_bpf_stats_reset(struct nfp_net *nn)
+{
+ nn->rx_filter.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES);
+ nn->rx_filter.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES);
+ nn->rx_filter_prev = nn->rx_filter;
+ nn->rx_filter_change = jiffies;
+}
+
+static int
+nfp_net_bpf_stats_update(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
+{
+ struct tc_action *a;
+ LIST_HEAD(actions);
+ u64 bytes, pkts;
+
+ pkts = nn->rx_filter.pkts - nn->rx_filter_prev.pkts;
+ bytes = nn->rx_filter.bytes - nn->rx_filter_prev.bytes;
+ bytes -= pkts * ETH_HLEN;
+
+ nn->rx_filter_prev = nn->rx_filter;
+
+ preempt_disable();
+
+ tcf_exts_to_list(cls_bpf->exts, &actions);
+ list_for_each_entry(a, &actions, list)
+ tcf_action_stats_update(a, bytes, pkts, nn->rx_filter_change);
+
+ preempt_enable();
+
+ return 0;
+}
+
+static int
+nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
+{
+ const struct tc_action *a;
+ LIST_HEAD(actions);
+
+ /* TC direct action */
+ if (cls_bpf->exts_integrated) {
+ if (tc_no_actions(cls_bpf->exts))
+ return NN_ACT_DIRECT;
+
+ return -ENOTSUPP;
+ }
+
+ /* TC legacy mode */
+ if (!tc_single_action(cls_bpf->exts))
+ return -ENOTSUPP;
+
+ tcf_exts_to_list(cls_bpf->exts, &actions);
+ list_for_each_entry(a, &actions, list) {
+ if (is_tcf_gact_shot(a))
+ return NN_ACT_TC_DROP;
+
+ if (is_tcf_mirred_redirect(a) &&
+ tcf_mirred_ifindex(a) == nn->netdev->ifindex)
+ return NN_ACT_TC_REDIR;
+ }
+
+ return -ENOTSUPP;
+}
+
+static int
+nfp_net_bpf_offload_prepare(struct nfp_net *nn,
+ struct tc_cls_bpf_offload *cls_bpf,
+ struct nfp_bpf_result *res,
+ void **code, dma_addr_t *dma_addr, u16 max_instr)
+{
+ unsigned int code_sz = max_instr * sizeof(u64);
+ enum nfp_bpf_action_type act;
+ u16 start_off, done_off;
+ unsigned int max_mtu;
+ int ret;
+
+ ret = nfp_net_bpf_get_act(nn, cls_bpf);
+ if (ret < 0)
+ return ret;
+ act = ret;
+
+ max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
+ if (max_mtu < nn->netdev->mtu) {
+ nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n");
+ return -ENOTSUPP;
+ }
+
+ start_off = nn_readw(nn, NFP_NET_CFG_BPF_START);
+ done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE);
+
+ *code = dma_zalloc_coherent(&nn->pdev->dev, code_sz, dma_addr,
+ GFP_KERNEL);
+ if (!*code)
+ return -ENOMEM;
+
+ ret = nfp_bpf_jit(cls_bpf->prog, *code, act, start_off, done_off,
+ max_instr, res);
+ if (ret)
+ goto out;
+
+ return 0;
+
+out:
+ dma_free_coherent(&nn->pdev->dev, code_sz, *code, *dma_addr);
+ return ret;
+}
+
+static void
+nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags,
+ void *code, dma_addr_t dma_addr,
+ unsigned int code_sz, unsigned int n_instr,
+ bool dense_mode)
+{
+ u64 bpf_addr = dma_addr;
+ int err;
+
+ nn->bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW);
+
+ if (dense_mode)
+ bpf_addr |= NFP_NET_CFG_BPF_CFG_8CTX;
+
+ nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr);
+ nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, bpf_addr);
+
+ /* Load up the JITed code */
+ err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF);
+ if (err)
+ nn_err(nn, "FW command error while loading BPF: %d\n", err);
+
+ /* Enable passing packets through BPF function */
+ nn->ctrl |= NFP_NET_CFG_CTRL_BPF;
+ nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl);
+ err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
+ if (err)
+ nn_err(nn, "FW command error while enabling BPF: %d\n", err);
+
+ dma_free_coherent(&nn->pdev->dev, code_sz, code, dma_addr);
+
+ nfp_net_bpf_stats_reset(nn);
+ mod_timer(&nn->rx_filter_stats_timer, jiffies + NFP_NET_STAT_POLL_IVL);
+}
+
+static int nfp_net_bpf_stop(struct nfp_net *nn)
+{
+ if (!(nn->ctrl & NFP_NET_CFG_CTRL_BPF))
+ return 0;
+
+ spin_lock_bh(&nn->rx_filter_lock);
+ nn->ctrl &= ~NFP_NET_CFG_CTRL_BPF;
+ spin_unlock_bh(&nn->rx_filter_lock);
+ nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl);
+
+ del_timer_sync(&nn->rx_filter_stats_timer);
+ nn->bpf_offload_skip_sw = 0;
+
+ return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
+}
+
+int
+nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
+ struct tc_cls_bpf_offload *cls_bpf)
+{
+ struct nfp_bpf_result res;
+ dma_addr_t dma_addr;
+ u16 max_instr;
+ void *code;
+ int err;
+
+ max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN);
+
+ switch (cls_bpf->command) {
+ case TC_CLSBPF_REPLACE:
+ /* There is nothing stopping us from implementing seamless
+ * replace but the simple method of loading I adopted in
+ * the firmware does not handle atomic replace (i.e. we have to
+ * stop the BPF offload and re-enable it). Leaking-in a few
+ * frames which didn't have BPF applied in the hardware should
+ * be fine if software fallback is available, though.
+ */
+ if (nn->bpf_offload_skip_sw)
+ return -EBUSY;
+
+ err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code,
+ &dma_addr, max_instr);
+ if (err)
+ return err;
+
+ nfp_net_bpf_stop(nn);
+ nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code,
+ dma_addr, max_instr * sizeof(u64),
+ res.n_instr, res.dense_mode);
+ return 0;
+
+ case TC_CLSBPF_ADD:
+ if (nn->ctrl & NFP_NET_CFG_CTRL_BPF)
+ return -EBUSY;
+
+ err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code,
+ &dma_addr, max_instr);
+ if (err)
+ return err;
+
+ nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code,
+ dma_addr, max_instr * sizeof(u64),
+ res.n_instr, res.dense_mode);
+ return 0;
+
+ case TC_CLSBPF_DESTROY:
+ return nfp_net_bpf_stop(nn);
+
+ case TC_CLSBPF_STATS:
+ return nfp_net_bpf_stats_update(nn, cls_bpf);
+
+ default:
+ return -ENOTSUPP;
+ }
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
index f7062cb648e1..2800bbf65a89 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
@@ -148,7 +148,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
dev_warn(&pdev->dev, "OBSOLETE Firmware detected - VF isolation not available\n");
} else {
switch (fw_ver.major) {
- case 1 ... 3:
+ case 1 ... 4:
if (is_nfp3200) {
stride = 2;
tx_bar_no = NFP_NET_Q0_BAR;
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index dd204d9704c6..77a5364f7a10 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -1269,13 +1269,13 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel)
if (IS_ERR(ptp->phc_clock)) {
rc = PTR_ERR(ptp->phc_clock);
goto fail3;
- }
-
- INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker);
- ptp->pps_workwq = create_singlethread_workqueue("sfc_pps");
- if (!ptp->pps_workwq) {
- rc = -ENOMEM;
- goto fail4;
+ } else if (ptp->phc_clock) {
+ INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker);
+ ptp->pps_workwq = create_singlethread_workqueue("sfc_pps");
+ if (!ptp->pps_workwq) {
+ rc = -ENOMEM;
+ goto fail4;
+ }
}
}
ptp->nic_ts_enabled = false;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 170a18b61281..6e3b82972ce8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -187,7 +187,7 @@ int stmmac_ptp_register(struct stmmac_priv *priv)
if (IS_ERR(priv->ptp_clock)) {
priv->ptp_clock = NULL;
pr_err("ptp_clock_register() failed on %s\n", priv->dev->name);
- } else
+ } else if (priv->ptp_clock)
pr_debug("Added PTP HW clock successfully on %s\n",
priv->dev->name);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
new file mode 100644
index 000000000000..c5cb661712c9
--- /dev/null
+++ b/include/linux/bpf_verifier.h
@@ -0,0 +1,90 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _LINUX_BPF_VERIFIER_H
+#define _LINUX_BPF_VERIFIER_H 1
+
+#include <linux/bpf.h> /* for enum bpf_reg_type */
+#include <linux/filter.h> /* for MAX_BPF_STACK */
+
+struct bpf_reg_state {
+ enum bpf_reg_type type;
+ union {
+ /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
+ s64 imm;
+
+ /* valid when type == PTR_TO_PACKET* */
+ struct {
+ u32 id;
+ u16 off;
+ u16 range;
+ };
+
+ /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
+ * PTR_TO_MAP_VALUE_OR_NULL
+ */
+ struct bpf_map *map_ptr;
+ };
+};
+
+enum bpf_stack_slot_type {
+ STACK_INVALID, /* nothing was stored in this stack slot */
+ STACK_SPILL, /* register spilled into stack */
+ STACK_MISC /* BPF program wrote some data into this slot */
+};
+
+#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
+
+/* state of the program:
+ * type of all registers and stack info
+ */
+struct bpf_verifier_state {
+ struct bpf_reg_state regs[MAX_BPF_REG];
+ u8 stack_slot_type[MAX_BPF_STACK];
+ struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
+};
+
+/* linked list of verifier states used to prune search */
+struct bpf_verifier_state_list {
+ struct bpf_verifier_state state;
+ struct bpf_verifier_state_list *next;
+};
+
+struct bpf_insn_aux_data {
+ enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
+};
+
+#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
+
+struct bpf_verifier_env;
+struct bpf_ext_analyzer_ops {
+ int (*insn_hook)(struct bpf_verifier_env *env,
+ int insn_idx, int prev_insn_idx);
+};
+
+/* single container for all structs
+ * one verifier_env per bpf_check() call
+ */
+struct bpf_verifier_env {
+ struct bpf_prog *prog; /* eBPF program being verified */
+ struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
+ int stack_size; /* number of states to be processed */
+ struct bpf_verifier_state cur_state; /* current verifier state */
+ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
+ const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */
+ void *analyzer_priv; /* pointer to external analyzer's private data */
+ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
+ u32 used_map_cnt; /* number of used maps */
+ u32 id_gen; /* used to generate unique reg IDs */
+ bool allow_ptr_leaks;
+ bool seen_direct_write;
+ struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
+};
+
+int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
+ void *priv);
+
+#endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a10d8d18ce19..69f242c71865 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -789,6 +789,7 @@ enum {
TC_SETUP_CLSU32,
TC_SETUP_CLSFLOWER,
TC_SETUP_MATCHALL,
+ TC_SETUP_CLSBPF,
};
struct tc_cls_u32_offload;
@@ -800,6 +801,7 @@ struct tc_to_netdev {
struct tc_cls_u32_offload *cls_u32;
struct tc_cls_flower_offload *cls_flower;
struct tc_cls_matchall_offload *cls_mall;
+ struct tc_cls_bpf_offload *cls_bpf;
};
};
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 6b15e168148a..5ad54fc66cf0 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -127,6 +127,11 @@ struct ptp_clock;
*
* @info: Structure describing the new clock.
* @parent: Pointer to the parent device of the new clock.
+ *
+ * Returns a valid pointer on success or PTR_ERR on failure. If PHC
+ * support is missing at the configuration level, this function
+ * returns NULL, and drivers are expected to gracefully handle that
+ * case separately.
*/
extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c6dab3f7457c..9bf60b556bd2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3085,6 +3085,7 @@ bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu);
struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
int skb_ensure_writable(struct sk_buff *skb, int write_len);
+int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci);
int skb_vlan_pop(struct sk_buff *skb);
int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy,
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a459be5fe1c2..5ccaa4be7d96 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -486,4 +486,20 @@ struct tc_cls_matchall_offload {
unsigned long cookie;
};
+enum tc_clsbpf_command {
+ TC_CLSBPF_ADD,
+ TC_CLSBPF_REPLACE,
+ TC_CLSBPF_DESTROY,
+ TC_CLSBPF_STATS,
+};
+
+struct tc_cls_bpf_offload {
+ enum tc_clsbpf_command command;
+ struct tcf_exts *exts;
+ struct bpf_prog *prog;
+ const char *name;
+ bool exts_integrated;
+ u32 gen_flags;
+};
+
#endif
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 632e205ca54b..87a7f42e7639 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -83,9 +83,9 @@
#endif
/* Round an int up to the next multiple of 4. */
-#define WORD_ROUND(s) (((s)+3)&~3)
+#define SCTP_PAD4(s) (((s)+3)&~3)
/* Truncate to the previous multiple of 4. */
-#define WORD_TRUNC(s) ((s)&~3)
+#define SCTP_TRUNC4(s) ((s)&~3)
/*
* Function declarations.
@@ -433,7 +433,7 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu)
if (asoc->user_frag)
frag = min_t(int, frag, asoc->user_frag);
- frag = WORD_TRUNC(min_t(int, frag, SCTP_MAX_CHUNK_LEN));
+ frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN));
return frag;
}
@@ -462,7 +462,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member)
for (pos.v = chunk->member;\
pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\
- pos.v += WORD_ROUND(ntohs(pos.p->length)))
+ pos.v += SCTP_PAD4(ntohs(pos.p->length)))
#define sctp_walk_errors(err, chunk_hdr)\
_sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length))
@@ -472,7 +472,7 @@ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
sizeof(sctp_chunkhdr_t));\
(void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
ntohs(err->length) >= sizeof(sctp_errhdr_t); \
- err = (sctp_errhdr_t *)((void *)err + WORD_ROUND(ntohs(err->length))))
+ err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length))))
#define sctp_walk_fwdtsn(pos, chunk)\
_sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk))
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 8915b61bbf83..8fd715f806a2 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -396,6 +396,7 @@ enum {
TCA_BPF_FD,
TCA_BPF_NAME,
TCA_BPF_FLAGS,
+ TCA_BPF_FLAGS_GEN,
__TCA_BPF_MAX,
};
diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h
index be72b6e3843b..bddb272b843f 100644
--- a/include/uapi/linux/tc_act/tc_vlan.h
+++ b/include/uapi/linux/tc_act/tc_vlan.h
@@ -16,6 +16,7 @@
#define TCA_VLAN_ACT_POP 1
#define TCA_VLAN_ACT_PUSH 2
+#define TCA_VLAN_ACT_MODIFY 3
struct tc_vlan {
tc_gen;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3a75ee3bdcd1..8c3f794c7028 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
@@ -126,77 +127,16 @@
* are set to NOT_INIT to indicate that they are no longer readable.
*/
-struct reg_state {
- enum bpf_reg_type type;
- union {
- /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
- s64 imm;
-
- /* valid when type == PTR_TO_PACKET* */
- struct {
- u32 id;
- u16 off;
- u16 range;
- };
-
- /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
- * PTR_TO_MAP_VALUE_OR_NULL
- */
- struct bpf_map *map_ptr;
- };
-};
-
-enum bpf_stack_slot_type {
- STACK_INVALID, /* nothing was stored in this stack slot */
- STACK_SPILL, /* register spilled into stack */
- STACK_MISC /* BPF program wrote some data into this slot */
-};
-
-#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
-
-/* state of the program:
- * type of all registers and stack info
- */
-struct verifier_state {
- struct reg_state regs[MAX_BPF_REG];
- u8 stack_slot_type[MAX_BPF_STACK];
- struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
-};
-
-/* linked list of verifier states used to prune search */
-struct verifier_state_list {
- struct verifier_state state;
- struct verifier_state_list *next;
-};
-
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
-struct verifier_stack_elem {
+struct bpf_verifier_stack_elem {
/* verifer state is 'st'
* before processing instruction 'insn_idx'
* and after processing instruction 'prev_insn_idx'
*/
- struct verifier_state st;
+ struct bpf_verifier_state st;
int insn_idx;
int prev_insn_idx;
- struct verifier_stack_elem *next;
-};
-
-#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
-
-/* single container for all structs
- * one verifier_env per bpf_check() call
- */
-struct verifier_env {
- struct bpf_prog *prog; /* eBPF program being verified */
- struct verifier_stack_elem *head; /* stack of verifier states to be processed */
- int stack_size; /* number of states to be processed */
- struct verifier_state cur_state; /* current verifier state */
- struct verifier_state_list **explored_states; /* search pruning optimization */
- struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
- u32 used_map_cnt; /* number of used maps */
- u32 id_gen; /* used to generate unique reg IDs */
- bool allow_ptr_leaks;
- bool seen_direct_write;
+ struct bpf_verifier_stack_elem *next;
};
#define BPF_COMPLEXITY_LIMIT_INSNS 65536
@@ -249,9 +189,9 @@ static const char * const reg_type_str[] = {
[PTR_TO_PACKET_END] = "pkt_end",
};
-static void print_verifier_state(struct verifier_state *state)
+static void print_verifier_state(struct bpf_verifier_state *state)
{
- struct reg_state *reg;
+ struct bpf_reg_state *reg;
enum bpf_reg_type t;
int i;
@@ -427,9 +367,9 @@ static void print_bpf_insn(struct bpf_insn *insn)
}
}
-static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
+static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx)
{
- struct verifier_stack_elem *elem;
+ struct bpf_verifier_stack_elem *elem;
int insn_idx;
if (env->head == NULL)
@@ -446,12 +386,12 @@ static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
return insn_idx;
}
-static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx,
- int prev_insn_idx)
+static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
+ int insn_idx, int prev_insn_idx)
{
- struct verifier_stack_elem *elem;
+ struct bpf_verifier_stack_elem *elem;
- elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL);
+ elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
if (!elem)
goto err;
@@ -477,7 +417,7 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
-static void init_reg_state(struct reg_state *regs)
+static void init_reg_state(struct bpf_reg_state *regs)
{
int i;
@@ -493,7 +433,7 @@ static void init_reg_state(struct reg_state *regs)
regs[BPF_REG_1].type = PTR_TO_CTX;
}
-static void mark_reg_unknown_value(struct reg_state *regs, u32 regno)
+static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
{
BUG_ON(regno >= MAX_BPF_REG);
regs[regno].type = UNKNOWN_VALUE;
@@ -506,7 +446,7 @@ enum reg_arg_type {
DST_OP_NO_MARK /* same as above, check only, don't mark */
};
-static int check_reg_arg(struct reg_state *regs, u32 regno,
+static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
enum reg_arg_type t)
{
if (regno >= MAX_BPF_REG) {
@@ -566,8 +506,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
/* check_stack_read/write functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
-static int check_stack_write(struct verifier_state *state, int off, int size,
- int value_regno)
+static int check_stack_write(struct bpf_verifier_state *state, int off,
+ int size, int value_regno)
{
int i;
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@@ -592,7 +532,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
} else {
/* regular write of data into stack */
state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
- (struct reg_state) {};
+ (struct bpf_reg_state) {};
for (i = 0; i < size; i++)
state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
@@ -600,7 +540,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
return 0;
}
-static int check_stack_read(struct verifier_state *state, int off, int size,
+static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
int value_regno)
{
u8 *slot_type;
@@ -641,7 +581,7 @@ static int check_stack_read(struct verifier_state *state, int off, int size,
}
/* check read/write into map element returned by bpf_map_lookup_elem() */
-static int check_map_access(struct verifier_env *env, u32 regno, int off,
+static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
int size)
{
struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
@@ -656,7 +596,7 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off,
#define MAX_PACKET_OFF 0xffff
-static bool may_access_direct_pkt_data(struct verifier_env *env,
+static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
const struct bpf_call_arg_meta *meta)
{
switch (env->prog->type) {
@@ -673,11 +613,11 @@ static bool may_access_direct_pkt_data(struct verifier_env *env,
}
}
-static int check_packet_access(struct verifier_env *env, u32 regno, int off,
+static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
int size)
{
- struct reg_state *regs = env->cur_state.regs;
- struct reg_state *reg = &regs[regno];
+ struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *reg = &regs[regno];
off += reg->off;
if (off < 0 || size <= 0 || off + size > reg->range) {
@@ -689,9 +629,13 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off,
}
/* check access to 'struct bpf_context' fields */
-static int check_ctx_access(struct verifier_env *env, int off, int size,
+static int check_ctx_access(struct bpf_verifier_env *env, int off, int size,
enum bpf_access_type t, enum bpf_reg_type *reg_type)
{
+ /* for analyzer ctx accesses are already validated and converted */
+ if (env->analyzer_ops)
+ return 0;
+
if (env->prog->aux->ops->is_valid_access &&
env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) {
/* remember the offset of last byte accessed in ctx */
@@ -704,7 +648,7 @@ static int check_ctx_access(struct verifier_env *env, int off, int size,
return -EACCES;
}
-static bool is_pointer_value(struct verifier_env *env, int regno)
+static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
{
if (env->allow_ptr_leaks)
return false;
@@ -718,12 +662,13 @@ static bool is_pointer_value(struct verifier_env *env, int regno)
}
}
-static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg,
- int off, int size)
+static int check_ptr_alignment(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, int off, int size)
{
if (reg->type != PTR_TO_PACKET) {
if (off % size != 0) {
- verbose("misaligned access off %d size %d\n", off, size);
+ verbose("misaligned access off %d size %d\n",
+ off, size);
return -EACCES;
} else {
return 0;
@@ -764,12 +709,12 @@ static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg,
* if t==write && value_regno==-1, some unknown value is stored into memory
* if t==read && value_regno==-1, don't care what we read from memory
*/
-static int check_mem_access(struct verifier_env *env, u32 regno, int off,
+static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
int bpf_size, enum bpf_access_type t,
int value_regno)
{
- struct verifier_state *state = &env->cur_state;
- struct reg_state *reg = &state->regs[regno];
+ struct bpf_verifier_state *state = &env->cur_state;
+ struct bpf_reg_state *reg = &state->regs[regno];
int size, err = 0;
if (reg->type == PTR_TO_STACK)
@@ -855,9 +800,9 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
return err;
}
-static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
+static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = env->cur_state.regs;
int err;
if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
@@ -891,12 +836,12 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
* bytes from that pointer, make sure that it's within stack boundary
* and all elements of stack are initialized
*/
-static int check_stack_boundary(struct verifier_env *env, int regno,
+static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
int access_size, bool zero_size_allowed,
struct bpf_call_arg_meta *meta)
{
- struct verifier_state *state = &env->cur_state;
- struct reg_state *regs = state->regs;
+ struct bpf_verifier_state *state = &env->cur_state;
+ struct bpf_reg_state *regs = state->regs;
int off, i;
if (regs[regno].type != PTR_TO_STACK) {
@@ -935,11 +880,11 @@ static int check_stack_boundary(struct verifier_env *env, int regno,
return 0;
}
-static int check_func_arg(struct verifier_env *env, u32 regno,
+static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
enum bpf_arg_type arg_type,
struct bpf_call_arg_meta *meta)
{
- struct reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
+ struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
enum bpf_reg_type expected_type, type = reg->type;
int err = 0;
@@ -1144,10 +1089,10 @@ static int check_raw_mode(const struct bpf_func_proto *fn)
return count > 1 ? -EINVAL : 0;
}
-static void clear_all_pkt_pointers(struct verifier_env *env)
+static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
{
- struct verifier_state *state = &env->cur_state;
- struct reg_state *regs = state->regs, *reg;
+ struct bpf_verifier_state *state = &env->cur_state;
+ struct bpf_reg_state *regs = state->regs, *reg;
int i;
for (i = 0; i < MAX_BPF_REG; i++)
@@ -1167,12 +1112,12 @@ static void clear_all_pkt_pointers(struct verifier_env *env)
}
}
-static int check_call(struct verifier_env *env, int func_id)
+static int check_call(struct bpf_verifier_env *env, int func_id)
{
- struct verifier_state *state = &env->cur_state;
+ struct bpf_verifier_state *state = &env->cur_state;
const struct bpf_func_proto *fn = NULL;
- struct reg_state *regs = state->regs;
- struct reg_state *reg;
+ struct bpf_reg_state *regs = state->regs;
+ struct bpf_reg_state *reg;
struct bpf_call_arg_meta meta;
bool changes_data;
int i, err;
@@ -1275,12 +1220,13 @@ static int check_call(struct verifier_env *env, int func_id)
return 0;
}
-static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn)
+static int check_packet_ptr_add(struct bpf_verifier_env *env,
+ struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
- struct reg_state *dst_reg = &regs[insn->dst_reg];
- struct reg_state *src_reg = &regs[insn->src_reg];
- struct reg_state tmp_reg;
+ struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+ struct bpf_reg_state *src_reg = &regs[insn->src_reg];
+ struct bpf_reg_state tmp_reg;
s32 imm;
if (BPF_SRC(insn->code) == BPF_K) {
@@ -1348,10 +1294,10 @@ add_imm:
return 0;
}
-static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
+static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
- struct reg_state *dst_reg = &regs[insn->dst_reg];
+ struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
u8 opcode = BPF_OP(insn->code);
s64 imm_log2;
@@ -1361,7 +1307,7 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
*/
if (BPF_SRC(insn->code) == BPF_X) {
- struct reg_state *src_reg = &regs[insn->src_reg];
+ struct bpf_reg_state *src_reg = &regs[insn->src_reg];
if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 &&
dst_reg->imm && opcode == BPF_ADD) {
@@ -1450,11 +1396,12 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
return 0;
}
-static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn)
+static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
+ struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
- struct reg_state *dst_reg = &regs[insn->dst_reg];
- struct reg_state *src_reg = &regs[insn->src_reg];
+ struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+ struct bpf_reg_state *src_reg = &regs[insn->src_reg];
u8 opcode = BPF_OP(insn->code);
/* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn.
@@ -1471,9 +1418,9 @@ static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn)
}
/* check validity of 32-bit and 64-bit arithmetic operations */
-static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
+static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs, *dst_reg;
+ struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
u8 opcode = BPF_OP(insn->code);
int err;
@@ -1647,10 +1594,10 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
return 0;
}
-static void find_good_pkt_pointers(struct verifier_state *state,
- const struct reg_state *dst_reg)
+static void find_good_pkt_pointers(struct bpf_verifier_state *state,
+ struct bpf_reg_state *dst_reg)
{
- struct reg_state *regs = state->regs, *reg;
+ struct bpf_reg_state *regs = state->regs, *reg;
int i;
/* LLVM can generate two kind of checks:
@@ -1696,11 +1643,11 @@ static void find_good_pkt_pointers(struct verifier_state *state,
}
}
-static int check_cond_jmp_op(struct verifier_env *env,
+static int check_cond_jmp_op(struct bpf_verifier_env *env,
struct bpf_insn *insn, int *insn_idx)
{
- struct verifier_state *other_branch, *this_branch = &env->cur_state;
- struct reg_state *regs = this_branch->regs, *dst_reg;
+ struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state;
+ struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
u8 opcode = BPF_OP(insn->code);
int err;
@@ -1762,7 +1709,7 @@ static int check_cond_jmp_op(struct verifier_env *env,
if (!other_branch)
return -EFAULT;
- /* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */
+ /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
if (BPF_SRC(insn->code) == BPF_K &&
insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
@@ -1804,9 +1751,9 @@ static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
}
/* verify BPF_LD_IMM64 instruction */
-static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
+static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = env->cur_state.regs;
int err;
if (BPF_SIZE(insn->code) != BPF_DW) {
@@ -1822,9 +1769,19 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
if (err)
return err;
- if (insn->src_reg == 0)
- /* generic move 64-bit immediate into a register */
+ if (insn->src_reg == 0) {
+ /* generic move 64-bit immediate into a register,
+ * only analyzer needs to collect the ld_imm value.
+ */
+ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
+
+ if (!env->analyzer_ops)
+ return 0;
+
+ regs[insn->dst_reg].type = CONST_IMM;
+ regs[insn->dst_reg].imm = imm;
return 0;
+ }
/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
@@ -1861,11 +1818,11 @@ static bool may_access_skb(enum bpf_prog_type type)
* Output:
* R0 - 8/16/32-bit skb data converted to cpu endianness
*/
-static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
+static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = env->cur_state.regs;
u8 mode = BPF_MODE(insn->code);
- struct reg_state *reg;
+ struct bpf_reg_state *reg;
int i, err;
if (!may_access_skb(env->prog->type)) {
@@ -1951,7 +1908,7 @@ enum {
BRANCH = 2,
};
-#define STATE_LIST_MARK ((struct verifier_state_list *) -1L)
+#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
static int *insn_stack; /* stack of insns to process */
static int cur_stack; /* current stack index */
@@ -1962,7 +1919,7 @@ static int *insn_state;
* w - next instruction
* e - edge
*/
-static int push_insn(int t, int w, int e, struct verifier_env *env)
+static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
{
if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
return 0;
@@ -2003,7 +1960,7 @@ static int push_insn(int t, int w, int e, struct verifier_env *env)
/* non-recursive depth-first-search to detect loops in BPF program
* loop == back-edge in directed graph
*/
-static int check_cfg(struct verifier_env *env)
+static int check_cfg(struct bpf_verifier_env *env)
{
struct bpf_insn *insns = env->prog->insnsi;
int insn_cnt = env->prog->len;
@@ -2112,7 +2069,8 @@ err_free:
/* the following conditions reduce the number of explored insns
* from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
*/
-static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur)
+static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
+ struct bpf_reg_state *cur)
{
if (old->id != cur->id)
return false;
@@ -2187,9 +2145,10 @@ static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur)
* whereas register type in current state is meaningful, it means that
* the current state will reach 'bpf_exit' instruction safely
*/
-static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
+static bool states_equal(struct bpf_verifier_state *old,
+ struct bpf_verifier_state *cur)
{
- struct reg_state *rold, *rcur;
+ struct bpf_reg_state *rold, *rcur;
int i;
for (i = 0; i < MAX_BPF_REG; i++) {
@@ -2229,9 +2188,9 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
* the same, check that stored pointers types
* are the same as well.
* Ex: explored safe path could have stored
- * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8}
+ * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8}
* but current path has stored:
- * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16}
+ * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16}
* such verifier states are not equivalent.
* return false to continue verification of this path
*/
@@ -2242,10 +2201,10 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
return true;
}
-static int is_state_visited(struct verifier_env *env, int insn_idx)
+static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
- struct verifier_state_list *new_sl;
- struct verifier_state_list *sl;
+ struct bpf_verifier_state_list *new_sl;
+ struct bpf_verifier_state_list *sl;
sl = env->explored_states[insn_idx];
if (!sl)
@@ -2269,7 +2228,7 @@ static int is_state_visited(struct verifier_env *env, int insn_idx)
* it will be rejected. Since there are no loops, we won't be
* seeing this 'insn_idx' instruction again on the way to bpf_exit
*/
- new_sl = kmalloc(sizeof(struct verifier_state_list), GFP_USER);
+ new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER);
if (!new_sl)
return -ENOMEM;
@@ -2280,11 +2239,20 @@ static int is_state_visited(struct verifier_env *env, int insn_idx)
return 0;
}
-static int do_check(struct verifier_env *env)
+static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
+ int insn_idx, int prev_insn_idx)
+{
+ if (!env->analyzer_ops || !env->analyzer_ops->insn_hook)
+ return 0;
+
+ return env->analyzer_ops->insn_hook(env, insn_idx, prev_insn_idx);
+}
+
+static int do_check(struct bpf_verifier_env *env)
{
- struct verifier_state *state = &env->cur_state;
+ struct bpf_verifier_state *state = &env->cur_state;
struct bpf_insn *insns = env->prog->insnsi;
- struct reg_state *regs = state->regs;
+ struct bpf_reg_state *regs = state->regs;
int insn_cnt = env->prog->len;
int insn_idx, prev_insn_idx = 0;
int insn_processed = 0;
@@ -2338,13 +2306,17 @@ static int do_check(struct verifier_env *env)
print_bpf_insn(insn);
}
+ err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
+ if (err)
+ return err;
+
if (class == BPF_ALU || class == BPF_ALU64) {
err = check_alu_op(env, insn);
if (err)
return err;
} else if (class == BPF_LDX) {
- enum bpf_reg_type src_reg_type;
+ enum bpf_reg_type *prev_src_type, src_reg_type;
/* check for reserved fields is already done */
@@ -2374,16 +2346,18 @@ static int do_check(struct verifier_env *env)
continue;
}
- if (insn->imm == 0) {
+ prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
+
+ if (*prev_src_type == NOT_INIT) {
/* saw a valid insn
* dst_reg = *(u32 *)(src_reg + off)
- * use reserved 'imm' field to mark this insn
+ * save type to validate intersecting paths
*/
- insn->imm = src_reg_type;
+ *prev_src_type = src_reg_type;
- } else if (src_reg_type != insn->imm &&
+ } else if (src_reg_type != *prev_src_type &&
(src_reg_type == PTR_TO_CTX ||
- insn->imm == PTR_TO_CTX)) {
+ *prev_src_type == PTR_TO_CTX)) {
/* ABuser program is trying to use the same insn
* dst_reg = *(u32*) (src_reg + off)
* with different pointer types:
@@ -2396,7 +2370,7 @@ static int do_check(struct verifier_env *env)
}
} else if (class == BPF_STX) {
- enum bpf_reg_type dst_reg_type;
+ enum bpf_reg_type *prev_dst_type, dst_reg_type;
if (BPF_MODE(insn->code) == BPF_XADD) {
err = check_xadd(env, insn);
@@ -2424,11 +2398,13 @@ static int do_check(struct verifier_env *env)
if (err)
return err;
- if (insn->imm == 0) {
- insn->imm = dst_reg_type;
- } else if (dst_reg_type != insn->imm &&
+ prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
+
+ if (*prev_dst_type == NOT_INIT) {
+ *prev_dst_type = dst_reg_type;
+ } else if (dst_reg_type != *prev_dst_type &&
(dst_reg_type == PTR_TO_CTX ||
- insn->imm == PTR_TO_CTX)) {
+ *prev_dst_type == PTR_TO_CTX)) {
verbose("same insn cannot be used with different pointers\n");
return -EINVAL;
}
@@ -2563,7 +2539,7 @@ static int check_map_prog_compatibility(struct bpf_map *map,
/* look for pseudo eBPF instructions that access map FDs and
* replace them with actual map pointers
*/
-static int replace_map_fd_with_map_ptr(struct verifier_env *env)
+static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
{
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
@@ -2660,7 +2636,7 @@ next_insn:
}
/* drop refcnt of maps used by the rejected program */
-static void release_maps(struct verifier_env *env)
+static void release_maps(struct bpf_verifier_env *env)
{
int i;
@@ -2669,7 +2645,7 @@ static void release_maps(struct verifier_env *env)
}
/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
-static void convert_pseudo_ld_imm64(struct verifier_env *env)
+static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
{
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
@@ -2683,13 +2659,14 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
/* convert load instructions that access fields of 'struct __sk_buff'
* into sequence of instructions that access fields of 'struct sk_buff'
*/
-static int convert_ctx_accesses(struct verifier_env *env)
+static int convert_ctx_accesses(struct bpf_verifier_env *env)
{
const struct bpf_verifier_ops *ops = env->prog->aux->ops;
+ const int insn_cnt = env->prog->len;
struct bpf_insn insn_buf[16], *insn;
struct bpf_prog *new_prog;
enum bpf_access_type type;
- int i, insn_cnt, cnt;
+ int i, cnt, delta = 0;
if (ops->gen_prologue) {
cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
@@ -2703,18 +2680,16 @@ static int convert_ctx_accesses(struct verifier_env *env)
if (!new_prog)
return -ENOMEM;
env->prog = new_prog;
+ delta += cnt - 1;
}
}
if (!ops->convert_ctx_access)
return 0;
- insn_cnt = env->prog->len;
- insn = env->prog->insnsi;
+ insn = env->prog->insnsi + delta;
for (i = 0; i < insn_cnt; i++, insn++) {
- u32 insn_delta;
-
if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
type = BPF_READ;
@@ -2724,11 +2699,8 @@ static int convert_ctx_accesses(struct verifier_env *env)
else
continue;
- if (insn->imm != PTR_TO_CTX) {
- /* clear internal mark */
- insn->imm = 0;
+ if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX)
continue;
- }
cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg,
insn->off, insn_buf, env->prog);
@@ -2737,26 +2709,24 @@ static int convert_ctx_accesses(struct verifier_env *env)
return -EINVAL;
}
- new_prog = bpf_patch_insn_single(env->prog, i, insn_buf, cnt);
+ new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf,
+ cnt);
if (!new_prog)
return -ENOMEM;
- insn_delta = cnt - 1;
+ delta += cnt - 1;
/* keep walking new program and skip insns we just inserted */
env->prog = new_prog;
- insn = new_prog->insnsi + i + insn_delta;
-
- insn_cnt += insn_delta;
- i += insn_delta;
+ insn = new_prog->insnsi + i + delta;
}
return 0;
}
-static void free_states(struct verifier_env *env)
+static void free_states(struct bpf_verifier_env *env)
{
- struct verifier_state_list *sl, *sln;
+ struct bpf_verifier_state_list *sl, *sln;
int i;
if (!env->explored_states)
@@ -2779,19 +2749,24 @@ static void free_states(struct verifier_env *env)
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
{
char __user *log_ubuf = NULL;
- struct verifier_env *env;
+ struct bpf_verifier_env *env;
int ret = -EINVAL;
if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS)
return -E2BIG;
- /* 'struct verifier_env' can be global, but since it's not small,
+ /* 'struct bpf_verifier_env' can be global, but since it's not small,
* allocate/free it every time bpf_check() is called
*/
- env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL);
+ env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
if (!env)
return -ENOMEM;
+ env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
+ (*prog)->len);
+ ret = -ENOMEM;
+ if (!env->insn_aux_data)
+ goto err_free_env;
env->prog = *prog;
/* grab the mutex to protect few globals used by verifier */
@@ -2810,12 +2785,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
/* log_* values have to be sane */
if (log_size < 128 || log_size > UINT_MAX >> 8 ||
log_level == 0 || log_ubuf == NULL)
- goto free_env;
+ goto err_unlock;
ret = -ENOMEM;
log_buf = vmalloc(log_size);
if (!log_buf)
- goto free_env;
+ goto err_unlock;
} else {
log_level = 0;
}
@@ -2825,7 +2800,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
goto skip_full_check;
env->explored_states = kcalloc(env->prog->len,
- sizeof(struct verifier_state_list *),
+ sizeof(struct bpf_verifier_state_list *),
GFP_USER);
ret = -ENOMEM;
if (!env->explored_states)
@@ -2884,14 +2859,67 @@ skip_full_check:
free_log_buf:
if (log_level)
vfree(log_buf);
-free_env:
if (!env->prog->aux->used_maps)
/* if we didn't copy map pointers into bpf_prog_info, release
* them now. Otherwise free_bpf_prog_info() will release them.
*/
release_maps(env);
*prog = env->prog;
+err_unlock:
+ mutex_unlock(&bpf_verifier_lock);
+ vfree(env->insn_aux_data);
+err_free_env:
kfree(env);
+ return ret;
+}
+
+int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
+ void *priv)
+{
+ struct bpf_verifier_env *env;
+ int ret;
+
+ env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
+ if (!env)
+ return -ENOMEM;
+
+ env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
+ prog->len);
+ ret = -ENOMEM;
+ if (!env->insn_aux_data)
+ goto err_free_env;
+ env->prog = prog;
+ env->analyzer_ops = ops;
+ env->analyzer_priv = priv;
+
+ /* grab the mutex to protect few globals used by verifier */
+ mutex_lock(&bpf_verifier_lock);
+
+ log_level = 0;
+
+ env->explored_states = kcalloc(env->prog->len,
+ sizeof(struct bpf_verifier_state_list *),
+ GFP_KERNEL);
+ ret = -ENOMEM;
+ if (!env->explored_states)
+ goto skip_full_check;
+
+ ret = check_cfg(env);
+ if (ret < 0)
+ goto skip_full_check;
+
+ env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
+
+ ret = do_check(env);
+
+skip_full_check:
+ while (pop_stack(env, NULL) >= 0);
+ free_states(env);
+
mutex_unlock(&bpf_verifier_lock);
+ vfree(env->insn_aux_data);
+err_free_env:
+ kfree(env);
return ret;
}
+EXPORT_SYMBOL_GPL(bpf_analyzer);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7bf82a28e10a..d36c7548952f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4522,8 +4522,10 @@ int skb_ensure_writable(struct sk_buff *skb, int write_len)
}
EXPORT_SYMBOL(skb_ensure_writable);
-/* remove VLAN header from packet and update csum accordingly. */
-static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
+/* remove VLAN header from packet and update csum accordingly.
+ * expects a non skb_vlan_tag_present skb with a vlan tag payload
+ */
+int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
{
struct vlan_hdr *vhdr;
unsigned int offset = skb->data - skb_mac_header(skb);
@@ -4554,6 +4556,7 @@ pull:
return err;
}
+EXPORT_SYMBOL(__skb_vlan_pop);
int skb_vlan_pop(struct sk_buff *skb)
{
@@ -4564,9 +4567,7 @@ int skb_vlan_pop(struct sk_buff *skb)
if (likely(skb_vlan_tag_present(skb))) {
skb->vlan_tci = 0;
} else {
- if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&
- skb->protocol != htons(ETH_P_8021AD)) ||
- skb->len < VLAN_ETH_HLEN))
+ if (unlikely(!eth_type_vlan(skb->protocol)))
return 0;
err = __skb_vlan_pop(skb, &vlan_tci);
@@ -4574,9 +4575,7 @@ int skb_vlan_pop(struct sk_buff *skb)
return err;
}
/* move next vlan tag to hw accel tag */
- if (likely((skb->protocol != htons(ETH_P_8021Q) &&
- skb->protocol != htons(ETH_P_8021AD)) ||
- skb->len < VLAN_ETH_HLEN))
+ if (likely(!eth_type_vlan(skb->protocol)))
return 0;
vlan_proto = skb->protocol;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7d025a7804b5..478dfc539178 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -734,9 +734,16 @@ static void tcp_tsq_handler(struct sock *sk)
{
if ((1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
- TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
- tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
+ TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) {
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tp->lost_out > tp->retrans_out &&
+ tp->snd_cwnd > tcp_packets_in_flight(tp))
+ tcp_xmit_retransmit_queue(sk);
+
+ tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
0, GFP_ATOMIC);
+ }
}
/*
* One tasklet per cpu tries to send more skbs.
@@ -2039,6 +2046,39 @@ static int tcp_mtu_probe(struct sock *sk)
return -1;
}
+/* TCP Small Queues :
+ * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+ * (These limits are doubled for retransmits)
+ * This allows for :
+ * - better RTT estimation and ACK scheduling
+ * - faster recovery
+ * - high rates
+ * Alas, some drivers / subsystems require a fair amount
+ * of queued bytes to ensure line rate.
+ * One example is wifi aggregation (802.11 AMPDU)
+ */
+static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
+ unsigned int factor)
+{
+ unsigned int limit;
+
+ limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
+ limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
+ limit <<= factor;
+
+ if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+ set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags);
+ /* It is possible TX completion already happened
+ * before we set TSQ_THROTTLED, so we must
+ * test again the condition.
+ */
+ smp_mb__after_atomic();
+ if (atomic_read(&sk->sk_wmem_alloc) > limit)
+ return true;
+ }
+ return false;
+}
+
/* This routine writes packets to the network. It advances the
* send_head. This happens as incoming acks open up the remote
* window for us.
@@ -2125,29 +2165,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
break;
- /* TCP Small Queues :
- * Control number of packets in qdisc/devices to two packets / or ~1 ms.
- * This allows for :
- * - better RTT estimation and ACK scheduling
- * - faster recovery
- * - high rates
- * Alas, some drivers / subsystems require a fair amount
- * of queued bytes to ensure line rate.
- * One example is wifi aggregation (802.11 AMPDU)
- */
- limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
- limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
-
- if (atomic_read(&sk->sk_wmem_alloc) > limit) {
- set_bit(TSQ_THROTTLED, &tp->tsq_flags);
- /* It is possible TX completion already happened
- * before we set TSQ_THROTTLED, so we must
- * test again the condition.
- */
- smp_mb__after_atomic();
- if (atomic_read(&sk->sk_wmem_alloc) > limit)
- break;
- }
+ if (tcp_small_queue_check(sk, skb, 0))
+ break;
if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
break;
@@ -2847,6 +2866,9 @@ begin_fwd:
if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
continue;
+ if (tcp_small_queue_check(sk, skb, 1))
+ return;
+
if (tcp_retransmit_skb(sk, skb, segs))
return;
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index ef36a56a02c6..4dedb96d1a06 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -68,7 +68,7 @@ match_packet(const struct sk_buff *skb,
++i, offset, sch->type, htons(sch->length),
sch->flags);
#endif
- offset += WORD_ROUND(ntohs(sch->length));
+ offset += SCTP_PAD4(ntohs(sch->length));
pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1c76387c5d9c..667dc382df82 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -204,6 +204,13 @@ out:
return retval;
}
+static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
+ u64 lastuse)
+{
+ tcf_lastuse_update(&a->tcfa_tm);
+ _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+}
+
static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
int ref)
{
@@ -281,6 +288,7 @@ static struct tc_action_ops act_mirred_ops = {
.type = TCA_ACT_MIRRED,
.owner = THIS_MODULE,
.act = tcf_mirred,
+ .stats_update = tcf_stats_update,
.dump = tcf_mirred_dump,
.cleanup = tcf_mirred_release,
.init = tcf_mirred_init,
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 59a8d3150ae2..a95c00b119da 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -30,6 +30,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
struct tcf_vlan *v = to_vlan(a);
int action;
int err;
+ u16 tci;
spin_lock(&v->tcf_lock);
tcf_lastuse_update(&v->tcf_tm);
@@ -48,6 +49,30 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
if (err)
goto drop;
break;
+ case TCA_VLAN_ACT_MODIFY:
+ /* No-op if no vlan tag (either hw-accel or in-payload) */
+ if (!skb_vlan_tagged(skb))
+ goto unlock;
+ /* extract existing tag (and guarantee no hw-accel tag) */
+ if (skb_vlan_tag_present(skb)) {
+ tci = skb_vlan_tag_get(skb);
+ skb->vlan_tci = 0;
+ } else {
+ /* in-payload vlan tag, pop it */
+ err = __skb_vlan_pop(skb, &tci);
+ if (err)
+ goto drop;
+ }
+ /* replace the vid */
+ tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid;
+ /* replace prio bits, if tcfv_push_prio specified */
+ if (v->tcfv_push_prio) {
+ tci &= ~VLAN_PRIO_MASK;
+ tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT;
+ }
+ /* put updated tci as hwaccel tag */
+ __vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci);
+ break;
default:
BUG();
}
@@ -102,6 +127,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
case TCA_VLAN_ACT_POP:
break;
case TCA_VLAN_ACT_PUSH:
+ case TCA_VLAN_ACT_MODIFY:
if (!tb[TCA_VLAN_PUSH_VLAN_ID]) {
if (exists)
tcf_hash_release(*a, bind);
@@ -185,7 +211,8 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
- if (v->tcfv_action == TCA_VLAN_ACT_PUSH &&
+ if ((v->tcfv_action == TCA_VLAN_ACT_PUSH ||
+ v->tcfv_action == TCA_VLAN_ACT_MODIFY) &&
(nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) ||
nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
v->tcfv_push_proto) ||
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index c6f7a47541eb..bb1d5a487081 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -27,6 +27,8 @@ MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
MODULE_DESCRIPTION("TC BPF based classifier");
#define CLS_BPF_NAME_LEN 256
+#define CLS_BPF_SUPPORTED_GEN_FLAGS \
+ (TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW)
struct cls_bpf_head {
struct list_head plist;
@@ -39,6 +41,8 @@ struct cls_bpf_prog {
struct list_head link;
struct tcf_result res;
bool exts_integrated;
+ bool offloaded;
+ u32 gen_flags;
struct tcf_exts exts;
u32 handle;
union {
@@ -54,6 +58,7 @@ struct cls_bpf_prog {
static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
[TCA_BPF_CLASSID] = { .type = NLA_U32 },
[TCA_BPF_FLAGS] = { .type = NLA_U32 },
+ [TCA_BPF_FLAGS_GEN] = { .type = NLA_U32 },
[TCA_BPF_FD] = { .type = NLA_U32 },
[TCA_BPF_NAME] = { .type = NLA_NUL_STRING,
.len = CLS_BPF_NAME_LEN },
@@ -91,7 +96,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
qdisc_skb_cb(skb)->tc_classid = prog->res.classid;
- if (at_ingress) {
+ if (tc_skip_sw(prog->gen_flags)) {
+ filter_res = prog->exts_integrated ? TC_ACT_UNSPEC : 0;
+ } else if (at_ingress) {
/* It is safe to push/pull even if skb_shared() */
__skb_push(skb, skb->mac_len);
bpf_compute_data_end(skb);
@@ -138,6 +145,91 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
return !prog->bpf_ops;
}
+static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
+ enum tc_clsbpf_command cmd)
+{
+ struct net_device *dev = tp->q->dev_queue->dev;
+ struct tc_cls_bpf_offload bpf_offload = {};
+ struct tc_to_netdev offload;
+
+ offload.type = TC_SETUP_CLSBPF;
+ offload.cls_bpf = &bpf_offload;
+
+ bpf_offload.command = cmd;
+ bpf_offload.exts = &prog->exts;
+ bpf_offload.prog = prog->filter;
+ bpf_offload.name = prog->bpf_name;
+ bpf_offload.exts_integrated = prog->exts_integrated;
+ bpf_offload.gen_flags = prog->gen_flags;
+
+ return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+ tp->protocol, &offload);
+}
+
+static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
+ struct cls_bpf_prog *oldprog)
+{
+ struct net_device *dev = tp->q->dev_queue->dev;
+ struct cls_bpf_prog *obj = prog;
+ enum tc_clsbpf_command cmd;
+ bool skip_sw;
+ int ret;
+
+ skip_sw = tc_skip_sw(prog->gen_flags) ||
+ (oldprog && tc_skip_sw(oldprog->gen_flags));
+
+ if (oldprog && oldprog->offloaded) {
+ if (tc_should_offload(dev, tp, prog->gen_flags)) {
+ cmd = TC_CLSBPF_REPLACE;
+ } else if (!tc_skip_sw(prog->gen_flags)) {
+ obj = oldprog;
+ cmd = TC_CLSBPF_DESTROY;
+ } else {
+ return -EINVAL;
+ }
+ } else {
+ if (!tc_should_offload(dev, tp, prog->gen_flags))
+ return skip_sw ? -EINVAL : 0;
+ cmd = TC_CLSBPF_ADD;
+ }
+
+ ret = cls_bpf_offload_cmd(tp, obj, cmd);
+ if (ret)
+ return skip_sw ? ret : 0;
+
+ obj->offloaded = true;
+ if (oldprog)
+ oldprog->offloaded = false;
+
+ return 0;
+}
+
+static void cls_bpf_stop_offload(struct tcf_proto *tp,
+ struct cls_bpf_prog *prog)
+{
+ int err;
+
+ if (!prog->offloaded)
+ return;
+
+ err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+ if (err) {
+ pr_err("Stopping hardware offload failed: %d\n", err);
+ return;
+ }
+
+ prog->offloaded = false;
+}
+
+static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
+ struct cls_bpf_prog *prog)
+{
+ if (!prog->offloaded)
+ return;
+
+ cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
+}
+
static int cls_bpf_init(struct tcf_proto *tp)
{
struct cls_bpf_head *head;
@@ -177,6 +269,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
{
struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg;
+ cls_bpf_stop_offload(tp, prog);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
call_rcu(&prog->rcu, __cls_bpf_delete_prog);
@@ -193,6 +286,7 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
return false;
list_for_each_entry_safe(prog, tmp, &head->plist, link) {
+ cls_bpf_stop_offload(tp, prog);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
call_rcu(&prog->rcu, __cls_bpf_delete_prog);
@@ -302,6 +396,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
{
bool is_bpf, is_ebpf, have_exts = false;
struct tcf_exts exts;
+ u32 gen_flags = 0;
int ret;
is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
@@ -326,8 +421,17 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
}
+ if (tb[TCA_BPF_FLAGS_GEN]) {
+ gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
+ if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
+ !tc_flags_valid(gen_flags)) {
+ ret = -EINVAL;
+ goto errout;
+ }
+ }
prog->exts_integrated = have_exts;
+ prog->gen_flags = gen_flags;
ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
cls_bpf_prog_from_efd(tb, prog, tp);
@@ -415,6 +519,12 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (ret < 0)
goto errout;
+ ret = cls_bpf_offload(tp, prog, oldprog);
+ if (ret) {
+ cls_bpf_delete_prog(tp, prog);
+ return ret;
+ }
+
if (oldprog) {
list_replace_rcu(&oldprog->link, &prog->link);
tcf_unbind_filter(tp, &oldprog->res);
@@ -476,6 +586,8 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
tm->tcm_handle = prog->handle;
+ cls_bpf_offload_update_stats(tp, prog);
+
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
@@ -498,6 +610,9 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags))
goto nla_put_failure;
+ if (prog->gen_flags &&
+ nla_put_u32(skb, TCA_BPF_FLAGS_GEN, prog->gen_flags))
+ goto nla_put_failure;
nla_nest_end(skb, nest);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 1c23060c41a6..f10d3397f917 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1408,7 +1408,7 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
transports) {
if (t->pmtu_pending && t->dst) {
sctp_transport_update_pmtu(sk, t,
- WORD_TRUNC(dst_mtu(t->dst)));
+ SCTP_TRUNC4(dst_mtu(t->dst)));
t->pmtu_pending = 0;
}
if (!pmtu || (t->pathmtu < pmtu))
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index af9cc8055465..8afe2e90d003 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -195,9 +195,10 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
/* This is the biggest possible DATA chunk that can fit into
* the packet
*/
- max_data = (asoc->pathmtu -
- sctp_sk(asoc->base.sk)->pf->af->net_header_len -
- sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk)) & ~3;
+ max_data = asoc->pathmtu -
+ sctp_sk(asoc->base.sk)->pf->af->net_header_len -
+ sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk);
+ max_data = SCTP_TRUNC4(max_data);
max = asoc->frag_point;
/* If the the peer requested that we authenticate DATA chunks
@@ -208,8 +209,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc);
if (hmac_desc)
- max_data -= WORD_ROUND(sizeof(sctp_auth_chunk_t) +
- hmac_desc->hmac_len);
+ max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) +
+ hmac_desc->hmac_len);
}
/* Now, check if we need to reduce our max */
@@ -229,7 +230,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
asoc->outqueue.out_qlen == 0 &&
list_empty(&asoc->outqueue.retransmit) &&
msg_len > max)
- max_data -= WORD_ROUND(sizeof(sctp_sack_chunk_t));
+ max_data -= SCTP_PAD4(sizeof(sctp_sack_chunk_t));
/* Encourage Cookie-ECHO bundling. */
if (asoc->state < SCTP_STATE_COOKIE_ECHOED)
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 69444d32ecda..a1d85065bfc0 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -605,7 +605,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
/* PMTU discovery (RFC1191) */
if (ICMP_FRAG_NEEDED == code) {
sctp_icmp_frag_needed(sk, asoc, transport,
- WORD_TRUNC(info));
+ SCTP_TRUNC4(info));
goto out_unlock;
} else {
if (ICMP_PROT_UNREACH == code) {
@@ -673,7 +673,7 @@ static int sctp_rcv_ootb(struct sk_buff *skb)
if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
break;
- ch_end = offset + WORD_ROUND(ntohs(ch->length));
+ ch_end = offset + SCTP_PAD4(ntohs(ch->length));
if (ch_end > skb->len)
break;
@@ -1121,7 +1121,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
break;
- ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+ ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
if (ch_end > skb_tail_pointer(skb))
break;
@@ -1190,7 +1190,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
* that the chunk length doesn't cause overflow. Otherwise, we'll
* walk off the end.
*/
- if (WORD_ROUND(ntohs(ch->length)) > skb->len)
+ if (SCTP_PAD4(ntohs(ch->length)) > skb->len)
return NULL;
/* If this is INIT/INIT-ACK look inside the chunk too. */
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 6437aa97cfd7..f731de3e8428 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -213,7 +213,7 @@ new_skb:
}
chunk->chunk_hdr = ch;
- chunk->chunk_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+ chunk->chunk_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
chunk->subh.v = NULL; /* Subheader is no longer valid. */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 0c605ec74dc4..2a5c1896d18f 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -297,7 +297,7 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
struct sctp_chunk *chunk)
{
sctp_xmit_t retval = SCTP_XMIT_OK;
- __u16 chunk_len = WORD_ROUND(ntohs(chunk->chunk_hdr->length));
+ __u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length));
/* Check to see if this chunk will fit into the packet */
retval = sctp_packet_will_fit(packet, chunk, chunk_len);
@@ -508,7 +508,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
if (gso) {
pkt_size = packet->overhead;
list_for_each_entry(chunk, &packet->chunk_list, list) {
- int padded = WORD_ROUND(chunk->skb->len);
+ int padded = SCTP_PAD4(chunk->skb->len);
if (pkt_size + padded > tp->pathmtu)
break;
@@ -538,7 +538,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
* included in the chunk length field. The sender should
* never pad with more than 3 bytes.
*
- * [This whole comment explains WORD_ROUND() below.]
+ * [This whole comment explains SCTP_PAD4() below.]
*/
pkt_size -= packet->overhead;
@@ -560,7 +560,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
has_data = 1;
}
- padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
+ padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len;
if (padding)
memset(skb_put(chunk->skb, padding), 0, padding);
@@ -587,7 +587,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
* acknowledged or have failed.
* Re-queue auth chunks if needed.
*/
- pkt_size -= WORD_ROUND(chunk->skb->len);
+ pkt_size -= SCTP_PAD4(chunk->skb->len);
if (!sctp_chunk_is_data(chunk) && chunk != packet->auth)
sctp_chunk_free(chunk);
@@ -911,7 +911,7 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
*/
maxsize = pmtu - packet->overhead;
if (packet->auth)
- maxsize -= WORD_ROUND(packet->auth->skb->len);
+ maxsize -= SCTP_PAD4(packet->auth->skb->len);
if (chunk_len > maxsize)
retval = SCTP_XMIT_PMTU_FULL;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 8c77b87a8565..79dd66079dd7 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -253,7 +253,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
num_types = sp->pf->supported_addrs(sp, types);
chunksize = sizeof(init) + addrs_len;
- chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types));
+ chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types));
chunksize += sizeof(ecap_param);
if (asoc->prsctp_enable)
@@ -283,14 +283,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
/* Add HMACS parameter length if any were defined */
auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
if (auth_hmacs->length)
- chunksize += WORD_ROUND(ntohs(auth_hmacs->length));
+ chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
else
auth_hmacs = NULL;
/* Add CHUNKS parameter length */
auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
if (auth_chunks->length)
- chunksize += WORD_ROUND(ntohs(auth_chunks->length));
+ chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
else
auth_chunks = NULL;
@@ -300,8 +300,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
/* If we have any extensions to report, account for that */
if (num_ext)
- chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) +
- num_ext);
+ chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
+ num_ext);
/* RFC 2960 3.3.2 Initiation (INIT) (1)
*
@@ -443,13 +443,13 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
if (auth_hmacs->length)
- chunksize += WORD_ROUND(ntohs(auth_hmacs->length));
+ chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
else
auth_hmacs = NULL;
auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
if (auth_chunks->length)
- chunksize += WORD_ROUND(ntohs(auth_chunks->length));
+ chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
else
auth_chunks = NULL;
@@ -458,8 +458,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
}
if (num_ext)
- chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) +
- num_ext);
+ chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
+ num_ext);
/* Now allocate and fill out the chunk. */
retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp);
@@ -1390,7 +1390,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
struct sock *sk;
/* No need to allocate LL here, as this is only a chunk. */
- skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen), gfp);
+ skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp);
if (!skb)
goto nodata;
@@ -1482,7 +1482,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
void *target;
void *padding;
int chunklen = ntohs(chunk->chunk_hdr->length);
- int padlen = WORD_ROUND(chunklen) - chunklen;
+ int padlen = SCTP_PAD4(chunklen) - chunklen;
padding = skb_put(chunk->skb, padlen);
target = skb_put(chunk->skb, len);
@@ -1900,7 +1900,7 @@ static int sctp_process_missing_param(const struct sctp_association *asoc,
struct __sctp_missing report;
__u16 len;
- len = WORD_ROUND(sizeof(report));
+ len = SCTP_PAD4(sizeof(report));
/* Make an ERROR chunk, preparing enough room for
* returning multiple unknown parameters.
@@ -2098,9 +2098,9 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
if (*errp) {
if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
- WORD_ROUND(ntohs(param.p->length))))
+ SCTP_PAD4(ntohs(param.p->length))))
sctp_addto_chunk_fixed(*errp,
- WORD_ROUND(ntohs(param.p->length)),
+ SCTP_PAD4(ntohs(param.p->length)),
param.v);
} else {
/* If there is no memory for generating the ERROR
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d88bb2b0b699..026e3bca4a94 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3454,7 +3454,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
}
/* Report violation if chunk len overflows */
- ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+ ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
if (ch_end > skb_tail_pointer(skb))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -4185,7 +4185,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
hdr = unk_chunk->chunk_hdr;
err_chunk = sctp_make_op_error(asoc, unk_chunk,
SCTP_ERROR_UNKNOWN_CHUNK, hdr,
- WORD_ROUND(ntohs(hdr->length)),
+ SCTP_PAD4(ntohs(hdr->length)),
0);
if (err_chunk) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
@@ -4203,7 +4203,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
hdr = unk_chunk->chunk_hdr;
err_chunk = sctp_make_op_error(asoc, unk_chunk,
SCTP_ERROR_UNKNOWN_CHUNK, hdr,
- WORD_ROUND(ntohs(hdr->length)),
+ SCTP_PAD4(ntohs(hdr->length)),
0);
if (err_chunk) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 81b86678be4d..ce54dce13ddb 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -233,7 +233,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
}
if (transport->dst) {
- transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
+ transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst));
} else
transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
}
@@ -287,7 +287,7 @@ void sctp_transport_route(struct sctp_transport *transport,
return;
}
if (transport->dst) {
- transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
+ transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst));
/* Initialize sk->sk_rcv_saddr, if the transport is the
* association's active path for getsockname().
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index d85b803da11d..bea00058ce35 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -383,7 +383,7 @@ sctp_ulpevent_make_remote_error(const struct sctp_association *asoc,
ch = (sctp_errhdr_t *)(chunk->skb->data);
cause = ch->cause;
- elen = WORD_ROUND(ntohs(ch->length)) - sizeof(sctp_errhdr_t);
+ elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(sctp_errhdr_t);
/* Pull off the ERROR header. */
skb_pull(chunk->skb, sizeof(sctp_errhdr_t));
@@ -688,7 +688,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
* MUST ignore the padding bytes.
*/
len = ntohs(chunk->chunk_hdr->length);
- padding = WORD_ROUND(len) - len;
+ padding = SCTP_PAD4(len) - len;
/* Fixup cloned skb with just this chunks data. */
skb_trim(skb, chunk->chunk_end - padding - skb->data);