summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/fault-injection/notifier-error-inject.txt1
-rw-r--r--drivers/net/bonding/bond_3ad.c11
-rw-r--r--drivers/net/bonding/bond_main.c111
-rw-r--r--drivers/net/ethernet/atheros/alx/main.c1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c42
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe.h46
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c5
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c3
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_common.c14
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c273
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c3
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c720
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c50
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_type.h12
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c6
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c46
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c73
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c139
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c88
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c1282
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h160
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c99
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c233
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c435
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c79
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.h14
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h394
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c370
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h35
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c182
-rw-r--r--drivers/net/team/team.c55
-rw-r--r--drivers/net/team/team_mode_activebackup.c1
-rw-r--r--drivers/net/team/team_mode_broadcast.c1
-rw-r--r--drivers/net/team/team_mode_loadbalance.c1
-rw-r--r--drivers/net/team/team_mode_random.c1
-rw-r--r--drivers/net/team/team_mode_roundrobin.c1
-rw-r--r--drivers/net/vrf.c2
-rw-r--r--include/linux/if_team.h1
-rw-r--r--include/linux/mlx5/device.h44
-rw-r--r--include/linux/mlx5/driver.h28
-rw-r--r--include/linux/mlx5/flow_table.h9
-rw-r--r--include/linux/mlx5/mlx5_ifc.h174
-rw-r--r--include/linux/mlx5/vport.h37
-rw-r--r--include/linux/netdevice.h56
-rw-r--r--include/net/bonding.h39
-rw-r--r--lib/netdev-notifier-error-inject.c1
-rw-r--r--net/batman-adv/hard-interface.c3
-rw-r--r--net/bridge/br_if.c2
-rw-r--r--net/core/dev.c51
-rw-r--r--net/openvswitch/vport-netdev.c2
58 files changed, 4939 insertions, 603 deletions
diff --git a/Documentation/fault-injection/notifier-error-inject.txt b/Documentation/fault-injection/notifier-error-inject.txt
index 71e638a4c497..83d3f4e43e91 100644
--- a/Documentation/fault-injection/notifier-error-inject.txt
+++ b/Documentation/fault-injection/notifier-error-inject.txt
@@ -103,6 +103,7 @@ Netdevice notifier events which can be failed are:
* NETDEV_POST_INIT
* NETDEV_PRECHANGEMTU
* NETDEV_PRECHANGEUPPER
+ * NETDEV_CHANGEUPPER
Example: Inject netdevice mtu change error (-22 == -EINVAL)
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 940e2ebbdea8..4cbb8b27a891 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -93,7 +93,8 @@ enum ad_link_speed_type {
AD_LINK_SPEED_10000MBPS,
AD_LINK_SPEED_20000MBPS,
AD_LINK_SPEED_40000MBPS,
- AD_LINK_SPEED_56000MBPS
+ AD_LINK_SPEED_56000MBPS,
+ AD_LINK_SPEED_100000MBPS,
};
/* compare MAC addresses */
@@ -258,6 +259,7 @@ static inline int __check_agg_selection_timer(struct port *port)
* %AD_LINK_SPEED_20000MBPS
* %AD_LINK_SPEED_40000MBPS
* %AD_LINK_SPEED_56000MBPS
+ * %AD_LINK_SPEED_100000MBPS
*/
static u16 __get_link_speed(struct port *port)
{
@@ -305,6 +307,10 @@ static u16 __get_link_speed(struct port *port)
speed = AD_LINK_SPEED_56000MBPS;
break;
+ case SPEED_100000:
+ speed = AD_LINK_SPEED_100000MBPS;
+ break;
+
default:
/* unknown speed value from ethtool. shouldn't happen */
speed = 0;
@@ -681,6 +687,9 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator)
case AD_LINK_SPEED_56000MBPS:
bandwidth = aggregator->num_of_ports * 56000;
break;
+ case AD_LINK_SPEED_100000MBPS:
+ bandwidth = aggregator->num_of_ports * 100000;
+ break;
default:
bandwidth = 0; /* to silence the compiler */
}
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 9e0f8a7ef8b1..5a7de43a09f8 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -830,7 +830,8 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
}
new_active->delay = 0;
- bond_set_slave_link_state(new_active, BOND_LINK_UP);
+ bond_set_slave_link_state(new_active, BOND_LINK_UP,
+ BOND_SLAVE_NOTIFY_NOW);
if (BOND_MODE(bond) == BOND_MODE_8023AD)
bond_3ad_handle_link_change(new_active, BOND_LINK_UP);
@@ -1198,26 +1199,43 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
return ret;
}
-static int bond_master_upper_dev_link(struct net_device *bond_dev,
- struct net_device *slave_dev,
- struct slave *slave)
+static enum netdev_lag_tx_type bond_lag_tx_type(struct bonding *bond)
{
+ switch (BOND_MODE(bond)) {
+ case BOND_MODE_ROUNDROBIN:
+ return NETDEV_LAG_TX_TYPE_ROUNDROBIN;
+ case BOND_MODE_ACTIVEBACKUP:
+ return NETDEV_LAG_TX_TYPE_ACTIVEBACKUP;
+ case BOND_MODE_BROADCAST:
+ return NETDEV_LAG_TX_TYPE_BROADCAST;
+ case BOND_MODE_XOR:
+ case BOND_MODE_8023AD:
+ return NETDEV_LAG_TX_TYPE_HASH;
+ default:
+ return NETDEV_LAG_TX_TYPE_UNKNOWN;
+ }
+}
+
+static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave)
+{
+ struct netdev_lag_upper_info lag_upper_info;
int err;
- err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave);
+ lag_upper_info.tx_type = bond_lag_tx_type(bond);
+ err = netdev_master_upper_dev_link(slave->dev, bond->dev, slave,
+ &lag_upper_info);
if (err)
return err;
- slave_dev->flags |= IFF_SLAVE;
- rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
+ slave->dev->flags |= IFF_SLAVE;
+ rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL);
return 0;
}
-static void bond_upper_dev_unlink(struct net_device *bond_dev,
- struct net_device *slave_dev)
+static void bond_upper_dev_unlink(struct bonding *bond, struct slave *slave)
{
- netdev_upper_dev_unlink(slave_dev, bond_dev);
- slave_dev->flags &= ~IFF_SLAVE;
- rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
+ netdev_upper_dev_unlink(slave->dev, bond->dev);
+ slave->dev->flags &= ~IFF_SLAVE;
+ rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL);
}
static struct slave *bond_alloc_slave(struct bonding *bond)
@@ -1299,6 +1317,16 @@ void bond_queue_slave_event(struct slave *slave)
queue_delayed_work(slave->bond->wq, &nnw->work, 0);
}
+void bond_lower_state_changed(struct slave *slave)
+{
+ struct netdev_lag_lower_state_info info;
+
+ info.link_up = slave->link == BOND_LINK_UP ||
+ slave->link == BOND_LINK_FAIL;
+ info.tx_enabled = bond_is_active_slave(slave);
+ netdev_lower_state_changed(slave->dev, &info);
+}
+
/* enslave device <slave> to bond device <master> */
int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
{
@@ -1563,21 +1591,26 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) {
if (bond->params.updelay) {
bond_set_slave_link_state(new_slave,
- BOND_LINK_BACK);
+ BOND_LINK_BACK,
+ BOND_SLAVE_NOTIFY_NOW);
new_slave->delay = bond->params.updelay;
} else {
bond_set_slave_link_state(new_slave,
- BOND_LINK_UP);
+ BOND_LINK_UP,
+ BOND_SLAVE_NOTIFY_NOW);
}
} else {
- bond_set_slave_link_state(new_slave, BOND_LINK_DOWN);
+ bond_set_slave_link_state(new_slave, BOND_LINK_DOWN,
+ BOND_SLAVE_NOTIFY_NOW);
}
} else if (bond->params.arp_interval) {
bond_set_slave_link_state(new_slave,
(netif_carrier_ok(slave_dev) ?
- BOND_LINK_UP : BOND_LINK_DOWN));
+ BOND_LINK_UP : BOND_LINK_DOWN),
+ BOND_SLAVE_NOTIFY_NOW);
} else {
- bond_set_slave_link_state(new_slave, BOND_LINK_UP);
+ bond_set_slave_link_state(new_slave, BOND_LINK_UP,
+ BOND_SLAVE_NOTIFY_NOW);
}
if (new_slave->link != BOND_LINK_DOWN)
@@ -1662,7 +1695,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
goto err_detach;
}
- res = bond_master_upper_dev_link(bond_dev, slave_dev, new_slave);
+ res = bond_master_upper_dev_link(bond, new_slave);
if (res) {
netdev_dbg(bond_dev, "Error %d calling bond_master_upper_dev_link\n", res);
goto err_unregister;
@@ -1698,7 +1731,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
/* Undo stages on error */
err_upper_unlink:
- bond_upper_dev_unlink(bond_dev, slave_dev);
+ bond_upper_dev_unlink(bond, new_slave);
err_unregister:
netdev_rx_handler_unregister(slave_dev);
@@ -1799,12 +1832,14 @@ static int __bond_release_one(struct net_device *bond_dev,
return -EINVAL;
}
+ bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_NOW);
+
bond_sysfs_slave_del(slave);
/* recompute stats just before removing the slave */
bond_get_stats(bond->dev, &bond->bond_stats);
- bond_upper_dev_unlink(bond_dev, slave_dev);
+ bond_upper_dev_unlink(bond, slave);
/* unregister rx_handler early so bond_handle_frame wouldn't be called
* for this slave anymore.
*/
@@ -1996,7 +2031,8 @@ static int bond_miimon_inspect(struct bonding *bond)
if (link_state)
continue;
- bond_set_slave_link_state(slave, BOND_LINK_FAIL);
+ bond_set_slave_link_state(slave, BOND_LINK_FAIL,
+ BOND_SLAVE_NOTIFY_LATER);
slave->delay = bond->params.downdelay;
if (slave->delay) {
netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n",
@@ -2011,7 +2047,8 @@ static int bond_miimon_inspect(struct bonding *bond)
case BOND_LINK_FAIL:
if (link_state) {
/* recovered before downdelay expired */
- bond_set_slave_link_state(slave, BOND_LINK_UP);
+ bond_set_slave_link_state(slave, BOND_LINK_UP,
+ BOND_SLAVE_NOTIFY_LATER);
slave->last_link_up = jiffies;
netdev_info(bond->dev, "link status up again after %d ms for interface %s\n",
(bond->params.downdelay - slave->delay) *
@@ -2033,7 +2070,8 @@ static int bond_miimon_inspect(struct bonding *bond)
if (!link_state)
continue;
- bond_set_slave_link_state(slave, BOND_LINK_BACK);
+ bond_set_slave_link_state(slave, BOND_LINK_BACK,
+ BOND_SLAVE_NOTIFY_LATER);
slave->delay = bond->params.updelay;
if (slave->delay) {
@@ -2047,7 +2085,8 @@ static int bond_miimon_inspect(struct bonding *bond)
case BOND_LINK_BACK:
if (!link_state) {
bond_set_slave_link_state(slave,
- BOND_LINK_DOWN);
+ BOND_LINK_DOWN,
+ BOND_SLAVE_NOTIFY_LATER);
netdev_info(bond->dev, "link status down again after %d ms for interface %s\n",
(bond->params.updelay - slave->delay) *
bond->params.miimon,
@@ -2085,7 +2124,8 @@ static void bond_miimon_commit(struct bonding *bond)
continue;
case BOND_LINK_UP:
- bond_set_slave_link_state(slave, BOND_LINK_UP);
+ bond_set_slave_link_state(slave, BOND_LINK_UP,
+ BOND_SLAVE_NOTIFY_NOW);
slave->last_link_up = jiffies;
primary = rtnl_dereference(bond->primary_slave);
@@ -2125,7 +2165,8 @@ static void bond_miimon_commit(struct bonding *bond)
if (slave->link_failure_count < UINT_MAX)
slave->link_failure_count++;
- bond_set_slave_link_state(slave, BOND_LINK_DOWN);
+ bond_set_slave_link_state(slave, BOND_LINK_DOWN,
+ BOND_SLAVE_NOTIFY_NOW);
if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP ||
BOND_MODE(bond) == BOND_MODE_8023AD)
@@ -2708,7 +2749,8 @@ static void bond_ab_arp_commit(struct bonding *bond)
struct slave *current_arp_slave;
current_arp_slave = rtnl_dereference(bond->current_arp_slave);
- bond_set_slave_link_state(slave, BOND_LINK_UP);
+ bond_set_slave_link_state(slave, BOND_LINK_UP,
+ BOND_SLAVE_NOTIFY_NOW);
if (current_arp_slave) {
bond_set_slave_inactive_flags(
current_arp_slave,
@@ -2731,7 +2773,8 @@ static void bond_ab_arp_commit(struct bonding *bond)
if (slave->link_failure_count < UINT_MAX)
slave->link_failure_count++;
- bond_set_slave_link_state(slave, BOND_LINK_DOWN);
+ bond_set_slave_link_state(slave, BOND_LINK_DOWN,
+ BOND_SLAVE_NOTIFY_NOW);
bond_set_slave_inactive_flags(slave,
BOND_SLAVE_NOTIFY_NOW);
@@ -2810,7 +2853,8 @@ static bool bond_ab_arp_probe(struct bonding *bond)
* up when it is actually down
*/
if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {
- bond_set_slave_link_state(slave, BOND_LINK_DOWN);
+ bond_set_slave_link_state(slave, BOND_LINK_DOWN,
+ BOND_SLAVE_NOTIFY_LATER);
if (slave->link_failure_count < UINT_MAX)
slave->link_failure_count++;
@@ -2830,7 +2874,8 @@ static bool bond_ab_arp_probe(struct bonding *bond)
if (!new_slave)
goto check_state;
- bond_set_slave_link_state(new_slave, BOND_LINK_BACK);
+ bond_set_slave_link_state(new_slave, BOND_LINK_BACK,
+ BOND_SLAVE_NOTIFY_LATER);
bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER);
bond_arp_send_all(bond, new_slave);
new_slave->last_link_up = jiffies;
@@ -2838,7 +2883,7 @@ static bool bond_ab_arp_probe(struct bonding *bond)
check_state:
bond_for_each_slave_rcu(bond, slave, iter) {
- if (slave->should_notify) {
+ if (slave->should_notify || slave->should_notify_link) {
should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW;
break;
}
@@ -2893,8 +2938,10 @@ re_arm:
if (should_notify_peers)
call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
bond->dev);
- if (should_notify_rtnl)
+ if (should_notify_rtnl) {
bond_slave_state_notify(bond);
+ bond_slave_link_notify(bond);
+ }
rtnl_unlock();
}
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index c8af3ce3ea38..0a81808f1968 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -577,7 +577,6 @@ static int alx_alloc_rings(struct alx_priv *alx)
alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
- alx->tx_ringsz = alx->tx_ringsz;
netif_napi_add(alx->dev, &alx->napi, alx_poll, 64);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index b447af6973bb..8cd395d1cd09 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -10274,52 +10274,48 @@ static int i40e_setup_pf_filter_control(struct i40e_pf *pf)
static void i40e_print_features(struct i40e_pf *pf)
{
struct i40e_hw *hw = &pf->hw;
- char *buf, *string;
- int i = 0;
+ char *buf;
+ int i;
- string = kzalloc(INFO_STRING_LEN, GFP_KERNEL);
- if (!string) {
- dev_err(&pf->pdev->dev, "Features string allocation failed\n");
+ buf = kmalloc(INFO_STRING_LEN, GFP_KERNEL);
+ if (!buf)
return;
- }
-
- buf = string;
- i += snprintf(&buf[i], REMAIN(i), "Features: PF-id[%d] ", hw->pf_id);
+ i = snprintf(buf, INFO_STRING_LEN, "Features: PF-id[%d]", hw->pf_id);
#ifdef CONFIG_PCI_IOV
- i += snprintf(&buf[i], REMAIN(i), "VFs: %d ", pf->num_req_vfs);
+ i += snprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs);
#endif
- i += snprintf(&buf[i], REMAIN(i), "VSIs: %d QP: %d RX: %s ",
+ i += snprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d RX: %s",
pf->hw.func_caps.num_vsis,
pf->vsi[pf->lan_vsi]->num_queue_pairs,
pf->flags & I40E_FLAG_RX_PS_ENABLED ? "PS" : "1BUF");
if (pf->flags & I40E_FLAG_RSS_ENABLED)
- i += snprintf(&buf[i], REMAIN(i), "RSS ");
+ i += snprintf(&buf[i], REMAIN(i), " RSS");
if (pf->flags & I40E_FLAG_FD_ATR_ENABLED)
- i += snprintf(&buf[i], REMAIN(i), "FD_ATR ");
+ i += snprintf(&buf[i], REMAIN(i), " FD_ATR");
if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
- i += snprintf(&buf[i], REMAIN(i), "FD_SB ");
- i += snprintf(&buf[i], REMAIN(i), "NTUPLE ");
+ i += snprintf(&buf[i], REMAIN(i), " FD_SB");
+ i += snprintf(&buf[i], REMAIN(i), " NTUPLE");
}
if (pf->flags & I40E_FLAG_DCB_CAPABLE)
- i += snprintf(&buf[i], REMAIN(i), "DCB ");
+ i += snprintf(&buf[i], REMAIN(i), " DCB");
#if IS_ENABLED(CONFIG_VXLAN)
- i += snprintf(&buf[i], REMAIN(i), "VxLAN ");
+ i += snprintf(&buf[i], REMAIN(i), " VxLAN");
#endif
if (pf->flags & I40E_FLAG_PTP)
- i += snprintf(&buf[i], REMAIN(i), "PTP ");
+ i += snprintf(&buf[i], REMAIN(i), " PTP");
#ifdef I40E_FCOE
if (pf->flags & I40E_FLAG_FCOE_ENABLED)
- i += snprintf(&buf[i], REMAIN(i), "FCOE ");
+ i += snprintf(&buf[i], REMAIN(i), " FCOE");
#endif
if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
- i += snprintf(&buf[i], REMAIN(i), "VEPA ");
+ i += snprintf(&buf[i], REMAIN(i), " VEB");
else
- buf += sprintf(buf, "VEPA ");
+ i += snprintf(&buf[i], REMAIN(i), " VEPA");
- dev_info(&pf->pdev->dev, "%s\n", string);
- kfree(string);
+ dev_info(&pf->pdev->dev, "%s\n", buf);
+ kfree(buf);
WARN_ON(i > INFO_STRING_LEN);
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 1d2174526a4c..445b4c9169b6 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -139,6 +139,7 @@ enum ixgbe_tx_flags {
#define IXGBE_X540_VF_DEVICE_ID 0x1515
struct vf_data_storage {
+ struct pci_dev *vfdev;
unsigned char vf_mac_addresses[ETH_ALEN];
u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES];
u16 num_vf_mc_hashes;
@@ -224,6 +225,8 @@ struct ixgbe_rx_queue_stats {
u64 csum_err;
};
+#define IXGBE_TS_HDR_LEN 8
+
enum ixgbe_ring_state_t {
__IXGBE_TX_FDIR_INIT_DONE,
__IXGBE_TX_XPS_INIT_DONE,
@@ -282,6 +285,8 @@ struct ixgbe_ring {
u16 next_to_use;
u16 next_to_clean;
+ unsigned long last_rx_timestamp;
+
union {
u16 next_to_alloc;
struct {
@@ -587,9 +592,10 @@ static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring)
struct ixgbe_mac_addr {
u8 addr[ETH_ALEN];
- u16 queue;
+ u16 pool;
u16 state; /* bitmask */
};
+
#define IXGBE_MAC_STATE_DEFAULT 0x1
#define IXGBE_MAC_STATE_MODIFIED 0x2
#define IXGBE_MAC_STATE_IN_USE 0x4
@@ -639,6 +645,8 @@ struct ixgbe_adapter {
#define IXGBE_FLAG_SRIOV_CAPABLE (u32)(1 << 22)
#define IXGBE_FLAG_SRIOV_ENABLED (u32)(1 << 23)
#define IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE BIT(24)
+#define IXGBE_FLAG_RX_HWTSTAMP_ENABLED BIT(25)
+#define IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER BIT(26)
u32 flags2;
#define IXGBE_FLAG2_RSC_CAPABLE (u32)(1 << 0)
@@ -755,9 +763,12 @@ struct ixgbe_adapter {
unsigned long last_rx_ptp_check;
unsigned long last_rx_timestamp;
spinlock_t tmreg_lock;
- struct cyclecounter cc;
- struct timecounter tc;
+ struct cyclecounter hw_cc;
+ struct timecounter hw_tc;
u32 base_incval;
+ u32 tx_hwtstamp_timeouts;
+ u32 rx_hwtstamp_cleared;
+ void (*ptp_setup_sdp)(struct ixgbe_adapter *);
/* SR-IOV */
DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS);
@@ -883,9 +894,9 @@ int ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id,
void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter);
#endif
int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter,
- u8 *addr, u16 queue);
+ const u8 *addr, u16 queue);
int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter,
- u8 *addr, u16 queue);
+ const u8 *addr, u16 queue);
void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter);
netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *, struct ixgbe_adapter *,
struct ixgbe_ring *);
@@ -968,12 +979,33 @@ void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter);
void ixgbe_ptp_stop(struct ixgbe_adapter *adapter);
void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter);
void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter);
-void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb);
+void ixgbe_ptp_rx_pktstamp(struct ixgbe_q_vector *, struct sk_buff *);
+void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *, struct sk_buff *skb);
+static inline void ixgbe_ptp_rx_hwtstamp(struct ixgbe_ring *rx_ring,
+ union ixgbe_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ if (unlikely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_TSIP))) {
+ ixgbe_ptp_rx_pktstamp(rx_ring->q_vector, skb);
+ return;
+ }
+
+ if (unlikely(!ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS)))
+ return;
+
+ ixgbe_ptp_rx_rgtstamp(rx_ring->q_vector, skb);
+
+ /* Update the last_rx_timestamp timer in order to enable watchdog check
+ * for error case of latched timestamp on a dropped packet.
+ */
+ rx_ring->last_rx_timestamp = jiffies;
+}
+
int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr);
int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr);
void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter);
void ixgbe_ptp_reset(struct ixgbe_adapter *adapter);
-void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr);
+void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter);
#ifdef CONFIG_PCI_IOV
void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter);
#endif
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
index 65db69b862fb..8f09d291a043 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
@@ -1,7 +1,7 @@
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2014 Intel Corporation.
+ Copyright(c) 1999 - 2015 Intel Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms and conditions of the GNU General Public License,
@@ -765,13 +765,14 @@ mac_reset_top:
ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL) | IXGBE_CTRL_RST;
IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
IXGBE_WRITE_FLUSH(hw);
+ usleep_range(1000, 1200);
/* Poll for reset bit to self-clear indicating reset is complete */
for (i = 0; i < 10; i++) {
- udelay(1);
ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
if (!(ctrl & IXGBE_CTRL_RST))
break;
+ udelay(1);
}
if (ctrl & IXGBE_CTRL_RST) {
status = IXGBE_ERR_RESET_FAILED;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index a39afcf03e2c..b8bd72589f72 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -990,13 +990,14 @@ mac_reset_top:
ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL);
IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
IXGBE_WRITE_FLUSH(hw);
+ usleep_range(1000, 1200);
/* Poll for reset bit to self-clear indicating reset is complete */
for (i = 0; i < 10; i++) {
- udelay(1);
ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
if (!(ctrl & IXGBE_CTRL_RST_MASK))
break;
+ udelay(1);
}
if (ctrl & IXGBE_CTRL_RST_MASK) {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index ce61b36b94f1..daec6aef5dc8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -1,7 +1,7 @@
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2014 Intel Corporation.
+ Copyright(c) 1999 - 2015 Intel Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms and conditions of the GNU General Public License,
@@ -2454,6 +2454,17 @@ static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw)
/* Always set this bit to ensure any future transactions are blocked */
IXGBE_WRITE_REG(hw, IXGBE_CTRL, IXGBE_CTRL_GIO_DIS);
+ /* Poll for bit to read as set */
+ for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) {
+ if (IXGBE_READ_REG(hw, IXGBE_CTRL) & IXGBE_CTRL_GIO_DIS)
+ break;
+ usleep_range(100, 120);
+ }
+ if (i >= IXGBE_PCI_MASTER_DISABLE_TIMEOUT) {
+ hw_dbg(hw, "GIO disable did not set - requesting resets\n");
+ goto gio_disable_fail;
+ }
+
/* Exit if master requests are blocked */
if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO) ||
ixgbe_removed(hw->hw_addr))
@@ -2475,6 +2486,7 @@ static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw)
* again to clear out any effects they may have had on our device.
*/
hw_dbg(hw, "GIO Master Disable bit didn't clear - requesting resets\n");
+gio_disable_fail:
hw->mac.flags |= IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
if (hw->mac.type >= ixgbe_mac_X550)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index c95042ee30de..ebd4522e7879 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -172,6 +172,8 @@ MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
+static struct workqueue_struct *ixgbe_wq;
+
static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev);
static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter,
@@ -313,7 +315,7 @@ static void ixgbe_service_event_schedule(struct ixgbe_adapter *adapter)
if (!test_bit(__IXGBE_DOWN, &adapter->state) &&
!test_bit(__IXGBE_REMOVING, &adapter->state) &&
!test_and_set_bit(__IXGBE_SERVICE_SCHED, &adapter->state))
- schedule_work(&adapter->service_task);
+ queue_work(ixgbe_wq, &adapter->service_task);
}
static void ixgbe_remove_adapter(struct ixgbe_hw *hw)
@@ -1632,6 +1634,7 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
struct sk_buff *skb)
{
struct net_device *dev = rx_ring->netdev;
+ u32 flags = rx_ring->q_vector->adapter->flags;
ixgbe_update_rsc_stats(rx_ring, skb);
@@ -1639,8 +1642,8 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
ixgbe_rx_checksum(rx_ring, rx_desc, skb);
- if (unlikely(ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS)))
- ixgbe_ptp_rx_hwtstamp(rx_ring->q_vector->adapter, skb);
+ if (unlikely(flags & IXGBE_FLAG_RX_HWTSTAMP_ENABLED))
+ ixgbe_ptp_rx_hwtstamp(rx_ring, rx_desc, skb);
if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) {
@@ -2738,7 +2741,7 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data)
ixgbe_check_fan_failure(adapter, eicr);
if (unlikely(eicr & IXGBE_EICR_TIMESYNC))
- ixgbe_ptp_check_pps_event(adapter, eicr);
+ ixgbe_ptp_check_pps_event(adapter);
/* re-enable the original interrupt state, no lsc, no queues */
if (!test_bit(__IXGBE_DOWN, &adapter->state))
@@ -2945,7 +2948,7 @@ static irqreturn_t ixgbe_intr(int irq, void *data)
ixgbe_check_fan_failure(adapter, eicr);
if (unlikely(eicr & IXGBE_EICR_TIMESYNC))
- ixgbe_ptp_check_pps_event(adapter, eicr);
+ ixgbe_ptp_check_pps_event(adapter);
/* would disable interrupts here but EIAM disabled it */
napi_schedule_irqoff(&q_vector->napi);
@@ -4029,124 +4032,156 @@ static int ixgbe_write_mc_addr_list(struct net_device *netdev)
#ifdef CONFIG_PCI_IOV
void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter)
{
+ struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
struct ixgbe_hw *hw = &adapter->hw;
int i;
- for (i = 0; i < hw->mac.num_rar_entries; i++) {
- if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE)
- hw->mac.ops.set_rar(hw, i, adapter->mac_table[i].addr,
- adapter->mac_table[i].queue,
+
+ for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+ mac_table->state &= ~IXGBE_MAC_STATE_MODIFIED;
+
+ if (mac_table->state & IXGBE_MAC_STATE_IN_USE)
+ hw->mac.ops.set_rar(hw, i,
+ mac_table->addr,
+ mac_table->pool,
IXGBE_RAH_AV);
else
hw->mac.ops.clear_rar(hw, i);
-
- adapter->mac_table[i].state &= ~(IXGBE_MAC_STATE_MODIFIED);
}
}
-#endif
+#endif
static void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter)
{
+ struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
struct ixgbe_hw *hw = &adapter->hw;
int i;
- for (i = 0; i < hw->mac.num_rar_entries; i++) {
- if (adapter->mac_table[i].state & IXGBE_MAC_STATE_MODIFIED) {
- if (adapter->mac_table[i].state &
- IXGBE_MAC_STATE_IN_USE)
- hw->mac.ops.set_rar(hw, i,
- adapter->mac_table[i].addr,
- adapter->mac_table[i].queue,
- IXGBE_RAH_AV);
- else
- hw->mac.ops.clear_rar(hw, i);
- adapter->mac_table[i].state &=
- ~(IXGBE_MAC_STATE_MODIFIED);
- }
+ for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+ if (!(mac_table->state & IXGBE_MAC_STATE_MODIFIED))
+ continue;
+
+ mac_table->state &= ~IXGBE_MAC_STATE_MODIFIED;
+
+ if (mac_table->state & IXGBE_MAC_STATE_IN_USE)
+ hw->mac.ops.set_rar(hw, i,
+ mac_table->addr,
+ mac_table->pool,
+ IXGBE_RAH_AV);
+ else
+ hw->mac.ops.clear_rar(hw, i);
}
}
static void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter)
{
- int i;
+ struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
struct ixgbe_hw *hw = &adapter->hw;
+ int i;
- for (i = 0; i < hw->mac.num_rar_entries; i++) {
- adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED;
- adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE;
- eth_zero_addr(adapter->mac_table[i].addr);
- adapter->mac_table[i].queue = 0;
+ for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+ mac_table->state |= IXGBE_MAC_STATE_MODIFIED;
+ mac_table->state &= ~IXGBE_MAC_STATE_IN_USE;
}
+
ixgbe_sync_mac_table(adapter);
}
-static int ixgbe_available_rars(struct ixgbe_adapter *adapter)
+static int ixgbe_available_rars(struct ixgbe_adapter *adapter, u16 pool)
{
+ struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
struct ixgbe_hw *hw = &adapter->hw;
int i, count = 0;
- for (i = 0; i < hw->mac.num_rar_entries; i++) {
- if (adapter->mac_table[i].state == 0)
- count++;
+ for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+ /* do not count default RAR as available */
+ if (mac_table->state & IXGBE_MAC_STATE_DEFAULT)
+ continue;
+
+ /* only count unused and addresses that belong to us */
+ if (mac_table->state & IXGBE_MAC_STATE_IN_USE) {
+ if (mac_table->pool != pool)
+ continue;
+ }
+
+ count++;
}
+
return count;
}
/* this function destroys the first RAR entry */
-static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter,
- u8 *addr)
+static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter)
{
+ struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
struct ixgbe_hw *hw = &adapter->hw;
- memcpy(&adapter->mac_table[0].addr, addr, ETH_ALEN);
- adapter->mac_table[0].queue = VMDQ_P(0);
- adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT |
- IXGBE_MAC_STATE_IN_USE);
- hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr,
- adapter->mac_table[0].queue,
+ memcpy(&mac_table->addr, hw->mac.addr, ETH_ALEN);
+ mac_table->pool = VMDQ_P(0);
+
+ mac_table->state = IXGBE_MAC_STATE_DEFAULT | IXGBE_MAC_STATE_IN_USE;
+
+ hw->mac.ops.set_rar(hw, 0, mac_table->addr, mac_table->pool,
IXGBE_RAH_AV);
}
-int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue)
+int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter,
+ const u8 *addr, u16 pool)
{
+ struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
struct ixgbe_hw *hw = &adapter->hw;
int i;
if (is_zero_ether_addr(addr))
return -EINVAL;
- for (i = 0; i < hw->mac.num_rar_entries; i++) {
- if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE)
+ for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+ if (mac_table->state & IXGBE_MAC_STATE_IN_USE)
continue;
- adapter->mac_table[i].state |= (IXGBE_MAC_STATE_MODIFIED |
- IXGBE_MAC_STATE_IN_USE);
- ether_addr_copy(adapter->mac_table[i].addr, addr);
- adapter->mac_table[i].queue = queue;
+
+ ether_addr_copy(mac_table->addr, addr);
+ mac_table->pool = pool;
+
+ mac_table->state |= IXGBE_MAC_STATE_MODIFIED |
+ IXGBE_MAC_STATE_IN_USE;
+
ixgbe_sync_mac_table(adapter);
+
return i;
}
+
return -ENOMEM;
}
-int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue)
+int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter,
+ const u8 *addr, u16 pool)
{
- /* search table for addr, if found, set to 0 and sync */
- int i;
+ struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
struct ixgbe_hw *hw = &adapter->hw;
+ int i;
if (is_zero_ether_addr(addr))
return -EINVAL;
- for (i = 0; i < hw->mac.num_rar_entries; i++) {
- if (ether_addr_equal(addr, adapter->mac_table[i].addr) &&
- adapter->mac_table[i].queue == queue) {
- adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED;
- adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE;
- eth_zero_addr(adapter->mac_table[i].addr);
- adapter->mac_table[i].queue = 0;
- ixgbe_sync_mac_table(adapter);
- return 0;
- }
+ /* search table for addr, if found clear IN_USE flag and sync */
+ for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+ /* we can only delete an entry if it is in use */
+ if (!(mac_table->state & IXGBE_MAC_STATE_IN_USE))
+ continue;
+ /* we only care about entries that belong to the given pool */
+ if (mac_table->pool != pool)
+ continue;
+ /* we only care about a specific MAC address */
+ if (!ether_addr_equal(addr, mac_table->addr))
+ continue;
+
+ mac_table->state |= IXGBE_MAC_STATE_MODIFIED;
+ mac_table->state &= ~IXGBE_MAC_STATE_IN_USE;
+
+ ixgbe_sync_mac_table(adapter);
+
+ return 0;
}
+
return -ENOMEM;
}
/**
@@ -4164,7 +4199,7 @@ static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn)
int count = 0;
/* return ENOMEM indicating insufficient memory for addresses */
- if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter))
+ if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter, vfn))
return -ENOMEM;
if (!netdev_uc_empty(netdev)) {
@@ -4178,6 +4213,25 @@ static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn)
return count;
}
+static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ int ret;
+
+ ret = ixgbe_add_mac_filter(adapter, addr, VMDQ_P(0));
+
+ return min_t(int, ret, 0);
+}
+
+static int ixgbe_uc_unsync(struct net_device *netdev, const unsigned char *addr)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ ixgbe_del_mac_filter(adapter, addr, VMDQ_P(0));
+
+ return 0;
+}
+
/**
* ixgbe_set_rx_mode - Unicast, Multicast and Promiscuous mode set
* @netdev: network interface device structure
@@ -4233,8 +4287,7 @@ void ixgbe_set_rx_mode(struct net_device *netdev)
* sufficient space to store all the addresses then enable
* unicast promiscuous mode
*/
- count = ixgbe_write_uc_addr_list(netdev, VMDQ_P(0));
- if (count < 0) {
+ if (__dev_uc_sync(netdev, ixgbe_uc_sync, ixgbe_uc_unsync)) {
fctrl |= IXGBE_FCTRL_UPE;
vmolr |= IXGBE_VMOLR_ROPE;
}
@@ -5037,7 +5090,6 @@ void ixgbe_reset(struct ixgbe_adapter *adapter)
struct ixgbe_hw *hw = &adapter->hw;
struct net_device *netdev = adapter->netdev;
int err;
- u8 old_addr[ETH_ALEN];
if (ixgbe_removed(hw->hw_addr))
return;
@@ -5073,10 +5125,13 @@ void ixgbe_reset(struct ixgbe_adapter *adapter)
}
clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
- /* do not flush user set addresses */
- memcpy(old_addr, &adapter->mac_table[0].addr, netdev->addr_len);
+
+ /* flush entries out of MAC table */
ixgbe_flush_sw_mac_table(adapter);
- ixgbe_mac_set_default_filter(adapter, old_addr);
+ __dev_uc_unsync(netdev, NULL);
+
+ /* do not flush user set addresses */
+ ixgbe_mac_set_default_filter(adapter);
/* update SAN MAC vmdq pool selection */
if (hw->mac.san_mac_rar_index)
@@ -6611,10 +6666,8 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
struct pci_dev *pdev = adapter->pdev;
- struct pci_dev *vfdev;
+ unsigned int vf;
u32 gpc;
- int pos;
- unsigned short vf_id;
if (!(netif_carrier_ok(adapter->netdev)))
return;
@@ -6631,26 +6684,17 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter)
if (!pdev)
return;
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
- if (!pos)
- return;
-
- /* get the device ID for the VF */
- pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id);
-
/* check status reg for all VFs owned by this PF */
- vfdev = pci_get_device(pdev->vendor, vf_id, NULL);
- while (vfdev) {
- if (vfdev->is_virtfn && (vfdev->physfn == pdev)) {
- u16 status_reg;
-
- pci_read_config_word(vfdev, PCI_STATUS, &status_reg);
- if (status_reg & PCI_STATUS_REC_MASTER_ABORT)
- /* issue VFLR */
- ixgbe_issue_vf_flr(adapter, vfdev);
- }
+ for (vf = 0; vf < adapter->num_vfs; ++vf) {
+ struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev;
+ u16 status_reg;
- vfdev = pci_get_device(pdev->vendor, vf_id, vfdev);
+ if (!vfdev)
+ continue;
+ pci_read_config_word(vfdev, PCI_STATUS, &status_reg);
+ if (status_reg != IXGBE_FAILED_READ_CFG_WORD &&
+ status_reg & PCI_STATUS_REC_MASTER_ABORT)
+ ixgbe_issue_vf_flr(adapter, vfdev);
}
}
@@ -7019,6 +7063,7 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
struct tcphdr *tcphdr;
u8 *raw;
} transport_hdr;
+ __be16 frag_off;
if (skb->encapsulation) {
network_hdr.raw = skb_inner_network_header(skb);
@@ -7042,13 +7087,17 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
case 6:
vlan_macip_lens |= transport_hdr.raw - network_hdr.raw;
l4_hdr = network_hdr.ipv6->nexthdr;
+ if (likely((transport_hdr.raw - network_hdr.raw) ==
+ sizeof(struct ipv6hdr)))
+ break;
+ ipv6_skip_exthdr(skb, network_hdr.raw - skb->data +
+ sizeof(struct ipv6hdr),
+ &l4_hdr, &frag_off);
+ if (unlikely(frag_off))
+ l4_hdr = NEXTHDR_FRAGMENT;
break;
default:
- if (unlikely(net_ratelimit())) {
- dev_warn(tx_ring->dev,
- "partial checksum but version=%d\n",
- network_hdr.ipv4->version);
- }
+ break;
}
switch (l4_hdr) {
@@ -7069,16 +7118,18 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
default:
if (unlikely(net_ratelimit())) {
dev_warn(tx_ring->dev,
- "partial checksum but l4 proto=%x!\n",
- l4_hdr);
+ "partial checksum, version=%d, l4 proto=%x\n",
+ network_hdr.ipv4->version, l4_hdr);
}
- break;
+ skb_checksum_help(skb);
+ goto no_csum;
}
/* update TX checksum flag */
first->tx_flags |= IXGBE_TX_FLAGS_CSUM;
}
+no_csum:
/* vlan_macip_lens: MACLEN, VLAN tag */
vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
@@ -7654,17 +7705,16 @@ static int ixgbe_set_mac(struct net_device *netdev, void *p)
struct ixgbe_adapter *adapter = netdev_priv(netdev);
struct ixgbe_hw *hw = &adapter->hw;
struct sockaddr *addr = p;
- int ret;
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
- ixgbe_del_mac_filter(adapter, hw->mac.addr, VMDQ_P(0));
memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
- ret = ixgbe_add_mac_filter(adapter, hw->mac.addr, VMDQ_P(0));
- return ret > 0 ? 0 : ret;
+ ixgbe_mac_set_default_filter(adapter);
+
+ return 0;
}
static int
@@ -8147,7 +8197,10 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
{
/* guarantee we can provide a unique filter for the unicast address */
if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
- if (IXGBE_MAX_PF_MACVLANS <= netdev_uc_count(dev))
+ struct ixgbe_adapter *adapter = netdev_priv(dev);
+ u16 pool = VMDQ_P(0);
+
+ if (netdev_uc_count(dev) >= ixgbe_available_rars(adapter, pool))
return -ENOMEM;
}
@@ -8865,7 +8918,7 @@ skip_sriov:
goto err_sw_init;
}
- ixgbe_mac_set_default_filter(adapter, hw->mac.perm_addr);
+ ixgbe_mac_set_default_filter(adapter);
setup_timer(&adapter->service_timer, &ixgbe_service_timer,
(unsigned long) adapter);
@@ -9320,6 +9373,12 @@ static int __init ixgbe_init_module(void)
pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version);
pr_info("%s\n", ixgbe_copyright);
+ ixgbe_wq = create_singlethread_workqueue(ixgbe_driver_name);
+ if (!ixgbe_wq) {
+ pr_err("%s: Failed to create workqueue\n", ixgbe_driver_name);
+ return -ENOMEM;
+ }
+
ixgbe_dbg_init();
ret = pci_register_driver(&ixgbe_driver);
@@ -9351,6 +9410,10 @@ static void __exit ixgbe_exit_module(void)
pci_unregister_driver(&ixgbe_driver);
ixgbe_dbg_exit();
+ if (ixgbe_wq) {
+ destroy_workqueue(ixgbe_wq);
+ ixgbe_wq = NULL;
+ }
}
#ifdef CONFIG_IXGBE_DCA
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index fb8673d63806..db0731e05401 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -2393,6 +2393,9 @@ s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on)
if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper)
return 0;
+ if (!on && ixgbe_mng_present(hw))
+ return 0;
+
status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL,
IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
&reg);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index e5ba04025e2b..ef1504d41890 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -1,7 +1,7 @@
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2013 Intel Corporation.
+ Copyright(c) 1999 - 2015 Intel Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms and conditions of the GNU General Public License,
@@ -27,6 +27,7 @@
*******************************************************************************/
#include "ixgbe.h"
#include <linux/ptp_classify.h>
+#include <linux/clocksource.h>
/*
* The 82599 and the X540 do not have true 64bit nanosecond scale
@@ -93,7 +94,6 @@
#define IXGBE_INCVAL_SHIFT_82599 7
#define IXGBE_INCPER_SHIFT_82599 24
-#define IXGBE_MAX_TIMEADJ_VALUE 0x7FFFFFFFFFFFFFFFULL
#define IXGBE_OVERFLOW_PERIOD (HZ * 30)
#define IXGBE_PTP_TX_TIMEOUT (HZ * 15)
@@ -104,8 +104,68 @@
*/
#define IXGBE_PTP_PPS_HALF_SECOND 500000000ULL
+/* In contrast, the X550 controller has two registers, SYSTIMEH and SYSTIMEL
+ * which contain measurements of seconds and nanoseconds respectively. This
+ * matches the standard linux representation of time in the kernel. In addition,
+ * the X550 also has a SYSTIMER register which represents residue, or
+ * subnanosecond overflow adjustments. To control clock adjustment, the TIMINCA
+ * register is used, but it is unlike the X540 and 82599 devices. TIMINCA
+ * represents units of 2^-32 nanoseconds, and uses 31 bits for this, with the
+ * high bit representing whether the adjustent is positive or negative. Every
+ * clock cycle, the X550 will add 12.5 ns + TIMINCA which can result in a range
+ * of 12 to 13 nanoseconds adjustment. Unlike the 82599 and X540 devices, the
+ * X550's clock for purposes of SYSTIME generation is constant and not dependent
+ * on the link speed.
+ *
+ * SYSTIMEH SYSTIMEL SYSTIMER
+ * +--------------+ +--------------+ +-------------+
+ * X550 | 32 | | 32 | | 32 |
+ * *--------------+ +--------------+ +-------------+
+ * \____seconds___/ \_nanoseconds_/ \__2^-32 ns__/
+ *
+ * This results in a full 96 bits to represent the clock, with 32 bits for
+ * seconds, 32 bits for nanoseconds (largest value is 0d999999999 or just under
+ * 1 second) and an additional 32 bits to measure sub nanosecond adjustments for
+ * underflow of adjustments.
+ *
+ * The 32 bits of seconds for the X550 overflows every
+ * 2^32 / ( 365.25 * 24 * 60 * 60 ) = ~136 years.
+ *
+ * In order to adjust the clock frequency for the X550, the TIMINCA register is
+ * provided. This register represents a + or minus nearly 0.5 ns adjustment to
+ * the base frequency. It is measured in 2^-32 ns units, with the high bit being
+ * the sign bit. This register enables software to calculate frequency
+ * adjustments and apply them directly to the clock rate.
+ *
+ * The math for converting ppb into TIMINCA values is fairly straightforward.
+ * TIMINCA value = ( Base_Frequency * ppb ) / 1000000000ULL
+ *
+ * This assumes that ppb is never high enough to create a value bigger than
+ * TIMINCA's 31 bits can store. This is ensured by the stack. Calculating this
+ * value is also simple.
+ * Max ppb = ( Max Adjustment / Base Frequency ) / 1000000000ULL
+ *
+ * For the X550, the Max adjustment is +/- 0.5 ns, and the base frequency is
+ * 12.5 nanoseconds. This means that the Max ppb is 39999999
+ * Note: We subtract one in order to ensure no overflow, because the TIMINCA
+ * register can only hold slightly under 0.5 nanoseconds.
+ *
+ * Because TIMINCA is measured in 2^-32 ns units, we have to convert 12.5 ns
+ * into 2^-32 units, which is
+ *
+ * 12.5 * 2^32 = C80000000
+ *
+ * Some revisions of hardware have a faster base frequency than the registers
+ * were defined for. To fix this, we use a timecounter structure with the
+ * proper mult and shift to convert the cycles into nanoseconds of time.
+ */
+#define IXGBE_X550_BASE_PERIOD 0xC80000000ULL
+#define INCVALUE_MASK 0x7FFFFFFF
+#define ISGN 0x80000000
+#define MAX_TIMADJ 0x7FFFFFFF
+
/**
- * ixgbe_ptp_setup_sdp
+ * ixgbe_ptp_setup_sdp_x540
* @hw: the hardware private structure
*
* this function enables or disables the clock out feature on SDP0 for
@@ -116,83 +176,116 @@
* aligns the start of the PPS signal to that value. The shift is
* necessary because it can change based on the link speed.
*/
-static void ixgbe_ptp_setup_sdp(struct ixgbe_adapter *adapter)
+static void ixgbe_ptp_setup_sdp_x540(struct ixgbe_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
- int shift = adapter->cc.shift;
+ int shift = adapter->hw_cc.shift;
u32 esdp, tsauxc, clktiml, clktimh, trgttiml, trgttimh, rem;
u64 ns = 0, clock_edge = 0;
- if ((adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED) &&
- (hw->mac.type == ixgbe_mac_X540)) {
+ /* disable the pin first */
+ IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0);
+ IXGBE_WRITE_FLUSH(hw);
- /* disable the pin first */
- IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0);
- IXGBE_WRITE_FLUSH(hw);
+ if (!(adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED))
+ return;
- esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+ esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
- /*
- * enable the SDP0 pin as output, and connected to the
- * native function for Timesync (ClockOut)
- */
- esdp |= (IXGBE_ESDP_SDP0_DIR |
- IXGBE_ESDP_SDP0_NATIVE);
+ /* enable the SDP0 pin as output, and connected to the
+ * native function for Timesync (ClockOut)
+ */
+ esdp |= IXGBE_ESDP_SDP0_DIR |
+ IXGBE_ESDP_SDP0_NATIVE;
- /*
- * enable the Clock Out feature on SDP0, and allow
- * interrupts to occur when the pin changes
- */
- tsauxc = (IXGBE_TSAUXC_EN_CLK |
- IXGBE_TSAUXC_SYNCLK |
- IXGBE_TSAUXC_SDP0_INT);
+ /* enable the Clock Out feature on SDP0, and allow
+ * interrupts to occur when the pin changes
+ */
+ tsauxc = IXGBE_TSAUXC_EN_CLK |
+ IXGBE_TSAUXC_SYNCLK |
+ IXGBE_TSAUXC_SDP0_INT;
- /* clock period (or pulse length) */
- clktiml = (u32)(IXGBE_PTP_PPS_HALF_SECOND << shift);
- clktimh = (u32)((IXGBE_PTP_PPS_HALF_SECOND << shift) >> 32);
+ /* clock period (or pulse length) */
+ clktiml = (u32)(IXGBE_PTP_PPS_HALF_SECOND << shift);
+ clktimh = (u32)((IXGBE_PTP_PPS_HALF_SECOND << shift) >> 32);
- /*
- * Account for the cyclecounter wrap-around value by
- * using the converted ns value of the current time to
- * check for when the next aligned second would occur.
- */
- clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML);
- clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32;
- ns = timecounter_cyc2time(&adapter->tc, clock_edge);
+ /* Account for the cyclecounter wrap-around value by
+ * using the converted ns value of the current time to
+ * check for when the next aligned second would occur.
+ */
+ clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML);
+ clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32;
+ ns = timecounter_cyc2time(&adapter->hw_tc, clock_edge);
- div_u64_rem(ns, IXGBE_PTP_PPS_HALF_SECOND, &rem);
- clock_edge += ((IXGBE_PTP_PPS_HALF_SECOND - (u64)rem) << shift);
+ div_u64_rem(ns, IXGBE_PTP_PPS_HALF_SECOND, &rem);
+ clock_edge += ((IXGBE_PTP_PPS_HALF_SECOND - (u64)rem) << shift);
- /* specify the initial clock start time */
- trgttiml = (u32)clock_edge;
- trgttimh = (u32)(clock_edge >> 32);
+ /* specify the initial clock start time */
+ trgttiml = (u32)clock_edge;
+ trgttimh = (u32)(clock_edge >> 32);
- IXGBE_WRITE_REG(hw, IXGBE_CLKTIML, clktiml);
- IXGBE_WRITE_REG(hw, IXGBE_CLKTIMH, clktimh);
- IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml);
- IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh);
+ IXGBE_WRITE_REG(hw, IXGBE_CLKTIML, clktiml);
+ IXGBE_WRITE_REG(hw, IXGBE_CLKTIMH, clktimh);
+ IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml);
+ IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh);
- IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
- IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc);
- } else {
- IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0);
- }
+ IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+ IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc);
IXGBE_WRITE_FLUSH(hw);
}
/**
- * ixgbe_ptp_read - read raw cycle counter (to be used by time counter)
+ * ixgbe_ptp_read_X550 - read cycle counter value
+ * @hw_cc: cyclecounter structure
+ *
+ * This function reads SYSTIME registers. It is called by the cyclecounter
+ * structure to convert from internal representation into nanoseconds. We need
+ * this for X550 since some skews do not have expected clock frequency and
+ * result of SYSTIME is 32bits of "billions of cycles" and 32 bits of
+ * "cycles", rather than seconds and nanoseconds.
+ */
+static cycle_t ixgbe_ptp_read_X550(const struct cyclecounter *hw_cc)
+{
+ struct ixgbe_adapter *adapter =
+ container_of(hw_cc, struct ixgbe_adapter, hw_cc);
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct timespec64 ts;
+
+ /* storage is 32 bits of 'billions of cycles' and 32 bits of 'cycles'.
+ * Some revisions of hardware run at a higher frequency and so the
+ * cycles are not guaranteed to be nanoseconds. The timespec64 created
+ * here is used for its math/conversions but does not necessarily
+ * represent nominal time.
+ *
+ * It should be noted that this cyclecounter will overflow at a
+ * non-bitmask field since we have to convert our billions of cycles
+ * into an actual cycles count. This results in some possible weird
+ * situations at high cycle counter stamps. However given that 32 bits
+ * of "seconds" is ~138 years this isn't a problem. Even at the
+ * increased frequency of some revisions, this is still ~103 years.
+ * Since the SYSTIME values start at 0 and we never write them, it is
+ * highly unlikely for the cyclecounter to overflow in practice.
+ */
+ IXGBE_READ_REG(hw, IXGBE_SYSTIMR);
+ ts.tv_nsec = IXGBE_READ_REG(hw, IXGBE_SYSTIML);
+ ts.tv_sec = IXGBE_READ_REG(hw, IXGBE_SYSTIMH);
+
+ return (u64)timespec64_to_ns(&ts);
+}
+
+/**
+ * ixgbe_ptp_read_82599 - read raw cycle counter (to be used by time counter)
* @cc: the cyclecounter structure
*
* this function reads the cyclecounter registers and is called by the
* cyclecounter structure used to construct a ns counter from the
* arbitrary fixed point registers
*/
-static cycle_t ixgbe_ptp_read(const struct cyclecounter *cc)
+static cycle_t ixgbe_ptp_read_82599(const struct cyclecounter *cc)
{
struct ixgbe_adapter *adapter =
- container_of(cc, struct ixgbe_adapter, cc);
+ container_of(cc, struct ixgbe_adapter, hw_cc);
struct ixgbe_hw *hw = &adapter->hw;
u64 stamp = 0;
@@ -203,20 +296,79 @@ static cycle_t ixgbe_ptp_read(const struct cyclecounter *cc)
}
/**
- * ixgbe_ptp_adjfreq
+ * ixgbe_ptp_convert_to_hwtstamp - convert register value to hw timestamp
+ * @adapter: private adapter structure
+ * @hwtstamp: stack timestamp structure
+ * @systim: unsigned 64bit system time value
+ *
+ * We need to convert the adapter's RX/TXSTMP registers into a hwtstamp value
+ * which can be used by the stack's ptp functions.
+ *
+ * The lock is used to protect consistency of the cyclecounter and the SYSTIME
+ * registers. However, it does not need to protect against the Rx or Tx
+ * timestamp registers, as there can't be a new timestamp until the old one is
+ * unlatched by reading.
+ *
+ * In addition to the timestamp in hardware, some controllers need a software
+ * overflow cyclecounter, and this function takes this into account as well.
+ **/
+static void ixgbe_ptp_convert_to_hwtstamp(struct ixgbe_adapter *adapter,
+ struct skb_shared_hwtstamps *hwtstamp,
+ u64 timestamp)
+{
+ unsigned long flags;
+ struct timespec64 systime;
+ u64 ns;
+
+ memset(hwtstamp, 0, sizeof(*hwtstamp));
+
+ switch (adapter->hw.mac.type) {
+ /* X550 and later hardware supposedly represent time using a seconds
+ * and nanoseconds counter, instead of raw 64bits nanoseconds. We need
+ * to convert the timestamp into cycles before it can be fed to the
+ * cyclecounter. We need an actual cyclecounter because some revisions
+ * of hardware run at a higher frequency and thus the counter does
+ * not represent seconds/nanoseconds. Instead it can be thought of as
+ * cycles and billions of cycles.
+ */
+ case ixgbe_mac_X550:
+ case ixgbe_mac_X550EM_x:
+ /* Upper 32 bits represent billions of cycles, lower 32 bits
+ * represent cycles. However, we use timespec64_to_ns for the
+ * correct math even though the units haven't been corrected
+ * yet.
+ */
+ systime.tv_sec = timestamp >> 32;
+ systime.tv_nsec = timestamp & 0xFFFFFFFF;
+
+ timestamp = timespec64_to_ns(&systime);
+ break;
+ default:
+ break;
+ }
+
+ spin_lock_irqsave(&adapter->tmreg_lock, flags);
+ ns = timecounter_cyc2time(&adapter->hw_tc, timestamp);
+ spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+ hwtstamp->hwtstamp = ns_to_ktime(ns);
+}
+
+/**
+ * ixgbe_ptp_adjfreq_82599
* @ptp: the ptp clock structure
* @ppb: parts per billion adjustment from base
*
* adjust the frequency of the ptp cycle counter by the
* indicated ppb from the base frequency.
*/
-static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+static int ixgbe_ptp_adjfreq_82599(struct ptp_clock_info *ptp, s32 ppb)
{
struct ixgbe_adapter *adapter =
container_of(ptp, struct ixgbe_adapter, ptp_caps);
struct ixgbe_hw *hw = &adapter->hw;
- u64 freq;
- u32 diff, incval;
+ u64 freq, incval;
+ u32 diff;
int neg_adj = 0;
if (ppb < 0) {
@@ -235,12 +387,16 @@ static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
switch (hw->mac.type) {
case ixgbe_mac_X540:
- IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, incval);
+ if (incval > 0xFFFFFFFFULL)
+ e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n");
+ IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, (u32)incval);
break;
case ixgbe_mac_82599EB:
+ if (incval > 0x00FFFFFFULL)
+ e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n");
IXGBE_WRITE_REG(hw, IXGBE_TIMINCA,
(1 << IXGBE_INCPER_SHIFT_82599) |
- incval);
+ ((u32)incval & 0x00FFFFFFUL));
break;
default:
break;
@@ -250,6 +406,43 @@ static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
}
/**
+ * ixgbe_ptp_adjfreq_X550
+ * @ptp: the ptp clock structure
+ * @ppb: parts per billion adjustment from base
+ *
+ * adjust the frequency of the SYSTIME registers by the indicated ppb from base
+ * frequency
+ */
+static int ixgbe_ptp_adjfreq_X550(struct ptp_clock_info *ptp, s32 ppb)
+{
+ struct ixgbe_adapter *adapter =
+ container_of(ptp, struct ixgbe_adapter, ptp_caps);
+ struct ixgbe_hw *hw = &adapter->hw;
+ int neg_adj = 0;
+ u64 rate = IXGBE_X550_BASE_PERIOD;
+ u32 inca;
+
+ if (ppb < 0) {
+ neg_adj = 1;
+ ppb = -ppb;
+ }
+ rate *= ppb;
+ rate = div_u64(rate, 1000000000ULL);
+
+ /* warn if rate is too large */
+ if (rate >= INCVALUE_MASK)
+ e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n");
+
+ inca = rate & INCVALUE_MASK;
+ if (neg_adj)
+ inca |= ISGN;
+
+ IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, inca);
+
+ return 0;
+}
+
+/**
* ixgbe_ptp_adjtime
* @ptp: the ptp clock structure
* @delta: offset to adjust the cycle counter by
@@ -263,10 +456,11 @@ static int ixgbe_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
unsigned long flags;
spin_lock_irqsave(&adapter->tmreg_lock, flags);
- timecounter_adjtime(&adapter->tc, delta);
+ timecounter_adjtime(&adapter->hw_tc, delta);
spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
- ixgbe_ptp_setup_sdp(adapter);
+ if (adapter->ptp_setup_sdp)
+ adapter->ptp_setup_sdp(adapter);
return 0;
}
@@ -283,11 +477,11 @@ static int ixgbe_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
{
struct ixgbe_adapter *adapter =
container_of(ptp, struct ixgbe_adapter, ptp_caps);
- u64 ns;
unsigned long flags;
+ u64 ns;
spin_lock_irqsave(&adapter->tmreg_lock, flags);
- ns = timecounter_read(&adapter->tc);
+ ns = timecounter_read(&adapter->hw_tc);
spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
*ts = ns_to_timespec64(ns);
@@ -308,17 +502,16 @@ static int ixgbe_ptp_settime(struct ptp_clock_info *ptp,
{
struct ixgbe_adapter *adapter =
container_of(ptp, struct ixgbe_adapter, ptp_caps);
- u64 ns;
unsigned long flags;
-
- ns = timespec64_to_ns(ts);
+ u64 ns = timespec64_to_ns(ts);
/* reset the timecounter */
spin_lock_irqsave(&adapter->tmreg_lock, flags);
- timecounter_init(&adapter->tc, &adapter->cc, ns);
+ timecounter_init(&adapter->hw_tc, &adapter->hw_cc, ns);
spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
- ixgbe_ptp_setup_sdp(adapter);
+ if (adapter->ptp_setup_sdp)
+ adapter->ptp_setup_sdp(adapter);
return 0;
}
@@ -343,33 +536,26 @@ static int ixgbe_ptp_feature_enable(struct ptp_clock_info *ptp,
* event when the clock SDP triggers. Clear mask when PPS is
* disabled
*/
- if (rq->type == PTP_CLK_REQ_PPS) {
- switch (adapter->hw.mac.type) {
- case ixgbe_mac_X540:
- if (on)
- adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED;
- else
- adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED;
-
- ixgbe_ptp_setup_sdp(adapter);
- return 0;
- default:
- break;
- }
- }
+ if (rq->type != PTP_CLK_REQ_PPS || !adapter->ptp_setup_sdp)
+ return -ENOTSUPP;
+
+ if (on)
+ adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED;
+ else
+ adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED;
- return -ENOTSUPP;
+ adapter->ptp_setup_sdp(adapter);
+ return 0;
}
/**
* ixgbe_ptp_check_pps_event
* @adapter: the private adapter structure
- * @eicr: the interrupt cause register value
*
* This function is called by the interrupt routine when checking for
* interrupts. It will check and handle a pps event.
*/
-void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr)
+void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
struct ptp_clock_event event;
@@ -425,7 +611,9 @@ void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
u32 tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL);
+ struct ixgbe_ring *rx_ring;
unsigned long rx_event;
+ int n;
/* if we don't have a valid timestamp in the registers, just update the
* timeout counter and exit
@@ -437,19 +625,43 @@ void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter)
/* determine the most recent watchdog or rx_timestamp event */
rx_event = adapter->last_rx_ptp_check;
- if (time_after(adapter->last_rx_timestamp, rx_event))
- rx_event = adapter->last_rx_timestamp;
+ for (n = 0; n < adapter->num_rx_queues; n++) {
+ rx_ring = adapter->rx_ring[n];
+ if (time_after(rx_ring->last_rx_timestamp, rx_event))
+ rx_event = rx_ring->last_rx_timestamp;
+ }
/* only need to read the high RXSTMP register to clear the lock */
- if (time_is_before_jiffies(rx_event + 5*HZ)) {
+ if (time_is_before_jiffies(rx_event + 5 * HZ)) {
IXGBE_READ_REG(hw, IXGBE_RXSTMPH);
adapter->last_rx_ptp_check = jiffies;
+ adapter->rx_hwtstamp_cleared++;
e_warn(drv, "clearing RX Timestamp hang\n");
}
}
/**
+ * ixgbe_ptp_clear_tx_timestamp - utility function to clear Tx timestamp state
+ * @adapter: the private adapter structure
+ *
+ * This function should be called whenever the state related to a Tx timestamp
+ * needs to be cleared. This helps ensure that all related bits are reset for
+ * the next Tx timestamp event.
+ */
+static void ixgbe_ptp_clear_tx_timestamp(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+
+ IXGBE_READ_REG(hw, IXGBE_TXSTMPH);
+ if (adapter->ptp_tx_skb) {
+ dev_kfree_skb_any(adapter->ptp_tx_skb);
+ adapter->ptp_tx_skb = NULL;
+ }
+ clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state);
+}
+
+/**
* ixgbe_ptp_tx_hwtstamp - utility function which checks for TX time stamp
* @adapter: the private adapter struct
*
@@ -461,23 +673,15 @@ static void ixgbe_ptp_tx_hwtstamp(struct ixgbe_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
struct skb_shared_hwtstamps shhwtstamps;
- u64 regval = 0, ns;
- unsigned long flags;
+ u64 regval = 0;
regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPL);
regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPH) << 32;
- spin_lock_irqsave(&adapter->tmreg_lock, flags);
- ns = timecounter_cyc2time(&adapter->tc, regval);
- spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
-
- memset(&shhwtstamps, 0, sizeof(shhwtstamps));
- shhwtstamps.hwtstamp = ns_to_ktime(ns);
+ ixgbe_ptp_convert_to_hwtstamp(adapter, &shhwtstamps, regval);
skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps);
- dev_kfree_skb_any(adapter->ptp_tx_skb);
- adapter->ptp_tx_skb = NULL;
- clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state);
+ ixgbe_ptp_clear_tx_timestamp(adapter);
}
/**
@@ -497,38 +701,85 @@ static void ixgbe_ptp_tx_hwtstamp_work(struct work_struct *work)
IXGBE_PTP_TX_TIMEOUT);
u32 tsynctxctl;
- if (timeout) {
- dev_kfree_skb_any(adapter->ptp_tx_skb);
- adapter->ptp_tx_skb = NULL;
- clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state);
- e_warn(drv, "clearing Tx Timestamp hang\n");
+ /* we have to have a valid skb to poll for a timestamp */
+ if (!adapter->ptp_tx_skb) {
+ ixgbe_ptp_clear_tx_timestamp(adapter);
return;
}
+ /* stop polling once we have a valid timestamp */
tsynctxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL);
- if (tsynctxctl & IXGBE_TSYNCTXCTL_VALID)
+ if (tsynctxctl & IXGBE_TSYNCTXCTL_VALID) {
ixgbe_ptp_tx_hwtstamp(adapter);
- else
+ return;
+ }
+
+ if (timeout) {
+ ixgbe_ptp_clear_tx_timestamp(adapter);
+ adapter->tx_hwtstamp_timeouts++;
+ e_warn(drv, "clearing Tx Timestamp hang\n");
+ } else {
/* reschedule to keep checking if it's not available yet */
schedule_work(&adapter->ptp_tx_work);
+ }
}
/**
- * ixgbe_ptp_rx_hwtstamp - utility function which checks for RX time stamp
- * @adapter: pointer to adapter struct
+ * ixgbe_ptp_rx_pktstamp - utility function to get RX time stamp from buffer
+ * @q_vector: structure containing interrupt and ring information
+ * @skb: the packet
+ *
+ * This function will be called by the Rx routine of the timestamp for this
+ * packet is stored in the buffer. The value is stored in little endian format
+ * starting at the end of the packet data.
+ */
+void ixgbe_ptp_rx_pktstamp(struct ixgbe_q_vector *q_vector,
+ struct sk_buff *skb)
+{
+ __le64 regval;
+
+ /* copy the bits out of the skb, and then trim the skb length */
+ skb_copy_bits(skb, skb->len - IXGBE_TS_HDR_LEN, &regval,
+ IXGBE_TS_HDR_LEN);
+ __pskb_trim(skb, skb->len - IXGBE_TS_HDR_LEN);
+
+ /* The timestamp is recorded in little endian format, and is stored at
+ * the end of the packet.
+ *
+ * DWORD: N N + 1 N + 2
+ * Field: End of Packet SYSTIMH SYSTIML
+ */
+ ixgbe_ptp_convert_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb),
+ le64_to_cpu(regval));
+}
+
+/**
+ * ixgbe_ptp_rx_rgtstamp - utility function which checks for RX time stamp
+ * @q_vector: structure containing interrupt and ring information
* @skb: particular skb to send timestamp with
*
* if the timestamp is valid, we convert it into the timecounter ns
* value, then store that result into the shhwtstamps structure which
* is passed up the network stack
*/
-void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb)
+void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *q_vector,
+ struct sk_buff *skb)
{
- struct ixgbe_hw *hw = &adapter->hw;
- struct skb_shared_hwtstamps *shhwtstamps;
- u64 regval = 0, ns;
+ struct ixgbe_adapter *adapter;
+ struct ixgbe_hw *hw;
+ u64 regval = 0;
u32 tsyncrxctl;
- unsigned long flags;
+
+ /* we cannot process timestamps on a ring without a q_vector */
+ if (!q_vector || !q_vector->adapter)
+ return;
+
+ adapter = q_vector->adapter;
+ hw = &adapter->hw;
+
+ /* Read the tsyncrxctl register afterwards in order to prevent taking an
+ * I/O hit on every packet.
+ */
tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL);
if (!(tsyncrxctl & IXGBE_TSYNCRXCTL_VALID))
@@ -537,17 +788,7 @@ void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb)
regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPL);
regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPH) << 32;
- spin_lock_irqsave(&adapter->tmreg_lock, flags);
- ns = timecounter_cyc2time(&adapter->tc, regval);
- spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
-
- shhwtstamps = skb_hwtstamps(skb);
- shhwtstamps->hwtstamp = ns_to_ktime(ns);
-
- /* Update the last_rx_timestamp timer in order to enable watchdog check
- * for error case of latched timestamp on a dropped packet.
- */
- adapter->last_rx_timestamp = jiffies;
+ ixgbe_ptp_convert_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
}
int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr)
@@ -610,14 +851,20 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
case HWTSTAMP_FILTER_NONE:
tsync_rx_ctl = 0;
tsync_rx_mtrl = 0;
+ adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+ IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
break;
case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1;
tsync_rx_mtrl |= IXGBE_RXMTRL_V1_SYNC_MSG;
+ adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+ IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
break;
case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1;
tsync_rx_mtrl |= IXGBE_RXMTRL_V1_DELAY_REQ_MSG;
+ adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+ IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
break;
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
@@ -631,9 +878,21 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_EVENT_V2;
is_l2 = true;
config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+ adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+ IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
break;
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
case HWTSTAMP_FILTER_ALL:
+ /* The X550 controller is capable of timestamping all packets,
+ * which allows it to accept any filter.
+ */
+ if (hw->mac.type >= ixgbe_mac_X550) {
+ tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_ALL;
+ config->rx_filter = HWTSTAMP_FILTER_ALL;
+ adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED;
+ break;
+ }
+ /* fall through */
default:
/*
* register RXMTRL must be set in order to do V1 packets,
@@ -641,16 +900,46 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
* Delay_Req messages and hardware does not support
* timestamping all packets => return error
*/
+ adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+ IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
config->rx_filter = HWTSTAMP_FILTER_NONE;
return -ERANGE;
}
if (hw->mac.type == ixgbe_mac_82598EB) {
+ adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+ IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
if (tsync_rx_ctl | tsync_tx_ctl)
return -ERANGE;
return 0;
}
+ /* Per-packet timestamping only works if the filter is set to all
+ * packets. Since this is desired, always timestamp all packets as long
+ * as any Rx filter was configured.
+ */
+ switch (hw->mac.type) {
+ case ixgbe_mac_X550:
+ case ixgbe_mac_X550EM_x:
+ /* enable timestamping all packets only if at least some
+ * packets were requested. Otherwise, play nice and disable
+ * timestamping
+ */
+ if (config->rx_filter == HWTSTAMP_FILTER_NONE)
+ break;
+
+ tsync_rx_ctl = IXGBE_TSYNCRXCTL_ENABLED |
+ IXGBE_TSYNCRXCTL_TYPE_ALL |
+ IXGBE_TSYNCRXCTL_TSIP_UT_EN;
+ config->rx_filter = HWTSTAMP_FILTER_ALL;
+ adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED;
+ adapter->flags &= ~IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER;
+ is_l2 = true;
+ break;
+ default:
+ break;
+ }
+
/* define ethertype filter for timestamping L2 packets */
if (is_l2)
IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_1588),
@@ -678,8 +967,8 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
IXGBE_WRITE_FLUSH(hw);
/* clear TX/RX time stamp registers, just to be sure */
- regval = IXGBE_READ_REG(hw, IXGBE_TXSTMPH);
- regval = IXGBE_READ_REG(hw, IXGBE_RXSTMPH);
+ ixgbe_ptp_clear_tx_timestamp(adapter);
+ IXGBE_READ_REG(hw, IXGBE_RXSTMPH);
return 0;
}
@@ -712,23 +1001,9 @@ int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr)
-EFAULT : 0;
}
-/**
- * ixgbe_ptp_start_cyclecounter - create the cycle counter from hw
- * @adapter: pointer to the adapter structure
- *
- * This function should be called to set the proper values for the TIMINCA
- * register and tell the cyclecounter structure what the tick rate of SYSTIME
- * is. It does not directly modify SYSTIME registers or the timecounter
- * structure. It should be called whenever a new TIMINCA value is necessary,
- * such as during initialization or when the link speed changes.
- */
-void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
+static void ixgbe_ptp_link_speed_adjust(struct ixgbe_adapter *adapter,
+ u32 *shift, u32 *incval)
{
- struct ixgbe_hw *hw = &adapter->hw;
- u32 incval = 0;
- u32 shift = 0;
- unsigned long flags;
-
/**
* Scale the NIC cycle counter by a large factor so that
* relatively small corrections to the frequency can be added
@@ -745,36 +1020,98 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
*/
switch (adapter->link_speed) {
case IXGBE_LINK_SPEED_100_FULL:
- incval = IXGBE_INCVAL_100;
- shift = IXGBE_INCVAL_SHIFT_100;
+ *shift = IXGBE_INCVAL_SHIFT_100;
+ *incval = IXGBE_INCVAL_100;
break;
case IXGBE_LINK_SPEED_1GB_FULL:
- incval = IXGBE_INCVAL_1GB;
- shift = IXGBE_INCVAL_SHIFT_1GB;
+ *shift = IXGBE_INCVAL_SHIFT_1GB;
+ *incval = IXGBE_INCVAL_1GB;
break;
case IXGBE_LINK_SPEED_10GB_FULL:
default:
- incval = IXGBE_INCVAL_10GB;
- shift = IXGBE_INCVAL_SHIFT_10GB;
+ *shift = IXGBE_INCVAL_SHIFT_10GB;
+ *incval = IXGBE_INCVAL_10GB;
break;
}
+}
- /**
- * Modify the calculated values to fit within the correct
- * number of bits specified by the hardware. The 82599 doesn't
- * have the same space as the X540, so bitshift the calculated
- * values to fit.
+/**
+ * ixgbe_ptp_start_cyclecounter - create the cycle counter from hw
+ * @adapter: pointer to the adapter structure
+ *
+ * This function should be called to set the proper values for the TIMINCA
+ * register and tell the cyclecounter structure what the tick rate of SYSTIME
+ * is. It does not directly modify SYSTIME registers or the timecounter
+ * structure. It should be called whenever a new TIMINCA value is necessary,
+ * such as during initialization or when the link speed changes.
+ */
+void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct cyclecounter cc;
+ unsigned long flags;
+ u32 incval = 0;
+ u32 tsauxc = 0;
+ u32 fuse0 = 0;
+
+ /* For some of the boards below this mask is technically incorrect.
+ * The timestamp mask overflows at approximately 61bits. However the
+ * particular hardware does not overflow on an even bitmask value.
+ * Instead, it overflows due to conversion of upper 32bits billions of
+ * cycles. Timecounters are not really intended for this purpose so
+ * they do not properly function if the overflow point isn't 2^N-1.
+ * However, the actual SYSTIME values in question take ~138 years to
+ * overflow. In practice this means they won't actually overflow. A
+ * proper fix to this problem would require modification of the
+ * timecounter delta calculations.
*/
+ cc.mask = CLOCKSOURCE_MASK(64);
+ cc.mult = 1;
+ cc.shift = 0;
+
switch (hw->mac.type) {
+ case ixgbe_mac_X550EM_x:
+ /* SYSTIME assumes X550EM_x board frequency is 300Mhz, and is
+ * designed to represent seconds and nanoseconds when this is
+ * the case. However, some revisions of hardware have a 400Mhz
+ * clock and we have to compensate for this frequency
+ * variation using corrected mult and shift values.
+ */
+ fuse0 = IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0));
+ if (!(fuse0 & IXGBE_FUSES0_300MHZ)) {
+ cc.mult = 3;
+ cc.shift = 2;
+ }
+ /* fallthrough */
+ case ixgbe_mac_X550:
+ cc.read = ixgbe_ptp_read_X550;
+
+ /* enable SYSTIME counter */
+ IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0);
+ tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC);
+ IXGBE_WRITE_REG(hw, IXGBE_TSAUXC,
+ tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME);
+ IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS);
+ IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC);
+
+ IXGBE_WRITE_FLUSH(hw);
+ break;
case ixgbe_mac_X540:
+ cc.read = ixgbe_ptp_read_82599;
+
+ ixgbe_ptp_link_speed_adjust(adapter, &cc.shift, &incval);
IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, incval);
break;
case ixgbe_mac_82599EB:
+ cc.read = ixgbe_ptp_read_82599;
+
+ ixgbe_ptp_link_speed_adjust(adapter, &cc.shift, &incval);
incval >>= IXGBE_INCVAL_SHIFT_82599;
- shift -= IXGBE_INCVAL_SHIFT_82599;
+ cc.shift -= IXGBE_INCVAL_SHIFT_82599;
IXGBE_WRITE_REG(hw, IXGBE_TIMINCA,
- (1 << IXGBE_INCPER_SHIFT_82599) |
- incval);
+ (1 << IXGBE_INCPER_SHIFT_82599) | incval);
break;
default:
/* other devices aren't supported */
@@ -787,13 +1124,7 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
/* need lock to prevent incorrect read while modifying cyclecounter */
spin_lock_irqsave(&adapter->tmreg_lock, flags);
-
- memset(&adapter->cc, 0, sizeof(adapter->cc));
- adapter->cc.read = ixgbe_ptp_read;
- adapter->cc.mask = CYCLECOUNTER_MASK(64);
- adapter->cc.shift = shift;
- adapter->cc.mult = 1;
-
+ memcpy(&adapter->hw_cc, &cc, sizeof(adapter->hw_cc));
spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
}
@@ -814,29 +1145,27 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter)
struct ixgbe_hw *hw = &adapter->hw;
unsigned long flags;
- /* set SYSTIME registers to 0 just in case */
- IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0x00000000);
- IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0x00000000);
- IXGBE_WRITE_FLUSH(hw);
-
/* reset the hardware timestamping mode */
ixgbe_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config);
+ /* 82598 does not support PTP */
+ if (hw->mac.type == ixgbe_mac_82598EB)
+ return;
+
ixgbe_ptp_start_cyclecounter(adapter);
spin_lock_irqsave(&adapter->tmreg_lock, flags);
-
- /* reset the ns time counter */
- timecounter_init(&adapter->tc, &adapter->cc,
+ timecounter_init(&adapter->hw_tc, &adapter->hw_cc,
ktime_to_ns(ktime_get_real()));
-
spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
- /*
- * Now that the shift has been calculated and the systime
+ adapter->last_overflow_check = jiffies;
+
+ /* Now that the shift has been calculated and the systime
* registers reset, (re-)enable the Clock out feature
*/
- ixgbe_ptp_setup_sdp(adapter);
+ if (adapter->ptp_setup_sdp)
+ adapter->ptp_setup_sdp(adapter);
}
/**
@@ -845,11 +1174,11 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter)
*
* This function performs setup of the user entry point function table and
* initializes the PTP clock device, which is used to access the clock-like
- * features of the PTP core. It will be called by ixgbe_ptp_init, only if
- * there isn't already a clock device (such as after a suspend/resume cycle,
- * where the clock device wasn't destroyed).
+ * features of the PTP core. It will be called by ixgbe_ptp_init, and may
+ * reuse a previously initialized clock (such as during a suspend/resume
+ * cycle).
*/
-static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter)
+static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
long err;
@@ -869,11 +1198,12 @@ static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter)
adapter->ptp_caps.n_ext_ts = 0;
adapter->ptp_caps.n_per_out = 0;
adapter->ptp_caps.pps = 1;
- adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq;
+ adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599;
adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime;
adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
+ adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_x540;
break;
case ixgbe_mac_82599EB:
snprintf(adapter->ptp_caps.name,
@@ -885,14 +1215,31 @@ static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter)
adapter->ptp_caps.n_ext_ts = 0;
adapter->ptp_caps.n_per_out = 0;
adapter->ptp_caps.pps = 0;
- adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq;
+ adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599;
+ adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
+ adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime;
+ adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
+ adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
+ break;
+ case ixgbe_mac_X550:
+ case ixgbe_mac_X550EM_x:
+ snprintf(adapter->ptp_caps.name, 16, "%s", netdev->name);
+ adapter->ptp_caps.owner = THIS_MODULE;
+ adapter->ptp_caps.max_adj = 30000000;
+ adapter->ptp_caps.n_alarm = 0;
+ adapter->ptp_caps.n_ext_ts = 0;
+ adapter->ptp_caps.n_per_out = 0;
+ adapter->ptp_caps.pps = 0;
+ adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_X550;
adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime;
adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
+ adapter->ptp_setup_sdp = NULL;
break;
default:
adapter->ptp_clock = NULL;
+ adapter->ptp_setup_sdp = NULL;
return -EOPNOTSUPP;
}
@@ -961,18 +1308,13 @@ void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter)
if (!test_and_clear_bit(__IXGBE_PTP_RUNNING, &adapter->state))
return;
- /* since this might be called in suspend, we don't clear the state,
- * but simply reset the auxiliary PPS signal control register
- */
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_TSAUXC, 0x0);
+ adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED;
+ if (adapter->ptp_setup_sdp)
+ adapter->ptp_setup_sdp(adapter);
/* ensure that we cancel any pending PTP Tx work item in progress */
cancel_work_sync(&adapter->ptp_tx_work);
- if (adapter->ptp_tx_skb) {
- dev_kfree_skb_any(adapter->ptp_tx_skb);
- adapter->ptp_tx_skb = NULL;
- clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state);
- }
+ ixgbe_ptp_clear_tx_timestamp(adapter);
}
/**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index fcd8b27a0ccb..31de6cf7adb0 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -1,7 +1,7 @@
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2014 Intel Corporation.
+ Copyright(c) 1999 - 2015 Intel Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms and conditions of the GNU General Public License,
@@ -130,6 +130,38 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter)
return -ENOMEM;
}
+/**
+ * ixgbe_get_vfs - Find and take references to all vf devices
+ * @adapter: Pointer to adapter struct
+ */
+static void ixgbe_get_vfs(struct ixgbe_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ u16 vendor = pdev->vendor;
+ struct pci_dev *vfdev;
+ int vf = 0;
+ u16 vf_id;
+ int pos;
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+ if (!pos)
+ return;
+ pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id);
+
+ vfdev = pci_get_device(vendor, vf_id, NULL);
+ for (; vfdev; vfdev = pci_get_device(vendor, vf_id, vfdev)) {
+ if (!vfdev->is_virtfn)
+ continue;
+ if (vfdev->physfn != pdev)
+ continue;
+ if (vf >= adapter->num_vfs)
+ continue;
+ pci_dev_get(vfdev);
+ adapter->vfinfo[vf].vfdev = vfdev;
+ ++vf;
+ }
+}
+
/* Note this function is called when the user wants to enable SR-IOV
* VFs using the now deprecated module parameter
*/
@@ -170,8 +202,10 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter)
}
}
- if (!__ixgbe_enable_sriov(adapter))
+ if (!__ixgbe_enable_sriov(adapter)) {
+ ixgbe_get_vfs(adapter);
return;
+ }
/* If we have gotten to this point then there is no memory available
* to manage the VF devices - print message and bail.
@@ -184,6 +218,7 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter)
#endif /* #ifdef CONFIG_PCI_IOV */
int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
{
+ unsigned int num_vfs = adapter->num_vfs, vf;
struct ixgbe_hw *hw = &adapter->hw;
u32 gpie;
u32 vmdctl;
@@ -192,6 +227,16 @@ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
/* set num VFs to 0 to prevent access to vfinfo */
adapter->num_vfs = 0;
+ /* put the reference to all of the vf devices */
+ for (vf = 0; vf < num_vfs; ++vf) {
+ struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev;
+
+ if (!vfdev)
+ continue;
+ adapter->vfinfo[vf].vfdev = NULL;
+ pci_dev_put(vfdev);
+ }
+
/* free VF control structures */
kfree(adapter->vfinfo);
adapter->vfinfo = NULL;
@@ -289,6 +334,7 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs)
e_dev_warn("Failed to enable PCI sriov: %d\n", err);
return err;
}
+ ixgbe_get_vfs(adapter);
ixgbe_sriov_reinit(adapter);
return num_vfs;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 995f03107eac..1329eddfc9ce 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -1020,6 +1020,7 @@ struct ixgbe_thermal_sensor_data {
#define IXGBE_TXSTMPH 0x08C08 /* Tx timestamp value High - RO */
#define IXGBE_SYSTIML 0x08C0C /* System time register Low - RO */
#define IXGBE_SYSTIMH 0x08C10 /* System time register High - RO */
+#define IXGBE_SYSTIMR 0x08C58 /* System time register Residue - RO */
#define IXGBE_TIMINCA 0x08C14 /* Increment attributes register - RW */
#define IXGBE_TIMADJL 0x08C18 /* Time Adjustment Offset register Low - RW */
#define IXGBE_TIMADJH 0x08C1C /* Time Adjustment Offset register High - RW */
@@ -1036,6 +1037,7 @@ struct ixgbe_thermal_sensor_data {
#define IXGBE_AUXSTMPH0 0x08C40 /* Auxiliary Time Stamp 0 register High - RO */
#define IXGBE_AUXSTMPL1 0x08C44 /* Auxiliary Time Stamp 1 register Low - RO */
#define IXGBE_AUXSTMPH1 0x08C48 /* Auxiliary Time Stamp 1 register High - RO */
+#define IXGBE_TSIM 0x08C68 /* TimeSync Interrupt Mask Register - RW */
/* Diagnostic Registers */
#define IXGBE_RDSTATCTL 0x02C20
@@ -1345,7 +1347,10 @@ struct ixgbe_thermal_sensor_data {
#define IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK 0xFF01 /* int chip-wide mask */
#define IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_FLAG 0xFC01 /* int chip-wide mask */
#define IXGBE_MDIO_GLOBAL_ALARM_1 0xCC00 /* Global alarm 1 */
+#define IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT 0x0010 /* device fault */
#define IXGBE_MDIO_GLOBAL_ALM_1_HI_TMP_FAIL 0x4000 /* high temp failure */
+#define IXGBE_MDIO_GLOBAL_FAULT_MSG 0xC850 /* global fault msg */
+#define IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP 0x8007 /* high temp failure */
#define IXGBE_MDIO_GLOBAL_INT_MASK 0xD400 /* Global int mask */
/* autoneg vendor alarm int enable */
#define IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN 0x1000
@@ -1353,6 +1358,7 @@ struct ixgbe_thermal_sensor_data {
#define IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN 0x1 /* vendor alarm int enable */
#define IXGBE_MDIO_GLOBAL_STD_ALM2_INT 0x200 /* vendor alarm2 int mask */
#define IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN 0x4000 /* int high temp enable */
+#define IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN 0x0010 /*int dev fault enable */
#define IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR 0xC30A /* PHY_XS SDA/SCL Addr Reg */
#define IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA 0xC30B /* PHY_XS SDA/SCL Data Reg */
@@ -2209,6 +2215,7 @@ enum {
#define IXGBE_TSAUXC_EN_CLK 0x00000004
#define IXGBE_TSAUXC_SYNCLK 0x00000008
#define IXGBE_TSAUXC_SDP0_INT 0x00000040
+#define IXGBE_TSAUXC_DISABLE_SYSTIME 0x80000000
#define IXGBE_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */
#define IXGBE_TSYNCTXCTL_ENABLED 0x00000010 /* Tx timestamping enabled */
@@ -2218,8 +2225,12 @@ enum {
#define IXGBE_TSYNCRXCTL_TYPE_L2_V2 0x00
#define IXGBE_TSYNCRXCTL_TYPE_L4_V1 0x02
#define IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2 0x04
+#define IXGBE_TSYNCRXCTL_TYPE_ALL 0x08
#define IXGBE_TSYNCRXCTL_TYPE_EVENT_V2 0x0A
#define IXGBE_TSYNCRXCTL_ENABLED 0x00000010 /* Rx Timestamping enabled */
+#define IXGBE_TSYNCRXCTL_TSIP_UT_EN 0x00800000 /* Rx Timestamp in Packet */
+
+#define IXGBE_TSIM_TXTS 0x00000002
#define IXGBE_RXMTRL_V1_CTRLT_MASK 0x000000FF
#define IXGBE_RXMTRL_V1_SYNC_MSG 0x00
@@ -2332,6 +2343,7 @@ enum {
#define IXGBE_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */
#define IXGBE_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */
#define IXGBE_RXD_STAT_LLINT 0x800 /* Pkt caused Low Latency Interrupt */
+#define IXGBE_RXD_STAT_TSIP 0x08000 /* Time Stamp in packet buffer */
#define IXGBE_RXD_STAT_TS 0x10000 /* Time Stamp */
#define IXGBE_RXD_STAT_SECP 0x20000 /* Security Processing */
#define IXGBE_RXD_STAT_LB 0x40000 /* Loopback Status */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
index c1d4584f6469..bf8225ceab8e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
@@ -57,8 +57,7 @@ s32 ixgbe_get_invariants_X540(struct ixgbe_hw *hw)
struct ixgbe_phy_info *phy = &hw->phy;
/* set_phy_power was set by default to NULL */
- if (!ixgbe_mng_present(hw))
- phy->ops.set_phy_power = ixgbe_set_copper_phy_power;
+ phy->ops.set_phy_power = ixgbe_set_copper_phy_power;
mac->mcft_size = IXGBE_X540_MC_TBL_SIZE;
mac->vft_size = IXGBE_X540_VFT_TBL_SIZE;
@@ -110,13 +109,14 @@ mac_reset_top:
ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL);
IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
IXGBE_WRITE_FLUSH(hw);
+ usleep_range(1000, 1200);
/* Poll for reset bit to self-clear indicating reset is complete */
for (i = 0; i < 10; i++) {
- udelay(1);
ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
if (!(ctrl & IXGBE_CTRL_RST_MASK))
break;
+ udelay(1);
}
if (ctrl & IXGBE_CTRL_RST_MASK) {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index bf2ae8daf717..f4ef0d1a5dbe 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -1444,7 +1444,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc)
IXGBE_MDIO_GLOBAL_ALARM_1_INT)))
return status;
- /* High temperature failure alarm triggered */
+ /* Global alarm triggered */
status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_ALARM_1,
IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
&reg);
@@ -1458,6 +1458,21 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc)
ixgbe_set_copper_phy_power(hw, false);
return IXGBE_ERR_OVERTEMP;
}
+ if (reg & IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT) {
+ /* device fault alarm triggered */
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_FAULT_MSG,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ &reg);
+ if (status)
+ return status;
+
+ /* if device fault was due to high temp alarm handle and exit */
+ if (reg == IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP) {
+ /* power down the PHY in case the PHY FW didn't */
+ ixgbe_set_copper_phy_power(hw, false);
+ return IXGBE_ERR_OVERTEMP;
+ }
+ }
/* Vendor alarm 2 triggered */
status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG,
@@ -1511,14 +1526,15 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
if (status)
return status;
- /* Enables high temperature failure alarm */
+ /* Enable high temperature failure and global fault alarms */
status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK,
IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
&reg);
if (status)
return status;
- reg |= IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN;
+ reg |= (IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN |
+ IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN);
status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK,
IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
@@ -1727,6 +1743,12 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw)
if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper)
return IXGBE_ERR_CONFIG;
+ if (hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) {
+ speed = IXGBE_LINK_SPEED_10GB_FULL |
+ IXGBE_LINK_SPEED_1GB_FULL;
+ return ixgbe_setup_kr_speed_x550em(hw, speed);
+ }
+
/* If link is not up, then there is no setup necessary so return */
status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up);
if (status)
@@ -1931,7 +1953,6 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw)
static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw)
{
struct ixgbe_phy_info *phy = &hw->phy;
- ixgbe_link_speed speed;
s32 ret_val;
hw->mac.ops.set_lan_id(hw);
@@ -1944,10 +1965,6 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw)
* to determine internal PHY mode.
*/
phy->nw_mng_if_sel = IXGBE_READ_REG(hw, IXGBE_NW_MNG_IF_SEL);
- if (phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) {
- speed = IXGBE_LINK_SPEED_10GB_FULL |
- IXGBE_LINK_SPEED_1GB_FULL;
- }
}
/* Identify the PHY or SFP module */
@@ -1979,14 +1996,8 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw)
/* If internal link mode is XFI, then setup iXFI internal link,
* else setup KR now.
*/
- if (!(phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
- phy->ops.setup_internal_link =
- ixgbe_setup_internal_phy_t_x550em;
- } else {
- speed = IXGBE_LINK_SPEED_10GB_FULL |
- IXGBE_LINK_SPEED_1GB_FULL;
- ret_val = ixgbe_setup_kr_speed_x550em(hw, speed);
- }
+ phy->ops.setup_internal_link =
+ ixgbe_setup_internal_phy_t_x550em;
/* setup SW LPLU only for first revision */
if (!(IXGBE_FUSES0_REV1 & IXGBE_READ_REG(hw,
@@ -2135,13 +2146,14 @@ mac_reset_top:
ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL);
IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
IXGBE_WRITE_FLUSH(hw);
+ usleep_range(1000, 1200);
/* Poll for reset bit to self-clear meaning reset is complete */
for (i = 0; i < 10; i++) {
- udelay(1);
ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
if (!(ctrl & IXGBE_CTRL_RST_MASK))
break;
+ udelay(1);
}
if (ctrl & IXGBE_CTRL_RST_MASK) {
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index dbbd1be47462..f098952d4fb4 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -59,7 +59,7 @@ static const char ixgbevf_driver_string[] =
#define DRV_VERSION "2.12.1-k"
const char ixgbevf_driver_version[] = DRV_VERSION;
static char ixgbevf_copyright[] =
- "Copyright (c) 2009 - 2012 Intel Corporation.";
+ "Copyright (c) 2009 - 2015 Intel Corporation.";
static const struct ixgbevf_info *ixgbevf_info_tbl[] = {
[board_82599_vf] = &ixgbevf_82599_vf_info,
@@ -96,12 +96,14 @@ static int debug = -1;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+static struct workqueue_struct *ixgbevf_wq;
+
static void ixgbevf_service_event_schedule(struct ixgbevf_adapter *adapter)
{
if (!test_bit(__IXGBEVF_DOWN, &adapter->state) &&
!test_bit(__IXGBEVF_REMOVING, &adapter->state) &&
!test_and_set_bit(__IXGBEVF_SERVICE_SCHED, &adapter->state))
- schedule_work(&adapter->service_task);
+ queue_work(ixgbevf_wq, &adapter->service_task);
}
static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter)
@@ -1332,7 +1334,6 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter)
int txr_remaining = adapter->num_tx_queues;
int i, j;
int rqpv, tqpv;
- int err = 0;
q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
@@ -1345,7 +1346,7 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter)
for (; txr_idx < txr_remaining; v_start++, txr_idx++)
map_vector_to_txq(adapter, v_start, txr_idx);
- goto out;
+ return 0;
}
/* If we don't have enough vectors for a 1-to-1
@@ -1370,8 +1371,7 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter)
}
}
-out:
- return err;
+ return 0;
}
/**
@@ -1469,9 +1469,7 @@ static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter)
**/
static int ixgbevf_request_irq(struct ixgbevf_adapter *adapter)
{
- int err = 0;
-
- err = ixgbevf_request_msix_irqs(adapter);
+ int err = ixgbevf_request_msix_irqs(adapter);
if (err)
hw_dbg(&adapter->hw, "request_irq failed, Error %d\n", err);
@@ -1830,7 +1828,7 @@ static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev,
{
struct ixgbevf_adapter *adapter = netdev_priv(netdev);
struct ixgbe_hw *hw = &adapter->hw;
- int err = -EOPNOTSUPP;
+ int err;
spin_lock_bh(&adapter->mbx_lock);
@@ -2046,7 +2044,7 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter)
ixgbe_mbox_api_11,
ixgbe_mbox_api_10,
ixgbe_mbox_api_unknown };
- int err = 0, idx = 0;
+ int err, idx = 0;
spin_lock_bh(&adapter->mbx_lock);
@@ -2419,7 +2417,7 @@ err_allocation:
static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
- int err = 0;
+ int err;
int vector, v_budget;
/* It's easy to be greedy for MSI-X vectors, but it really
@@ -2437,26 +2435,21 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
*/
adapter->msix_entries = kcalloc(v_budget,
sizeof(struct msix_entry), GFP_KERNEL);
- if (!adapter->msix_entries) {
- err = -ENOMEM;
- goto out;
- }
+ if (!adapter->msix_entries)
+ return -ENOMEM;
for (vector = 0; vector < v_budget; vector++)
adapter->msix_entries[vector].entry = vector;
err = ixgbevf_acquire_msix_vectors(adapter, v_budget);
if (err)
- goto out;
+ return err;
err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
if (err)
- goto out;
-
- err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+ return err;
-out:
- return err;
+ return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
}
/**
@@ -3351,6 +3344,7 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring,
if (skb->ip_summed == CHECKSUM_PARTIAL) {
u8 l4_hdr = 0;
+ __be16 frag_off;
switch (first->protocol) {
case htons(ETH_P_IP):
@@ -3361,13 +3355,16 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring,
case htons(ETH_P_IPV6):
vlan_macip_lens |= skb_network_header_len(skb);
l4_hdr = ipv6_hdr(skb)->nexthdr;
+ if (likely(skb_network_header_len(skb) ==
+ sizeof(struct ipv6hdr)))
+ break;
+ ipv6_skip_exthdr(skb, skb_network_offset(skb) +
+ sizeof(struct ipv6hdr),
+ &l4_hdr, &frag_off);
+ if (unlikely(frag_off))
+ l4_hdr = NEXTHDR_FRAGMENT;
break;
default:
- if (unlikely(net_ratelimit())) {
- dev_warn(tx_ring->dev,
- "partial checksum but proto=%x!\n",
- first->protocol);
- }
break;
}
@@ -3389,16 +3386,18 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring,
default:
if (unlikely(net_ratelimit())) {
dev_warn(tx_ring->dev,
- "partial checksum but l4 proto=%x!\n",
- l4_hdr);
+ "partial checksum, l3 proto=%x, l4 proto=%x\n",
+ first->protocol, l4_hdr);
}
- break;
+ skb_checksum_help(skb);
+ goto no_csum;
}
/* update TX checksum flag */
first->tx_flags |= IXGBE_TX_FLAGS_CSUM;
}
+no_csum:
/* vlan_macip_lens: MACLEN, VLAN tag */
vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT;
vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
@@ -4244,15 +4243,17 @@ static struct pci_driver ixgbevf_driver = {
**/
static int __init ixgbevf_init_module(void)
{
- int ret;
-
pr_info("%s - version %s\n", ixgbevf_driver_string,
ixgbevf_driver_version);
pr_info("%s\n", ixgbevf_copyright);
+ ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name);
+ if (!ixgbevf_wq) {
+ pr_err("%s: Failed to create workqueue\n", ixgbevf_driver_name);
+ return -ENOMEM;
+ }
- ret = pci_register_driver(&ixgbevf_driver);
- return ret;
+ return pci_register_driver(&ixgbevf_driver);
}
module_init(ixgbevf_init_module);
@@ -4266,6 +4267,10 @@ module_init(ixgbevf_init_module);
static void __exit ixgbevf_exit_module(void)
{
pci_unregister_driver(&ixgbevf_driver);
+ if (ixgbevf_wq) {
+ destroy_workqueue(ixgbevf_wq);
+ ixgbevf_wq = NULL;
+ }
}
#ifdef DEBUG
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 26a68b8af2c5..a0755919ccaf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -2,7 +2,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
- mad.o transobj.o vport.o
-mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o \
+ mad.o transobj.o vport.o sriov.o
+mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o eswitch.o \
en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \
en_txrx.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 69f1c1a412b4..89313d46952d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -465,6 +465,7 @@ enum {
};
struct mlx5e_vlan_db {
+ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
u32 active_vlans_ft_ix[VLAN_N_VID];
u32 untagged_rule_ft_ix;
u32 any_vlan_rule_ft_ix;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c
index 22d603f78273..5b93c9c6e341 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c
@@ -502,6 +502,49 @@ add_eth_addr_rule_out:
return err;
}
+static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
+{
+ struct net_device *ndev = priv->netdev;
+ int max_list_size;
+ int list_size;
+ u16 *vlans;
+ int vlan;
+ int err;
+ int i;
+
+ list_size = 0;
+ for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID)
+ list_size++;
+
+ max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list);
+
+ if (list_size > max_list_size) {
+ netdev_warn(ndev,
+ "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n",
+ list_size, max_list_size);
+ list_size = max_list_size;
+ }
+
+ vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL);
+ if (!vlans)
+ return -ENOMEM;
+
+ i = 0;
+ for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) {
+ if (i >= list_size)
+ break;
+ vlans[i++] = vlan;
+ }
+
+ err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size);
+ if (err)
+ netdev_err(ndev, "Failed to modify vport vlans list err(%d)\n",
+ err);
+
+ kfree(vlans);
+ return err;
+}
+
enum mlx5e_vlan_rule_type {
MLX5E_VLAN_RULE_TYPE_UNTAGGED,
MLX5E_VLAN_RULE_TYPE_ANY_VID,
@@ -552,6 +595,10 @@ static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
1);
break;
default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */
+ err = mlx5e_vport_context_update_vlans(priv);
+ if (err)
+ goto add_vlan_rule_out;
+
ft_ix = &priv->vlan.active_vlans_ft_ix[vid];
MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag,
1);
@@ -588,6 +635,7 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv,
case MLX5E_VLAN_RULE_TYPE_MATCH_VID:
mlx5_del_flow_table_entry(priv->ft.vlan,
priv->vlan.active_vlans_ft_ix[vid]);
+ mlx5e_vport_context_update_vlans(priv);
break;
}
}
@@ -619,6 +667,8 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
{
struct mlx5e_priv *priv = netdev_priv(dev);
+ set_bit(vid, priv->vlan.active_vlans);
+
return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid);
}
@@ -627,6 +677,8 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
{
struct mlx5e_priv *priv = netdev_priv(dev);
+ clear_bit(vid, priv->vlan.active_vlans);
+
mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid);
return 0;
@@ -671,6 +723,91 @@ static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv)
netif_addr_unlock_bh(netdev);
}
+static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type,
+ u8 addr_array[][ETH_ALEN], int size)
+{
+ bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC);
+ struct net_device *ndev = priv->netdev;
+ struct mlx5e_eth_addr_hash_node *hn;
+ struct hlist_head *addr_list;
+ struct hlist_node *tmp;
+ int i = 0;
+ int hi;
+
+ addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc;
+
+ if (is_uc) /* Make sure our own address is pushed first */
+ ether_addr_copy(addr_array[i++], ndev->dev_addr);
+ else if (priv->eth_addr.broadcast_enabled)
+ ether_addr_copy(addr_array[i++], ndev->broadcast);
+
+ mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) {
+ if (ether_addr_equal(ndev->dev_addr, hn->ai.addr))
+ continue;
+ if (i >= size)
+ break;
+ ether_addr_copy(addr_array[i++], hn->ai.addr);
+ }
+}
+
+static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv,
+ int list_type)
+{
+ bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC);
+ struct mlx5e_eth_addr_hash_node *hn;
+ u8 (*addr_array)[ETH_ALEN] = NULL;
+ struct hlist_head *addr_list;
+ struct hlist_node *tmp;
+ int max_size;
+ int size;
+ int err;
+ int hi;
+
+ size = is_uc ? 0 : (priv->eth_addr.broadcast_enabled ? 1 : 0);
+ max_size = is_uc ?
+ 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) :
+ 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list);
+
+ addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc;
+ mlx5e_for_each_hash_node(hn, tmp, addr_list, hi)
+ size++;
+
+ if (size > max_size) {
+ netdev_warn(priv->netdev,
+ "netdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n",
+ is_uc ? "UC" : "MC", size, max_size);
+ size = max_size;
+ }
+
+ if (size) {
+ addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL);
+ if (!addr_array) {
+ err = -ENOMEM;
+ goto out;
+ }
+ mlx5e_fill_addr_array(priv, list_type, addr_array, size);
+ }
+
+ err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size);
+out:
+ if (err)
+ netdev_err(priv->netdev,
+ "Failed to modify vport %s list err(%d)\n",
+ is_uc ? "UC" : "MC", err);
+ kfree(addr_array);
+}
+
+static void mlx5e_vport_context_update(struct mlx5e_priv *priv)
+{
+ struct mlx5e_eth_addr_db *ea = &priv->eth_addr;
+
+ mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_UC);
+ mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_MC);
+ mlx5_modify_nic_vport_promisc(priv->mdev, 0,
+ ea->allmulti_enabled,
+ ea->promisc_enabled);
+}
+
static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv)
{
struct mlx5e_eth_addr_hash_node *hn;
@@ -748,6 +885,8 @@ void mlx5e_set_rx_mode_work(struct work_struct *work)
ea->promisc_enabled = promisc_enabled;
ea->allmulti_enabled = allmulti_enabled;
ea->broadcast_enabled = broadcast_enabled;
+
+ mlx5e_vport_context_update(priv);
}
void mlx5e_init_eth_addr(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index f6a8cc787603..d67058afe87e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -32,6 +32,7 @@
#include <linux/mlx5/flow_table.h>
#include "en.h"
+#include "eswitch.h"
struct mlx5e_rq_param {
u32 rqc[MLX5_ST_SZ_DW(rqc)];
@@ -63,7 +64,7 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv)
u8 port_state;
port_state = mlx5_query_vport_state(mdev,
- MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT);
+ MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
if (port_state == VPORT_STATE_UP)
netif_carrier_on(priv->netdev);
@@ -1931,6 +1932,79 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
return err;
}
+static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac);
+}
+
+static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1,
+ vlan, qos);
+}
+
+static int mlx5_vport_link2ifla(u8 esw_link)
+{
+ switch (esw_link) {
+ case MLX5_ESW_VPORT_ADMIN_STATE_DOWN:
+ return IFLA_VF_LINK_STATE_DISABLE;
+ case MLX5_ESW_VPORT_ADMIN_STATE_UP:
+ return IFLA_VF_LINK_STATE_ENABLE;
+ }
+ return IFLA_VF_LINK_STATE_AUTO;
+}
+
+static int mlx5_ifla_link2vport(u8 ifla_link)
+{
+ switch (ifla_link) {
+ case IFLA_VF_LINK_STATE_DISABLE:
+ return MLX5_ESW_VPORT_ADMIN_STATE_DOWN;
+ case IFLA_VF_LINK_STATE_ENABLE:
+ return MLX5_ESW_VPORT_ADMIN_STATE_UP;
+ }
+ return MLX5_ESW_VPORT_ADMIN_STATE_AUTO;
+}
+
+static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
+ int link_state)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1,
+ mlx5_ifla_link2vport(link_state));
+}
+
+static int mlx5e_get_vf_config(struct net_device *dev,
+ int vf, struct ifla_vf_info *ivi)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi);
+ if (err)
+ return err;
+ ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate);
+ return 0;
+}
+
+static int mlx5e_get_vf_stats(struct net_device *dev,
+ int vf, struct ifla_vf_stats *vf_stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1,
+ vf_stats);
+}
+
static struct net_device_ops mlx5e_netdev_ops = {
.ndo_open = mlx5e_open,
.ndo_stop = mlx5e_close,
@@ -1941,7 +2015,7 @@ static struct net_device_ops mlx5e_netdev_ops = {
.ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid,
.ndo_set_features = mlx5e_set_features,
- .ndo_change_mtu = mlx5e_change_mtu,
+ .ndo_change_mtu = mlx5e_change_mtu
};
static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@ -2028,7 +2102,7 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- mlx5_query_nic_vport_mac_address(priv->mdev, netdev->dev_addr);
+ mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr);
}
static void mlx5e_build_netdev(struct net_device *netdev)
@@ -2041,6 +2115,14 @@ static void mlx5e_build_netdev(struct net_device *netdev)
if (priv->params.num_tc > 1)
mlx5e_netdev_ops.ndo_select_queue = mlx5e_select_queue;
+ if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
+ mlx5e_netdev_ops.ndo_set_vf_mac = mlx5e_set_vf_mac;
+ mlx5e_netdev_ops.ndo_set_vf_vlan = mlx5e_set_vf_vlan;
+ mlx5e_netdev_ops.ndo_get_vf_config = mlx5e_get_vf_config;
+ mlx5e_netdev_ops.ndo_set_vf_link_state = mlx5e_set_vf_link_state;
+ mlx5e_netdev_ops.ndo_get_vf_stats = mlx5e_get_vf_stats;
+ }
+
netdev->netdev_ops = &mlx5e_netdev_ops;
netdev->watchdog_timeo = 15 * HZ;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 713ead583347..23c244a7e5d7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -35,6 +35,9 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
+#ifdef CONFIG_MLX5_CORE_EN
+#include "eswitch.h"
+#endif
enum {
MLX5_EQE_SIZE = sizeof(struct mlx5_eqe),
@@ -287,6 +290,11 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
break;
#endif
+#ifdef CONFIG_MLX5_CORE_EN
+ case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
+ mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
+ break;
+#endif
default:
mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
eqe->type, eq->eqn);
@@ -459,6 +467,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
if (MLX5_CAP_GEN(dev, pg))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT);
+ if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
+ MLX5_CAP_GEN(dev, vport_group_manager) &&
+ mlx5_core_is_pf(dev))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
+
err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
"mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
new file mode 100644
index 000000000000..d8939e597c54
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -0,0 +1,1282 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/flow_table.h>
+#include "mlx5_core.h"
+#include "eswitch.h"
+
+#define UPLINK_VPORT 0xFFFF
+
+#define MLX5_DEBUG_ESWITCH_MASK BIT(3)
+
+#define esw_info(dev, format, ...) \
+ pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
+
+#define esw_warn(dev, format, ...) \
+ pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
+
+#define esw_debug(dev, format, ...) \
+ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
+
+enum {
+ MLX5_ACTION_NONE = 0,
+ MLX5_ACTION_ADD = 1,
+ MLX5_ACTION_DEL = 2,
+};
+
+/* E-Switch UC L2 table hash node */
+struct esw_uc_addr {
+ struct l2addr_node node;
+ u32 table_index;
+ u32 vport;
+};
+
+/* E-Switch MC FDB table hash node */
+struct esw_mc_addr { /* SRIOV only */
+ struct l2addr_node node;
+ struct mlx5_flow_rule *uplink_rule; /* Forward to uplink rule */
+ u32 refcnt;
+};
+
+/* Vport UC/MC hash node */
+struct vport_addr {
+ struct l2addr_node node;
+ u8 action;
+ u32 vport;
+ struct mlx5_flow_rule *flow_rule; /* SRIOV only */
+};
+
+enum {
+ UC_ADDR_CHANGE = BIT(0),
+ MC_ADDR_CHANGE = BIT(1),
+};
+
+/* Vport context events */
+#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \
+ MC_ADDR_CHANGE)
+
+static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
+ u32 events_mask)
+{
+ int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)];
+ int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+ void *nic_vport_ctx;
+ int err;
+
+ memset(out, 0, sizeof(out));
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
+ in, nic_vport_context);
+
+ MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1);
+
+ if (events_mask & UC_ADDR_CHANGE)
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ event_on_uc_address_change, 1);
+ if (events_mask & MC_ADDR_CHANGE)
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ event_on_mc_address_change, 1);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ goto ex;
+ err = mlx5_cmd_status_to_err_v2(out);
+ if (err)
+ goto ex;
+ return 0;
+ex:
+ return err;
+}
+
+/* E-Switch vport context HW commands */
+static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)];
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
+
+ MLX5_SET(query_esw_vport_context_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_esw_vport_context_in, in, other_vport, 1);
+
+ return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen);
+}
+
+static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
+ u16 *vlan, u8 *qos)
+{
+ u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)];
+ int err;
+ bool cvlan_strip;
+ bool cvlan_insert;
+
+ memset(out, 0, sizeof(out));
+
+ *vlan = 0;
+ *qos = 0;
+
+ if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
+ !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
+ return -ENOTSUPP;
+
+ err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out));
+ if (err)
+ goto out;
+
+ cvlan_strip = MLX5_GET(query_esw_vport_context_out, out,
+ esw_vport_context.vport_cvlan_strip);
+
+ cvlan_insert = MLX5_GET(query_esw_vport_context_out, out,
+ esw_vport_context.vport_cvlan_insert);
+
+ if (cvlan_strip || cvlan_insert) {
+ *vlan = MLX5_GET(query_esw_vport_context_out, out,
+ esw_vport_context.cvlan_id);
+ *qos = MLX5_GET(query_esw_vport_context_out, out,
+ esw_vport_context.cvlan_pcp);
+ }
+
+ esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n",
+ vport, *vlan, *qos);
+out:
+ return err;
+}
+
+static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
+ void *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)];
+
+ memset(out, 0, sizeof(out));
+
+ MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
+
+ MLX5_SET(modify_esw_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
+
+ return mlx5_cmd_exec_check_status(dev, in, inlen,
+ out, sizeof(out));
+}
+
+static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
+ u16 vlan, u8 qos, bool set)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)];
+
+ memset(in, 0, sizeof(in));
+
+ if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
+ !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
+ return -ENOTSUPP;
+
+ esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n",
+ vport, vlan, qos, set);
+
+ if (set) {
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.vport_cvlan_strip, 1);
+ /* insert only if no vlan in packet */
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.vport_cvlan_insert, 1);
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.cvlan_pcp, qos);
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.cvlan_id, vlan);
+ }
+
+ MLX5_SET(modify_esw_vport_context_in, in,
+ field_select.vport_cvlan_strip, 1);
+ MLX5_SET(modify_esw_vport_context_in, in,
+ field_select.vport_cvlan_insert, 1);
+
+ return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in));
+}
+
+/* HW L2 Table (MPFS) management */
+static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index,
+ u8 *mac, u8 vlan_valid, u16 vlan)
+{
+ u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)];
+ u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)];
+ u8 *in_mac_addr;
+
+ memset(in, 0, sizeof(in));
+ memset(out, 0, sizeof(out));
+
+ MLX5_SET(set_l2_table_entry_in, in, opcode,
+ MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
+ MLX5_SET(set_l2_table_entry_in, in, table_index, index);
+ MLX5_SET(set_l2_table_entry_in, in, vlan_valid, vlan_valid);
+ MLX5_SET(set_l2_table_entry_in, in, vlan, vlan);
+
+ in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address);
+ ether_addr_copy(&in_mac_addr[2], mac);
+
+ return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
+ out, sizeof(out));
+}
+
+static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
+{
+ u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)];
+ u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)];
+
+ memset(in, 0, sizeof(in));
+ memset(out, 0, sizeof(out));
+
+ MLX5_SET(delete_l2_table_entry_in, in, opcode,
+ MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
+ MLX5_SET(delete_l2_table_entry_in, in, table_index, index);
+ return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
+ out, sizeof(out));
+}
+
+static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix)
+{
+ int err = 0;
+
+ *ix = find_first_zero_bit(l2_table->bitmap, l2_table->size);
+ if (*ix >= l2_table->size)
+ err = -ENOSPC;
+ else
+ __set_bit(*ix, l2_table->bitmap);
+
+ return err;
+}
+
+static void free_l2_table_index(struct mlx5_l2_table *l2_table, u32 ix)
+{
+ __clear_bit(ix, l2_table->bitmap);
+}
+
+static int set_l2_table_entry(struct mlx5_core_dev *dev, u8 *mac,
+ u8 vlan_valid, u16 vlan,
+ u32 *index)
+{
+ struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table;
+ int err;
+
+ err = alloc_l2_table_index(l2_table, index);
+ if (err)
+ return err;
+
+ err = set_l2_table_entry_cmd(dev, *index, mac, vlan_valid, vlan);
+ if (err)
+ free_l2_table_index(l2_table, *index);
+
+ return err;
+}
+
+static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index)
+{
+ struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table;
+
+ del_l2_table_entry_cmd(dev, index);
+ free_l2_table_index(l2_table, index);
+}
+
+/* E-Switch FDB flow steering */
+struct dest_node {
+ struct list_head list;
+ struct mlx5_flow_destination dest;
+};
+
+static int _mlx5_flow_rule_apply(struct mlx5_flow_rule *fr)
+{
+ bool was_valid = fr->valid;
+ struct dest_node *dest_n;
+ u32 dest_list_size = 0;
+ void *in_match_value;
+ u32 *flow_context;
+ u32 flow_index;
+ int err;
+ int i;
+
+ if (list_empty(&fr->dest_list)) {
+ if (fr->valid)
+ mlx5_del_flow_table_entry(fr->ft, fr->fi);
+ fr->valid = false;
+ return 0;
+ }
+
+ list_for_each_entry(dest_n, &fr->dest_list, list)
+ dest_list_size++;
+
+ flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) +
+ MLX5_ST_SZ_BYTES(dest_format_struct) *
+ dest_list_size);
+ if (!flow_context)
+ return -ENOMEM;
+
+ MLX5_SET(flow_context, flow_context, flow_tag, fr->flow_tag);
+ MLX5_SET(flow_context, flow_context, action, fr->action);
+ MLX5_SET(flow_context, flow_context, destination_list_size,
+ dest_list_size);
+
+ i = 0;
+ list_for_each_entry(dest_n, &fr->dest_list, list) {
+ void *dest_addr = MLX5_ADDR_OF(flow_context, flow_context,
+ destination[i++]);
+
+ MLX5_SET(dest_format_struct, dest_addr, destination_type,
+ dest_n->dest.type);
+ MLX5_SET(dest_format_struct, dest_addr, destination_id,
+ dest_n->dest.vport_num);
+ }
+
+ in_match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value);
+ memcpy(in_match_value, fr->match_value, MLX5_ST_SZ_BYTES(fte_match_param));
+
+ err = mlx5_add_flow_table_entry(fr->ft, fr->match_criteria_enable,
+ fr->match_criteria, flow_context,
+ &flow_index);
+ if (!err) {
+ if (was_valid)
+ mlx5_del_flow_table_entry(fr->ft, fr->fi);
+ fr->fi = flow_index;
+ fr->valid = true;
+ }
+ kfree(flow_context);
+ return err;
+}
+
+static int mlx5_flow_rule_add_dest(struct mlx5_flow_rule *fr,
+ struct mlx5_flow_destination *new_dest)
+{
+ struct dest_node *dest_n;
+ int err;
+
+ dest_n = kzalloc(sizeof(*dest_n), GFP_KERNEL);
+ if (!dest_n)
+ return -ENOMEM;
+
+ memcpy(&dest_n->dest, new_dest, sizeof(dest_n->dest));
+ mutex_lock(&fr->mutex);
+ list_add(&dest_n->list, &fr->dest_list);
+ err = _mlx5_flow_rule_apply(fr);
+ if (err) {
+ list_del(&dest_n->list);
+ kfree(dest_n);
+ }
+ mutex_unlock(&fr->mutex);
+ return err;
+}
+
+static int mlx5_flow_rule_del_dest(struct mlx5_flow_rule *fr,
+ struct mlx5_flow_destination *dest)
+{
+ struct dest_node *dest_n;
+ struct dest_node *n;
+ int err;
+
+ mutex_lock(&fr->mutex);
+ list_for_each_entry_safe(dest_n, n, &fr->dest_list, list) {
+ if (dest->vport_num == dest_n->dest.vport_num)
+ goto found;
+ }
+ mutex_unlock(&fr->mutex);
+ return -ENOENT;
+
+found:
+ list_del(&dest_n->list);
+ err = _mlx5_flow_rule_apply(fr);
+ mutex_unlock(&fr->mutex);
+ kfree(dest_n);
+
+ return err;
+}
+
+static struct mlx5_flow_rule *find_fr(struct mlx5_eswitch *esw,
+ u8 match_criteria_enable,
+ u32 *match_value)
+{
+ struct hlist_head *hash = esw->mc_table;
+ struct esw_mc_addr *esw_mc;
+ u8 *dmac_v;
+
+ dmac_v = MLX5_ADDR_OF(fte_match_param, match_value,
+ outer_headers.dmac_47_16);
+
+ /* UNICAST FULL MATCH */
+ if (!is_multicast_ether_addr(dmac_v))
+ return NULL;
+
+ /* MULTICAST FULL MATCH */
+ esw_mc = l2addr_hash_find(hash, dmac_v, struct esw_mc_addr);
+
+ return esw_mc ? esw_mc->uplink_rule : NULL;
+}
+
+static struct mlx5_flow_rule *alloc_fr(void *ft,
+ u8 match_criteria_enable,
+ u32 *match_criteria,
+ u32 *match_value,
+ u32 action,
+ u32 flow_tag)
+{
+ struct mlx5_flow_rule *fr = kzalloc(sizeof(*fr), GFP_KERNEL);
+
+ if (!fr)
+ return NULL;
+
+ fr->match_criteria = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+ fr->match_value = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+ if (!fr->match_criteria || !fr->match_value) {
+ kfree(fr->match_criteria);
+ kfree(fr->match_value);
+ kfree(fr);
+ return NULL;
+ }
+
+ memcpy(fr->match_criteria, match_criteria, MLX5_ST_SZ_BYTES(fte_match_param));
+ memcpy(fr->match_value, match_value, MLX5_ST_SZ_BYTES(fte_match_param));
+ fr->match_criteria_enable = match_criteria_enable;
+ fr->flow_tag = flow_tag;
+ fr->action = action;
+
+ mutex_init(&fr->mutex);
+ INIT_LIST_HEAD(&fr->dest_list);
+ atomic_set(&fr->refcount, 0);
+ fr->ft = ft;
+ return fr;
+}
+
+static void deref_fr(struct mlx5_flow_rule *fr)
+{
+ if (!atomic_dec_and_test(&fr->refcount))
+ return;
+
+ kfree(fr->match_criteria);
+ kfree(fr->match_value);
+ kfree(fr);
+}
+
+static struct mlx5_flow_rule *
+mlx5_add_flow_rule(struct mlx5_eswitch *esw,
+ u8 match_criteria_enable,
+ u32 *match_criteria,
+ u32 *match_value,
+ u32 action,
+ u32 flow_tag,
+ struct mlx5_flow_destination *dest)
+{
+ struct mlx5_flow_rule *fr;
+ int err;
+
+ fr = find_fr(esw, match_criteria_enable, match_value);
+ fr = fr ? fr : alloc_fr(esw->fdb_table.fdb, match_criteria_enable, match_criteria,
+ match_value, action, flow_tag);
+ if (!fr)
+ return NULL;
+
+ atomic_inc(&fr->refcount);
+
+ err = mlx5_flow_rule_add_dest(fr, dest);
+ if (err) {
+ deref_fr(fr);
+ return NULL;
+ }
+
+ return fr;
+}
+
+static void mlx5_del_flow_rule(struct mlx5_flow_rule *fr, u32 vport)
+{
+ struct mlx5_flow_destination dest;
+
+ dest.vport_num = vport;
+ mlx5_flow_rule_del_dest(fr, &dest);
+ deref_fr(fr);
+}
+
+/* E-Switch FDB */
+static struct mlx5_flow_rule *
+esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
+{
+ int match_header = MLX5_MATCH_OUTER_HEADERS;
+ struct mlx5_flow_destination dest;
+ struct mlx5_flow_rule *flow_rule = NULL;
+ u32 *match_v;
+ u32 *match_c;
+ u8 *dmac_v;
+ u8 *dmac_c;
+
+ match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+ match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+ if (!match_v || !match_c) {
+ pr_warn("FDB: Failed to alloc match parameters\n");
+ goto out;
+ }
+ dmac_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers.dmac_47_16);
+ dmac_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers.dmac_47_16);
+
+ ether_addr_copy(dmac_v, mac);
+ /* Match criteria mask */
+ memset(dmac_c, 0xff, 6);
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport_num = vport;
+
+ esw_debug(esw->dev,
+ "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n",
+ dmac_v, dmac_c, vport);
+ flow_rule =
+ mlx5_add_flow_rule(esw,
+ match_header,
+ match_c,
+ match_v,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ 0, &dest);
+ if (IS_ERR_OR_NULL(flow_rule)) {
+ pr_warn(
+ "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
+ dmac_v, dmac_c, vport, PTR_ERR(flow_rule));
+ flow_rule = NULL;
+ }
+out:
+ kfree(match_v);
+ kfree(match_c);
+ return flow_rule;
+}
+
+static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_table_group g;
+ struct mlx5_flow_table *fdb;
+ u8 *dmac;
+
+ esw_debug(dev, "Create FDB log_max_size(%d)\n",
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+
+ memset(&g, 0, sizeof(g));
+ /* UC MC Full match rules*/
+ g.log_sz = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size);
+ g.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ dmac = MLX5_ADDR_OF(fte_match_param, g.match_criteria,
+ outer_headers.dmac_47_16);
+ /* Match criteria mask */
+ memset(dmac, 0xff, 6);
+
+ fdb = mlx5_create_flow_table(dev, 0,
+ MLX5_FLOW_TABLE_TYPE_ESWITCH,
+ 1, &g);
+ if (fdb)
+ esw_debug(dev, "ESW: FDB Table created fdb->id %d\n", mlx5_get_flow_table_id(fdb));
+ else
+ esw_warn(dev, "ESW: Failed to create FDB Table\n");
+
+ esw->fdb_table.fdb = fdb;
+ return fdb ? 0 : -ENOMEM;
+}
+
+static void esw_destroy_fdb_table(struct mlx5_eswitch *esw)
+{
+ if (!esw->fdb_table.fdb)
+ return;
+
+ esw_debug(esw->dev, "Destroy FDB Table fdb(%d)\n",
+ mlx5_get_flow_table_id(esw->fdb_table.fdb));
+ mlx5_destroy_flow_table(esw->fdb_table.fdb);
+ esw->fdb_table.fdb = NULL;
+}
+
+/* E-Switch vport UC/MC lists management */
+typedef int (*vport_addr_action)(struct mlx5_eswitch *esw,
+ struct vport_addr *vaddr);
+
+static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+ struct hlist_head *hash = esw->l2_table.l2_hash;
+ struct esw_uc_addr *esw_uc;
+ u8 *mac = vaddr->node.addr;
+ u32 vport = vaddr->vport;
+ int err;
+
+ esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr);
+ if (esw_uc) {
+ esw_warn(esw->dev,
+ "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n",
+ mac, vport, esw_uc->vport);
+ return -EEXIST;
+ }
+
+ esw_uc = l2addr_hash_add(hash, mac, struct esw_uc_addr, GFP_KERNEL);
+ if (!esw_uc)
+ return -ENOMEM;
+ esw_uc->vport = vport;
+
+ err = set_l2_table_entry(esw->dev, mac, 0, 0, &esw_uc->table_index);
+ if (err)
+ goto abort;
+
+ if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */
+ vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
+
+ esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n",
+ vport, mac, esw_uc->table_index, vaddr->flow_rule);
+ return err;
+abort:
+ l2addr_hash_del(esw_uc);
+ return err;
+}
+
+static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+ struct hlist_head *hash = esw->l2_table.l2_hash;
+ struct esw_uc_addr *esw_uc;
+ u8 *mac = vaddr->node.addr;
+ u32 vport = vaddr->vport;
+
+ esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr);
+ if (!esw_uc || esw_uc->vport != vport) {
+ esw_debug(esw->dev,
+ "MAC(%pM) doesn't belong to vport (%d)\n",
+ mac, vport);
+ return -EINVAL;
+ }
+ esw_debug(esw->dev, "\tDELETE UC MAC: vport[%d] %pM index:%d fr(%p)\n",
+ vport, mac, esw_uc->table_index, vaddr->flow_rule);
+
+ del_l2_table_entry(esw->dev, esw_uc->table_index);
+
+ if (vaddr->flow_rule)
+ mlx5_del_flow_rule(vaddr->flow_rule, vport);
+ vaddr->flow_rule = NULL;
+
+ l2addr_hash_del(esw_uc);
+ return 0;
+}
+
+static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+ struct hlist_head *hash = esw->mc_table;
+ struct esw_mc_addr *esw_mc;
+ u8 *mac = vaddr->node.addr;
+ u32 vport = vaddr->vport;
+
+ if (!esw->fdb_table.fdb)
+ return 0;
+
+ esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr);
+ if (esw_mc)
+ goto add;
+
+ esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr, GFP_KERNEL);
+ if (!esw_mc)
+ return -ENOMEM;
+
+ esw_mc->uplink_rule = /* Forward MC MAC to Uplink */
+ esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT);
+add:
+ esw_mc->refcnt++;
+ /* Forward MC MAC to vport */
+ vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
+ esw_debug(esw->dev,
+ "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n",
+ vport, mac, vaddr->flow_rule,
+ esw_mc->refcnt, esw_mc->uplink_rule);
+ return 0;
+}
+
+static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+ struct hlist_head *hash = esw->mc_table;
+ struct esw_mc_addr *esw_mc;
+ u8 *mac = vaddr->node.addr;
+ u32 vport = vaddr->vport;
+
+ if (!esw->fdb_table.fdb)
+ return 0;
+
+ esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr);
+ if (!esw_mc) {
+ esw_warn(esw->dev,
+ "Failed to find eswitch MC addr for MAC(%pM) vport(%d)",
+ mac, vport);
+ return -EINVAL;
+ }
+ esw_debug(esw->dev,
+ "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n",
+ vport, mac, vaddr->flow_rule, esw_mc->refcnt,
+ esw_mc->uplink_rule);
+
+ if (vaddr->flow_rule)
+ mlx5_del_flow_rule(vaddr->flow_rule, vport);
+ vaddr->flow_rule = NULL;
+
+ if (--esw_mc->refcnt)
+ return 0;
+
+ if (esw_mc->uplink_rule)
+ mlx5_del_flow_rule(esw_mc->uplink_rule, UPLINK_VPORT);
+
+ l2addr_hash_del(esw_mc);
+ return 0;
+}
+
+/* Apply vport UC/MC list to HW l2 table and FDB table */
+static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw,
+ u32 vport_num, int list_type)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
+ vport_addr_action vport_addr_add;
+ vport_addr_action vport_addr_del;
+ struct vport_addr *addr;
+ struct l2addr_node *node;
+ struct hlist_head *hash;
+ struct hlist_node *tmp;
+ int hi;
+
+ vport_addr_add = is_uc ? esw_add_uc_addr :
+ esw_add_mc_addr;
+ vport_addr_del = is_uc ? esw_del_uc_addr :
+ esw_del_mc_addr;
+
+ hash = is_uc ? vport->uc_list : vport->mc_list;
+ for_each_l2hash_node(node, tmp, hash, hi) {
+ addr = container_of(node, struct vport_addr, node);
+ switch (addr->action) {
+ case MLX5_ACTION_ADD:
+ vport_addr_add(esw, addr);
+ addr->action = MLX5_ACTION_NONE;
+ break;
+ case MLX5_ACTION_DEL:
+ vport_addr_del(esw, addr);
+ l2addr_hash_del(addr);
+ break;
+ }
+ }
+}
+
+/* Sync vport UC/MC list from vport context */
+static void esw_update_vport_addr_list(struct mlx5_eswitch *esw,
+ u32 vport_num, int list_type)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
+ u8 (*mac_list)[ETH_ALEN];
+ struct l2addr_node *node;
+ struct vport_addr *addr;
+ struct hlist_head *hash;
+ struct hlist_node *tmp;
+ int size;
+ int err;
+ int hi;
+ int i;
+
+ size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) :
+ MLX5_MAX_MC_PER_VPORT(esw->dev);
+
+ mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL);
+ if (!mac_list)
+ return;
+
+ hash = is_uc ? vport->uc_list : vport->mc_list;
+
+ for_each_l2hash_node(node, tmp, hash, hi) {
+ addr = container_of(node, struct vport_addr, node);
+ addr->action = MLX5_ACTION_DEL;
+ }
+
+ err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type,
+ mac_list, &size);
+ if (err)
+ return;
+ esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n",
+ vport_num, is_uc ? "UC" : "MC", size);
+
+ for (i = 0; i < size; i++) {
+ if (is_uc && !is_valid_ether_addr(mac_list[i]))
+ continue;
+
+ if (!is_uc && !is_multicast_ether_addr(mac_list[i]))
+ continue;
+
+ addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr);
+ if (addr) {
+ addr->action = MLX5_ACTION_NONE;
+ continue;
+ }
+
+ addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr,
+ GFP_KERNEL);
+ if (!addr) {
+ esw_warn(esw->dev,
+ "Failed to add MAC(%pM) to vport[%d] DB\n",
+ mac_list[i], vport_num);
+ continue;
+ }
+ addr->vport = vport_num;
+ addr->action = MLX5_ACTION_ADD;
+ }
+ kfree(mac_list);
+}
+
+static void esw_vport_change_handler(struct work_struct *work)
+{
+ struct mlx5_vport *vport =
+ container_of(work, struct mlx5_vport, vport_change_handler);
+ struct mlx5_core_dev *dev = vport->dev;
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ u8 mac[ETH_ALEN];
+
+ mlx5_query_nic_vport_mac_address(dev, vport->vport, mac);
+ esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n",
+ vport->vport, mac);
+
+ if (vport->enabled_events & UC_ADDR_CHANGE) {
+ esw_update_vport_addr_list(esw, vport->vport,
+ MLX5_NVPRT_LIST_TYPE_UC);
+ esw_apply_vport_addr_list(esw, vport->vport,
+ MLX5_NVPRT_LIST_TYPE_UC);
+ }
+
+ if (vport->enabled_events & MC_ADDR_CHANGE) {
+ esw_update_vport_addr_list(esw, vport->vport,
+ MLX5_NVPRT_LIST_TYPE_MC);
+ esw_apply_vport_addr_list(esw, vport->vport,
+ MLX5_NVPRT_LIST_TYPE_MC);
+ }
+
+ esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport);
+ if (vport->enabled)
+ arm_vport_context_events_cmd(dev, vport->vport,
+ vport->enabled_events);
+}
+
+static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
+ int enable_events)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ unsigned long flags;
+
+ WARN_ON(vport->enabled);
+
+ esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
+ mlx5_modify_vport_admin_state(esw->dev,
+ MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+ vport_num,
+ MLX5_ESW_VPORT_ADMIN_STATE_AUTO);
+
+ /* Sync with current vport context */
+ vport->enabled_events = enable_events;
+ esw_vport_change_handler(&vport->vport_change_handler);
+
+ spin_lock_irqsave(&vport->lock, flags);
+ vport->enabled = true;
+ spin_unlock_irqrestore(&vport->lock, flags);
+
+ arm_vport_context_events_cmd(esw->dev, vport_num, enable_events);
+
+ esw->enabled_vports++;
+ esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num);
+}
+
+static void esw_cleanup_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ struct l2addr_node *node;
+ struct vport_addr *addr;
+ struct hlist_node *tmp;
+ int hi;
+
+ for_each_l2hash_node(node, tmp, vport->uc_list, hi) {
+ addr = container_of(node, struct vport_addr, node);
+ addr->action = MLX5_ACTION_DEL;
+ }
+ esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_UC);
+
+ for_each_l2hash_node(node, tmp, vport->mc_list, hi) {
+ addr = container_of(node, struct vport_addr, node);
+ addr->action = MLX5_ACTION_DEL;
+ }
+ esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_MC);
+}
+
+static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ unsigned long flags;
+
+ if (!vport->enabled)
+ return;
+
+ esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num);
+ /* Mark this vport as disabled to discard new events */
+ spin_lock_irqsave(&vport->lock, flags);
+ vport->enabled = false;
+ vport->enabled_events = 0;
+ spin_unlock_irqrestore(&vport->lock, flags);
+
+ mlx5_modify_vport_admin_state(esw->dev,
+ MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+ vport_num,
+ MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
+ /* Wait for current already scheduled events to complete */
+ flush_workqueue(esw->work_queue);
+ /* Disable events from this vport */
+ arm_vport_context_events_cmd(esw->dev, vport->vport, 0);
+ /* We don't assume VFs will cleanup after themselves */
+ esw_cleanup_vport(esw, vport_num);
+ esw->enabled_vports--;
+}
+
+/* Public E-Switch API */
+int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs)
+{
+ int err;
+ int i;
+
+ if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
+ MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return 0;
+
+ if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) ||
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
+ esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n");
+ return -ENOTSUPP;
+ }
+
+ esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs);
+
+ esw_disable_vport(esw, 0);
+
+ err = esw_create_fdb_table(esw, nvfs + 1);
+ if (err)
+ goto abort;
+
+ for (i = 0; i <= nvfs; i++)
+ esw_enable_vport(esw, i, SRIOV_VPORT_EVENTS);
+
+ esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
+ esw->enabled_vports);
+ return 0;
+
+abort:
+ esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
+ return err;
+}
+
+void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
+{
+ int i;
+
+ if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
+ MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return;
+
+ esw_info(esw->dev, "disable SRIOV: active vports(%d)\n",
+ esw->enabled_vports);
+
+ for (i = 0; i < esw->total_vports; i++)
+ esw_disable_vport(esw, i);
+
+ esw_destroy_fdb_table(esw);
+
+ /* VPORT 0 (PF) must be enabled back with non-sriov configuration */
+ esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
+}
+
+int mlx5_eswitch_init(struct mlx5_core_dev *dev)
+{
+ int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
+ int total_vports = 1 + pci_sriov_get_totalvfs(dev->pdev);
+ struct mlx5_eswitch *esw;
+ int vport_num;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+ MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return 0;
+
+ esw_info(dev,
+ "Total vports %d, l2 table size(%d), per vport: max uc(%d) max mc(%d)\n",
+ total_vports, l2_table_size,
+ MLX5_MAX_UC_PER_VPORT(dev),
+ MLX5_MAX_MC_PER_VPORT(dev));
+
+ esw = kzalloc(sizeof(*esw), GFP_KERNEL);
+ if (!esw)
+ return -ENOMEM;
+
+ esw->dev = dev;
+
+ esw->l2_table.bitmap = kcalloc(BITS_TO_LONGS(l2_table_size),
+ sizeof(uintptr_t), GFP_KERNEL);
+ if (!esw->l2_table.bitmap) {
+ err = -ENOMEM;
+ goto abort;
+ }
+ esw->l2_table.size = l2_table_size;
+
+ esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq");
+ if (!esw->work_queue) {
+ err = -ENOMEM;
+ goto abort;
+ }
+
+ esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport),
+ GFP_KERNEL);
+ if (!esw->vports) {
+ err = -ENOMEM;
+ goto abort;
+ }
+
+ for (vport_num = 0; vport_num < total_vports; vport_num++) {
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+
+ vport->vport = vport_num;
+ vport->dev = dev;
+ INIT_WORK(&vport->vport_change_handler,
+ esw_vport_change_handler);
+ spin_lock_init(&vport->lock);
+ }
+
+ esw->total_vports = total_vports;
+ esw->enabled_vports = 0;
+
+ dev->priv.eswitch = esw;
+ esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
+ /* VF Vports will be enabled when SRIOV is enabled */
+ return 0;
+abort:
+ if (esw->work_queue)
+ destroy_workqueue(esw->work_queue);
+ kfree(esw->l2_table.bitmap);
+ kfree(esw->vports);
+ kfree(esw);
+ return err;
+}
+
+void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
+{
+ if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
+ MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return;
+
+ esw_info(esw->dev, "cleanup\n");
+ esw_disable_vport(esw, 0);
+
+ esw->dev->priv.eswitch = NULL;
+ destroy_workqueue(esw->work_queue);
+ kfree(esw->l2_table.bitmap);
+ kfree(esw->vports);
+ kfree(esw);
+}
+
+void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
+{
+ struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change;
+ u16 vport_num = be16_to_cpu(vc_eqe->vport_num);
+ struct mlx5_vport *vport;
+
+ if (!esw) {
+ pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n",
+ vport_num);
+ return;
+ }
+
+ vport = &esw->vports[vport_num];
+ spin_lock(&vport->lock);
+ if (vport->enabled)
+ queue_work(esw->work_queue, &vport->vport_change_handler);
+ spin_unlock(&vport->lock);
+}
+
+/* Vport Administration */
+#define ESW_ALLOWED(esw) \
+ (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev))
+#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
+
+int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
+ int vport, u8 mac[ETH_ALEN])
+{
+ int err = 0;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport))
+ return -EINVAL;
+
+ err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);
+ if (err) {
+ mlx5_core_warn(esw->dev,
+ "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n",
+ vport, err);
+ return err;
+ }
+
+ return err;
+}
+
+int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
+ int vport, int link_state)
+{
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport))
+ return -EINVAL;
+
+ return mlx5_modify_vport_admin_state(esw->dev,
+ MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+ vport, link_state);
+}
+
+int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
+ int vport, struct ifla_vf_info *ivi)
+{
+ u16 vlan;
+ u8 qos;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport))
+ return -EINVAL;
+
+ memset(ivi, 0, sizeof(*ivi));
+ ivi->vf = vport - 1;
+
+ mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac);
+ ivi->linkstate = mlx5_query_vport_admin_state(esw->dev,
+ MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+ vport);
+ query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos);
+ ivi->vlan = vlan;
+ ivi->qos = qos;
+ ivi->spoofchk = 0;
+
+ return 0;
+}
+
+int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ int vport, u16 vlan, u8 qos)
+{
+ int set = 0;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7))
+ return -EINVAL;
+
+ if (vlan || qos)
+ set = 1;
+
+ return modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set);
+}
+
+int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
+ int vport,
+ struct ifla_vf_stats *vf_stats)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+ u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
+ int err = 0;
+ u32 *out;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport))
+ return -EINVAL;
+
+ out = mlx5_vzalloc(outlen);
+ if (!out)
+ return -ENOMEM;
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(query_vport_counter_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VPORT_COUNTER);
+ MLX5_SET(query_vport_counter_in, in, op_mod, 0);
+ MLX5_SET(query_vport_counter_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_vport_counter_in, in, other_vport, 1);
+
+ memset(out, 0, outlen);
+ err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen);
+ if (err)
+ goto free_out;
+
+ #define MLX5_GET_CTR(p, x) \
+ MLX5_GET64(query_vport_counter_out, p, x)
+
+ memset(vf_stats, 0, sizeof(*vf_stats));
+ vf_stats->rx_packets =
+ MLX5_GET_CTR(out, received_eth_unicast.packets) +
+ MLX5_GET_CTR(out, received_eth_multicast.packets) +
+ MLX5_GET_CTR(out, received_eth_broadcast.packets);
+
+ vf_stats->rx_bytes =
+ MLX5_GET_CTR(out, received_eth_unicast.octets) +
+ MLX5_GET_CTR(out, received_eth_multicast.octets) +
+ MLX5_GET_CTR(out, received_eth_broadcast.octets);
+
+ vf_stats->tx_packets =
+ MLX5_GET_CTR(out, transmitted_eth_unicast.packets) +
+ MLX5_GET_CTR(out, transmitted_eth_multicast.packets) +
+ MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
+
+ vf_stats->tx_bytes =
+ MLX5_GET_CTR(out, transmitted_eth_unicast.octets) +
+ MLX5_GET_CTR(out, transmitted_eth_multicast.octets) +
+ MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
+
+ vf_stats->multicast =
+ MLX5_GET_CTR(out, received_eth_multicast.packets);
+
+ vf_stats->broadcast =
+ MLX5_GET_CTR(out, received_eth_broadcast.packets);
+
+free_out:
+ kvfree(out);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
new file mode 100644
index 000000000000..02ff3eade026
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_ESWITCH_H__
+#define __MLX5_ESWITCH_H__
+
+#include <linux/if_ether.h>
+#include <linux/if_link.h>
+#include <linux/mlx5/device.h>
+
+#define MLX5_MAX_UC_PER_VPORT(dev) \
+ (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list))
+
+#define MLX5_MAX_MC_PER_VPORT(dev) \
+ (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))
+
+#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE))
+#define MLX5_L2_ADDR_HASH(addr) (addr[5])
+
+/* L2 -mac address based- hash helpers */
+struct l2addr_node {
+ struct hlist_node hlist;
+ u8 addr[ETH_ALEN];
+};
+
+#define for_each_l2hash_node(hn, tmp, hash, i) \
+ for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \
+ hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist)
+
+#define l2addr_hash_find(hash, mac, type) ({ \
+ int ix = MLX5_L2_ADDR_HASH(mac); \
+ bool found = false; \
+ type *ptr = NULL; \
+ \
+ hlist_for_each_entry(ptr, &hash[ix], node.hlist) \
+ if (ether_addr_equal(ptr->node.addr, mac)) {\
+ found = true; \
+ break; \
+ } \
+ if (!found) \
+ ptr = NULL; \
+ ptr; \
+})
+
+#define l2addr_hash_add(hash, mac, type, gfp) ({ \
+ int ix = MLX5_L2_ADDR_HASH(mac); \
+ type *ptr = NULL; \
+ \
+ ptr = kzalloc(sizeof(type), gfp); \
+ if (ptr) { \
+ ether_addr_copy(ptr->node.addr, mac); \
+ hlist_add_head(&ptr->node.hlist, &hash[ix]);\
+ } \
+ ptr; \
+})
+
+#define l2addr_hash_del(ptr) ({ \
+ hlist_del(&ptr->node.hlist); \
+ kfree(ptr); \
+})
+
+struct mlx5_flow_rule {
+ void *ft;
+ u32 fi;
+ u8 match_criteria_enable;
+ u32 *match_criteria;
+ u32 *match_value;
+ u32 action;
+ u32 flow_tag;
+ bool valid;
+ atomic_t refcount;
+ struct mutex mutex; /* protect flow rule updates */
+ struct list_head dest_list;
+};
+
+struct mlx5_vport {
+ struct mlx5_core_dev *dev;
+ int vport;
+ struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE];
+ struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE];
+ struct work_struct vport_change_handler;
+
+ /* This spinlock protects access to vport data, between
+ * "esw_vport_disable" and ongoing interrupt "mlx5_eswitch_vport_event"
+ * once vport marked as disabled new interrupts are discarded.
+ */
+ spinlock_t lock; /* vport events sync */
+ bool enabled;
+ u16 enabled_events;
+};
+
+struct mlx5_l2_table {
+ struct hlist_head l2_hash[MLX5_L2_ADDR_HASH_SIZE];
+ u32 size;
+ unsigned long *bitmap;
+};
+
+struct mlx5_eswitch_fdb {
+ void *fdb;
+};
+
+struct mlx5_eswitch {
+ struct mlx5_core_dev *dev;
+ struct mlx5_l2_table l2_table;
+ struct mlx5_eswitch_fdb fdb_table;
+ struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
+ struct workqueue_struct *work_queue;
+ struct mlx5_vport *vports;
+ int total_vports;
+ int enabled_vports;
+};
+
+/* E-Switch API */
+int mlx5_eswitch_init(struct mlx5_core_dev *dev);
+void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
+void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
+int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs);
+void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
+int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
+ int vport, u8 mac[ETH_ALEN]);
+int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
+ int vport, int link_state);
+int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ int vport, u16 vlan, u8 qos);
+int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
+ int vport, struct ifla_vf_info *ivi);
+int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
+ int vport,
+ struct ifla_vf_stats *vf_stats);
+
+#endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 9335e5ae18cc..1c9f9a54a873 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -160,6 +160,30 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
if (err)
return err;
}
+
+ if (MLX5_CAP_GEN(dev, vport_group_manager) &&
+ MLX5_CAP_GEN(dev, eswitch_flow_table)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE,
+ HCA_CAP_OPMOD_GET_CUR);
+ if (err)
+ return err;
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE,
+ HCA_CAP_OPMOD_GET_MAX);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, vport_group_manager)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH,
+ HCA_CAP_OPMOD_GET_CUR);
+ if (err)
+ return err;
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH,
+ HCA_CAP_OPMOD_GET_MAX);
+ if (err)
+ return err;
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 4ac8d4cc4973..c6de3240f76f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -49,6 +49,9 @@
#include <linux/delay.h>
#include <linux/mlx5/mlx5_ifc.h>
#include "mlx5_core.h"
+#ifdef CONFIG_MLX5_CORE_EN
+#include "eswitch.h"
+#endif
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
@@ -454,6 +457,9 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev)
struct mlx5_reg_host_endianess he_out;
int err;
+ if (!mlx5_core_is_pf(dev))
+ return 0;
+
memset(&he_in, 0, sizeof(he_in));
he_in.he = MLX5_SET_HOST_ENDIANNESS;
err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in),
@@ -462,42 +468,39 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev)
return err;
}
-static int mlx5_core_enable_hca(struct mlx5_core_dev *dev)
+int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
{
+ u32 out[MLX5_ST_SZ_DW(enable_hca_out)];
+ u32 in[MLX5_ST_SZ_DW(enable_hca_in)];
int err;
- struct mlx5_enable_hca_mbox_in in;
- struct mlx5_enable_hca_mbox_out out;
- memset(&in, 0, sizeof(in));
- memset(&out, 0, sizeof(out));
- in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ENABLE_HCA);
+ memset(in, 0, sizeof(in));
+ MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
+ MLX5_SET(enable_hca_in, in, function_id, func_id);
+ memset(out, 0, sizeof(out));
+
err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
if (err)
return err;
- if (out.hdr.status)
- return mlx5_cmd_status_to_err(&out.hdr);
-
- return 0;
+ return mlx5_cmd_status_to_err_v2(out);
}
-static int mlx5_core_disable_hca(struct mlx5_core_dev *dev)
+int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
{
+ u32 out[MLX5_ST_SZ_DW(disable_hca_out)];
+ u32 in[MLX5_ST_SZ_DW(disable_hca_in)];
int err;
- struct mlx5_disable_hca_mbox_in in;
- struct mlx5_disable_hca_mbox_out out;
- memset(&in, 0, sizeof(in));
- memset(&out, 0, sizeof(out));
- in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DISABLE_HCA);
- err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+ memset(in, 0, sizeof(in));
+ MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
+ MLX5_SET(disable_hca_in, in, function_id, func_id);
+ memset(out, 0, sizeof(out));
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (err)
return err;
- if (out.hdr.status)
- return mlx5_cmd_status_to_err(&out.hdr);
-
- return 0;
+ return mlx5_cmd_status_to_err_v2(out);
}
static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
@@ -942,7 +945,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
mlx5_pagealloc_init(dev);
- err = mlx5_core_enable_hca(dev);
+ err = mlx5_core_enable_hca(dev, 0);
if (err) {
dev_err(&pdev->dev, "enable hca failed\n");
goto err_pagealloc_cleanup;
@@ -1052,6 +1055,20 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
mlx5_init_srq_table(dev);
mlx5_init_mr_table(dev);
+#ifdef CONFIG_MLX5_CORE_EN
+ err = mlx5_eswitch_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "eswitch init failed %d\n", err);
+ goto err_reg_dev;
+ }
+#endif
+
+ err = mlx5_sriov_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "sriov init failed %d\n", err);
+ goto err_sriov;
+ }
+
err = mlx5_register_device(dev);
if (err) {
dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
@@ -1068,6 +1085,13 @@ out:
return 0;
+err_sriov:
+ if (mlx5_sriov_cleanup(dev))
+ dev_err(&dev->pdev->dev, "sriov cleanup failed\n");
+
+#ifdef CONFIG_MLX5_CORE_EN
+ mlx5_eswitch_cleanup(dev->priv.eswitch);
+#endif
err_reg_dev:
mlx5_cleanup_mr_table(dev);
mlx5_cleanup_srq_table(dev);
@@ -1106,7 +1130,7 @@ reclaim_boot_pages:
mlx5_reclaim_startup_pages(dev);
err_disable_hca:
- mlx5_core_disable_hca(dev);
+ mlx5_core_disable_hca(dev, 0);
err_pagealloc_cleanup:
mlx5_pagealloc_cleanup(dev);
@@ -1123,6 +1147,13 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
int err = 0;
+ err = mlx5_sriov_cleanup(dev);
+ if (err) {
+ dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n",
+ __func__);
+ return err;
+ }
+
mutex_lock(&dev->intf_state_mutex);
if (dev->interface_state == MLX5_INTERFACE_STATE_DOWN) {
dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
@@ -1130,6 +1161,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
goto out;
}
mlx5_unregister_device(dev);
+#ifdef CONFIG_MLX5_CORE_EN
+ mlx5_eswitch_cleanup(dev->priv.eswitch);
+#endif
+
mlx5_cleanup_mr_table(dev);
mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
@@ -1149,7 +1184,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
}
mlx5_pagealloc_stop(dev);
mlx5_reclaim_startup_pages(dev);
- mlx5_core_disable_hca(dev);
+ mlx5_core_disable_hca(dev, 0);
mlx5_pagealloc_cleanup(dev);
mlx5_cmd_cleanup(dev);
@@ -1195,6 +1230,7 @@ static int init_one(struct pci_dev *pdev,
return -ENOMEM;
}
priv = &dev->priv;
+ priv->pci_dev_data = id->driver_data;
pci_set_drvdata(pdev, dev);
@@ -1365,12 +1401,12 @@ static const struct pci_error_handlers mlx5_err_handler = {
};
static const struct pci_device_id mlx5_core_pci_table[] = {
- { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */
- { PCI_VDEVICE(MELLANOX, 0x1012) }, /* Connect-IB VF */
- { PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */
- { PCI_VDEVICE(MELLANOX, 0x1014) }, /* ConnectX-4 VF */
- { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */
- { PCI_VDEVICE(MELLANOX, 0x1016) }, /* ConnectX-4LX VF */
+ { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */
+ { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */
+ { PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */
+ { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */
+ { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */
+ { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */
{ 0, }
};
@@ -1381,7 +1417,8 @@ static struct pci_driver mlx5_core_driver = {
.id_table = mlx5_core_pci_table,
.probe = init_one,
.remove = remove_one,
- .err_handler = &mlx5_err_handler
+ .err_handler = &mlx5_err_handler,
+ .sriov_configure = mlx5_core_sriov_configure,
};
static int __init init(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index cee5b7a839bc..bee7da822dfe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -36,6 +36,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/if_link.h>
#define DRIVER_NAME "mlx5_core"
#define DRIVER_VERSION "3.0-1"
@@ -90,6 +91,10 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
unsigned long param);
void mlx5_enter_error_state(struct mlx5_core_dev *dev);
void mlx5_disable_device(struct mlx5_core_dev *dev);
+int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
+int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
+int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
+int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
void mlx5e_init(void);
void mlx5e_cleanup(void);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 4d3377b12657..9eeee0545f1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -33,6 +33,7 @@
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/delay.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
@@ -95,6 +96,7 @@ struct mlx5_manage_pages_outbox {
enum {
MAX_RECLAIM_TIME_MSECS = 5000,
+ MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60,
};
enum {
@@ -352,6 +354,10 @@ retry:
goto out_4k;
}
+ dev->priv.fw_pages += npages;
+ if (func_id)
+ dev->priv.vfs_pages += npages;
+
mlx5_core_dbg(dev, "err %d\n", err);
kvfree(in);
@@ -405,6 +411,12 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
}
num_claimed = be32_to_cpu(out->num_entries);
+ if (num_claimed > npages) {
+ mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n",
+ num_claimed, npages);
+ err = -EINVAL;
+ goto out_free;
+ }
if (nclaimed)
*nclaimed = num_claimed;
@@ -412,6 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
addr = be64_to_cpu(out->pas[i]);
free_4k(dev, addr);
}
+ dev->priv.fw_pages -= num_claimed;
+ if (func_id)
+ dev->priv.vfs_pages -= num_claimed;
out_free:
kvfree(out);
@@ -548,3 +563,26 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
{
destroy_workqueue(dev->priv.pg_wq);
}
+
+int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
+{
+ unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
+ int prev_vfs_pages = dev->priv.vfs_pages;
+
+ mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages,
+ dev->priv.name);
+ while (dev->priv.vfs_pages) {
+ if (time_after(jiffies, end)) {
+ mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages);
+ return -ETIMEDOUT;
+ }
+ if (dev->priv.vfs_pages < prev_vfs_pages) {
+ end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
+ prev_vfs_pages = dev->priv.vfs_pages;
+ }
+ msleep(50);
+ }
+
+ mlx5_core_dbg(dev, "All pages received from %s\n", dev->priv.name);
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
new file mode 100644
index 000000000000..7b24386794f9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2014, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#ifdef CONFIG_MLX5_CORE_EN
+#include "eswitch.h"
+#endif
+
+static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ int err;
+ int vf;
+
+ for (vf = 1; vf <= num_vfs; vf++) {
+ err = mlx5_core_enable_hca(dev, vf);
+ if (err) {
+ mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1);
+ } else {
+ sriov->vfs_ctx[vf - 1].enabled = 1;
+ mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 1);
+ }
+ }
+}
+
+static void disable_vfs(struct mlx5_core_dev *dev, int num_vfs)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ int vf;
+
+ for (vf = 1; vf <= num_vfs; vf++) {
+ if (sriov->vfs_ctx[vf - 1].enabled) {
+ if (mlx5_core_disable_hca(dev, vf))
+ mlx5_core_warn(dev, "failed to disable VF %d\n", vf - 1);
+ else
+ sriov->vfs_ctx[vf - 1].enabled = 0;
+ }
+ }
+}
+
+static int mlx5_core_create_vfs(struct pci_dev *pdev, int num_vfs)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ int err;
+
+ if (pci_num_vf(pdev))
+ pci_disable_sriov(pdev);
+
+ enable_vfs(dev, num_vfs);
+
+ err = pci_enable_sriov(pdev, num_vfs);
+ if (err) {
+ dev_warn(&pdev->dev, "enable sriov failed %d\n", err);
+ goto ex;
+ }
+
+ return 0;
+
+ex:
+ disable_vfs(dev, num_vfs);
+ return err;
+}
+
+static int mlx5_core_sriov_enable(struct pci_dev *pdev, int num_vfs)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ int err;
+
+ kfree(sriov->vfs_ctx);
+ sriov->vfs_ctx = kcalloc(num_vfs, sizeof(*sriov->vfs_ctx), GFP_ATOMIC);
+ if (!sriov->vfs_ctx)
+ return -ENOMEM;
+
+ sriov->enabled_vfs = num_vfs;
+ err = mlx5_core_create_vfs(pdev, num_vfs);
+ if (err) {
+ kfree(sriov->vfs_ctx);
+ sriov->vfs_ctx = NULL;
+ return err;
+ }
+
+ return 0;
+}
+
+static void mlx5_core_init_vfs(struct mlx5_core_dev *dev, int num_vfs)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+
+ sriov->num_vfs = num_vfs;
+}
+
+static void mlx5_core_cleanup_vfs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_sriov *sriov;
+
+ sriov = &dev->priv.sriov;
+ disable_vfs(dev, sriov->num_vfs);
+
+ if (mlx5_wait_for_vf_pages(dev))
+ mlx5_core_warn(dev, "timeout claiming VFs pages\n");
+
+ sriov->num_vfs = 0;
+}
+
+int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ int err;
+
+ mlx5_core_dbg(dev, "requsted num_vfs %d\n", num_vfs);
+ if (!mlx5_core_is_pf(dev))
+ return -EPERM;
+
+ mlx5_core_cleanup_vfs(dev);
+
+ if (!num_vfs) {
+#ifdef CONFIG_MLX5_CORE_EN
+ mlx5_eswitch_disable_sriov(dev->priv.eswitch);
+#endif
+ kfree(sriov->vfs_ctx);
+ sriov->vfs_ctx = NULL;
+ if (!pci_vfs_assigned(pdev))
+ pci_disable_sriov(pdev);
+ else
+ pr_info("unloading PF driver while leaving orphan VFs\n");
+ return 0;
+ }
+
+ err = mlx5_core_sriov_enable(pdev, num_vfs);
+ if (err) {
+ dev_warn(&pdev->dev, "mlx5_core_sriov_enable failed %d\n", err);
+ return err;
+ }
+
+ mlx5_core_init_vfs(dev, num_vfs);
+#ifdef CONFIG_MLX5_CORE_EN
+ mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs);
+#endif
+
+ return num_vfs;
+}
+
+static int sync_required(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ int cur_vfs = pci_num_vf(pdev);
+
+ if (cur_vfs != sriov->num_vfs) {
+ pr_info("current VFs %d, registered %d - sync needed\n", cur_vfs, sriov->num_vfs);
+ return 1;
+ }
+
+ return 0;
+}
+
+int mlx5_sriov_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ struct pci_dev *pdev = dev->pdev;
+ int cur_vfs;
+
+ if (!mlx5_core_is_pf(dev))
+ return 0;
+
+ if (!sync_required(dev->pdev))
+ return 0;
+
+ cur_vfs = pci_num_vf(pdev);
+ sriov->vfs_ctx = kcalloc(cur_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
+ if (!sriov->vfs_ctx)
+ return -ENOMEM;
+
+ sriov->enabled_vfs = cur_vfs;
+
+ mlx5_core_init_vfs(dev, cur_vfs);
+#ifdef CONFIG_MLX5_CORE_EN
+ if (cur_vfs)
+ mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs);
+#endif
+
+ enable_vfs(dev, cur_vfs);
+
+ return 0;
+}
+
+int mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
+{
+ struct pci_dev *pdev = dev->pdev;
+ int err;
+
+ if (!mlx5_core_is_pf(dev))
+ return 0;
+
+ err = mlx5_core_sriov_configure(pdev, 0);
+ if (err)
+ return err;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index b94177ebcf3a..076197efea9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -36,54 +36,399 @@
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
-u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod)
+static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod,
+ u16 vport, u32 *out, int outlen)
{
- u32 in[MLX5_ST_SZ_DW(query_vport_state_in)];
- u32 out[MLX5_ST_SZ_DW(query_vport_state_out)];
int err;
+ u32 in[MLX5_ST_SZ_DW(query_vport_state_in)];
memset(in, 0, sizeof(in));
MLX5_SET(query_vport_state_in, in, opcode,
MLX5_CMD_OP_QUERY_VPORT_STATE);
MLX5_SET(query_vport_state_in, in, op_mod, opmod);
+ MLX5_SET(query_vport_state_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_vport_state_in, in, other_vport, 1);
- err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out,
- sizeof(out));
+ err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen);
if (err)
mlx5_core_warn(mdev, "MLX5_CMD_OP_QUERY_VPORT_STATE failed\n");
+ return err;
+}
+
+u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0};
+
+ _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out));
+
return MLX5_GET(query_vport_state_out, out, state);
}
-EXPORT_SYMBOL(mlx5_query_vport_state);
+EXPORT_SYMBOL_GPL(mlx5_query_vport_state);
+
+u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0};
+
+ _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out));
+
+ return MLX5_GET(query_vport_state_out, out, admin_state);
+}
+EXPORT_SYMBOL(mlx5_query_vport_admin_state);
-void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr)
+int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
+ u16 vport, u8 state)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)];
+ u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)];
+ int err;
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(modify_vport_state_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_VPORT_STATE);
+ MLX5_SET(modify_vport_state_in, in, op_mod, opmod);
+ MLX5_SET(modify_vport_state_in, in, vport_number, vport);
+
+ if (vport)
+ MLX5_SET(modify_vport_state_in, in, other_vport, 1);
+
+ MLX5_SET(modify_vport_state_in, in, admin_state, state);
+
+ err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out,
+ sizeof(out));
+ if (err)
+ mlx5_core_warn(mdev, "MLX5_CMD_OP_MODIFY_VPORT_STATE failed\n");
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_modify_vport_admin_state);
+
+static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)];
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+
+ return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen);
+}
+
+static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in,
+ int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+
+ memset(out, 0, sizeof(out));
+ return mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out));
+}
+
+int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
+ u16 vport, u8 *addr)
{
- u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)];
u32 *out;
int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
u8 *out_addr;
+ int err;
out = mlx5_vzalloc(outlen);
if (!out)
- return;
+ return -ENOMEM;
out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out,
nic_vport_context.permanent_address);
+ err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
+ if (err)
+ goto out;
+
+ ether_addr_copy(addr, &out_addr[2]);
+
+out:
+ kvfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address);
+
+int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
+ u16 vport, u8 *addr)
+{
+ void *in;
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ int err;
+ void *nic_vport_ctx;
+ u8 *perm_mac;
+
+ in = mlx5_vzalloc(inlen);
+ if (!in) {
+ mlx5_core_warn(mdev, "failed to allocate inbox\n");
+ return -ENOMEM;
+ }
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.permanent_address, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+
+ if (vport)
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
+ in, nic_vport_context);
+ perm_mac = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx,
+ permanent_address);
+
+ ether_addr_copy(&perm_mac[2], addr);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address);
+
+int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
+ u32 vport,
+ enum mlx5_list_type list_type,
+ u8 addr_list[][ETH_ALEN],
+ int *list_size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)];
+ void *nic_vport_ctx;
+ int max_list_size;
+ int req_list_size;
+ int out_sz;
+ void *out;
+ int err;
+ int i;
+
+ req_list_size = *list_size;
+
+ max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ?
+ 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) :
+ 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list);
+
+ if (req_list_size > max_list_size) {
+ mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n",
+ req_list_size, max_list_size);
+ req_list_size = max_list_size;
+ }
+
+ out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+ req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
+
memset(in, 0, sizeof(in));
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
MLX5_SET(query_nic_vport_context_in, in, opcode,
MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+ MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type);
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
- memset(out, 0, outlen);
- mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen);
+ if (vport)
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
- ether_addr_copy(addr, &out_addr[2]);
+ err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz);
+ if (err)
+ goto out;
- kvfree(out);
+ nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out,
+ nic_vport_context);
+ req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx,
+ allowed_list_size);
+
+ *list_size = req_list_size;
+ for (i = 0; i < req_list_size; i++) {
+ u8 *mac_addr = MLX5_ADDR_OF(nic_vport_context,
+ nic_vport_ctx,
+ current_uc_mac_address[i]) + 2;
+ ether_addr_copy(addr_list[i], mac_addr);
+ }
+out:
+ kfree(out);
+ return err;
}
-EXPORT_SYMBOL(mlx5_query_nic_vport_mac_address);
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list);
+
+int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
+ enum mlx5_list_type list_type,
+ u8 addr_list[][ETH_ALEN],
+ int list_size)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+ void *nic_vport_ctx;
+ int max_list_size;
+ int in_sz;
+ void *in;
+ int err;
+ int i;
+
+ max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ?
+ 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) :
+ 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list);
+
+ if (list_size > max_list_size)
+ return -ENOSPC;
+
+ in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+ list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
+
+ memset(out, 0, sizeof(out));
+ in = kzalloc(in_sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.addresses_list, 1);
+
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in,
+ nic_vport_context);
+
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ allowed_list_type, list_type);
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ allowed_list_size, list_size);
+
+ for (i = 0; i < list_size; i++) {
+ u8 *curr_mac = MLX5_ADDR_OF(nic_vport_context,
+ nic_vport_ctx,
+ current_uc_mac_address[i]) + 2;
+ ether_addr_copy(curr_mac, addr_list[i]);
+ }
+
+ err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out));
+ kfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list);
+
+int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev,
+ u32 vport,
+ u16 vlans[],
+ int *size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)];
+ void *nic_vport_ctx;
+ int req_list_size;
+ int max_list_size;
+ int out_sz;
+ void *out;
+ int err;
+ int i;
+
+ req_list_size = *size;
+ max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list);
+ if (req_list_size > max_list_size) {
+ mlx5_core_warn(dev, "Requested list size (%d) > (%d) max list size\n",
+ req_list_size, max_list_size);
+ req_list_size = max_list_size;
+ }
+
+ out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+ req_list_size * MLX5_ST_SZ_BYTES(vlan_layout);
+
+ memset(in, 0, sizeof(in));
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+ MLX5_SET(query_nic_vport_context_in, in, allowed_list_type,
+ MLX5_NVPRT_LIST_TYPE_VLAN);
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+
+ if (vport)
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz);
+ if (err)
+ goto out;
+
+ nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out,
+ nic_vport_context);
+ req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx,
+ allowed_list_size);
+
+ *size = req_list_size;
+ for (i = 0; i < req_list_size; i++) {
+ void *vlan_addr = MLX5_ADDR_OF(nic_vport_context,
+ nic_vport_ctx,
+ current_uc_mac_address[i]);
+ vlans[i] = MLX5_GET(vlan_layout, vlan_addr, vlan);
+ }
+out:
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_vlans);
+
+int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
+ u16 vlans[],
+ int list_size)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+ void *nic_vport_ctx;
+ int max_list_size;
+ int in_sz;
+ void *in;
+ int err;
+ int i;
+
+ max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list);
+
+ if (list_size > max_list_size)
+ return -ENOSPC;
+
+ in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+ list_size * MLX5_ST_SZ_BYTES(vlan_layout);
+
+ memset(out, 0, sizeof(out));
+ in = kzalloc(in_sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.addresses_list, 1);
+
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in,
+ nic_vport_context);
+
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ allowed_list_type, MLX5_NVPRT_LIST_TYPE_VLAN);
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ allowed_list_size, list_size);
+
+ for (i = 0; i < list_size; i++) {
+ void *vlan_addr = MLX5_ADDR_OF(nic_vport_context,
+ nic_vport_ctx,
+ current_uc_mac_address[i]);
+ MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]);
+ }
+
+ err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out));
+ kfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans);
int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
u8 port_num, u16 vf_num, u16 gid_index,
@@ -343,3 +688,65 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid);
+
+int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
+ u32 vport,
+ int *promisc_uc,
+ int *promisc_mc,
+ int *promisc_all)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ int err;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
+ if (err)
+ goto out;
+
+ *promisc_uc = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.promisc_uc);
+ *promisc_mc = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.promisc_mc);
+ *promisc_all = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.promisc_all);
+
+out:
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc);
+
+int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
+ int promisc_uc,
+ int promisc_mc,
+ int promisc_all)
+{
+ void *in;
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ int err;
+
+ in = mlx5_vzalloc(inlen);
+ if (!in) {
+ mlx5_core_err(mdev, "failed to allocate inbox\n");
+ return -ENOMEM;
+ }
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.promisc_uc, promisc_uc);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.promisc_mc, promisc_mc);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.promisc_all, promisc_all);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 1ecb4aabbdc7..af8a48b3b3ad 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -105,6 +105,9 @@ struct mlxsw_core {
struct debugfs_blob_wrapper vsd_blob;
struct debugfs_blob_wrapper psid_blob;
} dbg;
+ struct {
+ u8 *mapping; /* lag_id+port_index to local_port mapping */
+ } lag;
struct mlxsw_hwmon *hwmon;
unsigned long driver_priv[0];
/* driver_priv has to be always the last item */
@@ -815,6 +818,17 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
goto err_alloc_stats;
}
+ if (mlxsw_driver->profile->used_max_lag &&
+ mlxsw_driver->profile->used_max_port_per_lag) {
+ alloc_size = sizeof(u8) * mlxsw_driver->profile->max_lag *
+ mlxsw_driver->profile->max_port_per_lag;
+ mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL);
+ if (!mlxsw_core->lag.mapping) {
+ err = -ENOMEM;
+ goto err_alloc_lag_mapping;
+ }
+ }
+
err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile);
if (err)
goto err_bus_init;
@@ -847,6 +861,8 @@ err_hwmon_init:
err_emad_init:
mlxsw_bus->fini(bus_priv);
err_bus_init:
+ kfree(mlxsw_core->lag.mapping);
+err_alloc_lag_mapping:
free_percpu(mlxsw_core->pcpu_stats);
err_alloc_stats:
kfree(mlxsw_core);
@@ -865,6 +881,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core)
mlxsw_hwmon_fini(mlxsw_core->hwmon);
mlxsw_emad_fini(mlxsw_core);
mlxsw_core->bus->fini(mlxsw_core->bus_priv);
+ kfree(mlxsw_core->lag.mapping);
free_percpu(mlxsw_core->pcpu_stats);
kfree(mlxsw_core);
mlxsw_core_driver_put(device_kind);
@@ -1196,11 +1213,25 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
struct mlxsw_rx_listener_item *rxl_item;
const struct mlxsw_rx_listener *rxl;
struct mlxsw_core_pcpu_stats *pcpu_stats;
- u8 local_port = rx_info->sys_port;
+ u8 local_port;
bool found = false;
- dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: sys_port = %d, trap_id = 0x%x\n",
- __func__, rx_info->sys_port, rx_info->trap_id);
+ if (rx_info->is_lag) {
+ dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: lag_id = %d, lag_port_index = 0x%x\n",
+ __func__, rx_info->u.lag_id,
+ rx_info->trap_id);
+ /* Upper layer does not care if the skb came from LAG or not,
+ * so just get the local_port for the lag port and push it up.
+ */
+ local_port = mlxsw_core_lag_mapping_get(mlxsw_core,
+ rx_info->u.lag_id,
+ rx_info->lag_port_index);
+ } else {
+ local_port = rx_info->u.sys_port;
+ }
+
+ dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: local_port = %d, trap_id = 0x%x\n",
+ __func__, local_port, rx_info->trap_id);
if ((rx_info->trap_id >= MLXSW_TRAP_ID_MAX) ||
(local_port >= MLXSW_PORT_MAX_PORTS))
@@ -1244,6 +1275,48 @@ drop:
}
EXPORT_SYMBOL(mlxsw_core_skb_receive);
+static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core,
+ u16 lag_id, u8 port_index)
+{
+ return mlxsw_core->driver->profile->max_port_per_lag * lag_id +
+ port_index;
+}
+
+void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core,
+ u16 lag_id, u8 port_index, u8 local_port)
+{
+ int index = mlxsw_core_lag_mapping_index(mlxsw_core,
+ lag_id, port_index);
+
+ mlxsw_core->lag.mapping[index] = local_port;
+}
+EXPORT_SYMBOL(mlxsw_core_lag_mapping_set);
+
+u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core,
+ u16 lag_id, u8 port_index)
+{
+ int index = mlxsw_core_lag_mapping_index(mlxsw_core,
+ lag_id, port_index);
+
+ return mlxsw_core->lag.mapping[index];
+}
+EXPORT_SYMBOL(mlxsw_core_lag_mapping_get);
+
+void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core,
+ u16 lag_id, u8 local_port)
+{
+ int i;
+
+ for (i = 0; i < mlxsw_core->driver->profile->max_port_per_lag; i++) {
+ int index = mlxsw_core_lag_mapping_index(mlxsw_core,
+ lag_id, i);
+
+ if (mlxsw_core->lag.mapping[index] == local_port)
+ mlxsw_core->lag.mapping[index] = 0;
+ }
+}
+EXPORT_SYMBOL(mlxsw_core_lag_mapping_clear);
+
int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod,
u32 in_mod, bool out_mbox_direct,
char *in_mbox, size_t in_mbox_size,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 5ac952956d55..4833fb33ce07 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -112,13 +112,25 @@ int mlxsw_reg_write(struct mlxsw_core *mlxsw_core,
const struct mlxsw_reg_info *reg, char *payload);
struct mlxsw_rx_info {
- u16 sys_port;
+ bool is_lag;
+ union {
+ u16 sys_port;
+ u16 lag_id;
+ } u;
+ u8 lag_port_index;
int trap_id;
};
void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
struct mlxsw_rx_info *rx_info);
+void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core,
+ u16 lag_id, u8 port_index, u8 local_port);
+u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core,
+ u16 lag_id, u8 port_index);
+void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core,
+ u16 lag_id, u8 local_port);
+
#define MLXSW_CONFIG_PROFILE_SWID_COUNT 8
struct mlxsw_swid_config {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index de69e719dc9d..d2102e572b1d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -686,11 +686,15 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
if (q->consumer_counter++ != consumer_counter_limit)
dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n");
- /* We do not support lag now */
- if (mlxsw_pci_cqe_lag_get(cqe))
- goto drop;
+ if (mlxsw_pci_cqe_lag_get(cqe)) {
+ rx_info.is_lag = true;
+ rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe);
+ rx_info.lag_port_index = mlxsw_pci_cqe_lag_port_index_get(cqe);
+ } else {
+ rx_info.is_lag = false;
+ rx_info.u.sys_port = mlxsw_pci_cqe_system_port_get(cqe);
+ }
- rx_info.sys_port = mlxsw_pci_cqe_system_port_get(cqe);
rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe);
byte_count = mlxsw_pci_cqe_byte_count_get(cqe);
@@ -699,7 +703,6 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
skb_put(skb, byte_count);
mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info);
-put_new_skb:
memset(wqe, 0, q->elem_size);
err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info);
if (err)
@@ -708,10 +711,6 @@ put_new_skb:
q->producer_counter++;
mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
return;
-
-drop:
- dev_kfree_skb_any(skb);
- goto put_new_skb;
}
static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h
index 142f33d978c5..912106054ff2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h
@@ -129,13 +129,15 @@ MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false);
*/
MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1);
-/* pci_cqe_system_port
+/* pci_cqe_system_port/lag_id
* When lag=0: System port on which the packet was received
* When lag=1:
* bits [15:4] LAG ID on which the packet was received
* bits [3:0] sub_port on which the packet was received
*/
MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16);
+MLXSW_ITEM32(pci, cqe, lag_id, 0x00, 4, 12);
+MLXSW_ITEM32(pci, cqe, lag_port_index, 0x00, 0, 4);
/* pci_cqe_wqe_counter
* WQE count of the WQEs completed on the associated dqn
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index f89419393a7f..4e4e4dcf054f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -286,6 +286,7 @@ MLXSW_ITEM32_INDEXED(reg, sfd, rec_swid, MLXSW_REG_SFD_BASE_LEN, 24, 8,
enum mlxsw_reg_sfd_rec_type {
MLXSW_REG_SFD_REC_TYPE_UNICAST = 0x0,
+ MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG = 0x1,
};
/* reg_sfd_rec_type
@@ -376,24 +377,34 @@ MLXSW_ITEM32_INDEXED(reg, sfd, uc_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16,
MLXSW_ITEM32_INDEXED(reg, sfd, uc_system_port, MLXSW_REG_SFD_BASE_LEN, 0, 16,
MLXSW_REG_SFD_REC_LEN, 0x0C, false);
-static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index,
- enum mlxsw_reg_sfd_rec_policy policy,
- const char *mac, u16 vid,
- enum mlxsw_reg_sfd_rec_action action,
- u8 local_port)
+static inline void mlxsw_reg_sfd_rec_pack(char *payload, int rec_index,
+ enum mlxsw_reg_sfd_rec_type rec_type,
+ enum mlxsw_reg_sfd_rec_policy policy,
+ const char *mac,
+ enum mlxsw_reg_sfd_rec_action action)
{
u8 num_rec = mlxsw_reg_sfd_num_rec_get(payload);
if (rec_index >= num_rec)
mlxsw_reg_sfd_num_rec_set(payload, rec_index + 1);
mlxsw_reg_sfd_rec_swid_set(payload, rec_index, 0);
- mlxsw_reg_sfd_rec_type_set(payload, rec_index,
- MLXSW_REG_SFD_REC_TYPE_UNICAST);
+ mlxsw_reg_sfd_rec_type_set(payload, rec_index, rec_type);
mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy);
mlxsw_reg_sfd_rec_mac_memcpy_to(payload, rec_index, mac);
+ mlxsw_reg_sfd_rec_action_set(payload, rec_index, action);
+}
+
+static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index,
+ enum mlxsw_reg_sfd_rec_policy policy,
+ const char *mac, u16 vid,
+ enum mlxsw_reg_sfd_rec_action action,
+ u8 local_port)
+{
+ mlxsw_reg_sfd_rec_pack(payload, rec_index,
+ MLXSW_REG_SFD_REC_TYPE_UNICAST,
+ policy, mac, action);
mlxsw_reg_sfd_uc_sub_port_set(payload, rec_index, 0);
mlxsw_reg_sfd_uc_fid_vid_set(payload, rec_index, vid);
- mlxsw_reg_sfd_rec_action_set(payload, rec_index, action);
mlxsw_reg_sfd_uc_system_port_set(payload, rec_index, local_port);
}
@@ -406,6 +417,58 @@ static inline void mlxsw_reg_sfd_uc_unpack(char *payload, int rec_index,
*p_local_port = mlxsw_reg_sfd_uc_system_port_get(payload, rec_index);
}
+/* reg_sfd_uc_lag_sub_port
+ * LAG sub port.
+ * Must be 0 if multichannel VEPA is not enabled.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8,
+ MLXSW_REG_SFD_REC_LEN, 0x08, false);
+
+/* reg_sfd_uc_lag_fid_vid
+ * Filtering ID or VLAN ID
+ * For SwitchX and SwitchX-2:
+ * - Dynamic entries (policy 2,3) use FID
+ * - Static entries (policy 0) use VID
+ * - When independent learning is configured, VID=FID
+ * For Spectrum: use FID for both Dynamic and Static entries.
+ * VID should not be used.
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16,
+ MLXSW_REG_SFD_REC_LEN, 0x08, false);
+
+/* reg_sfd_uc_lag_lag_id
+ * LAG Identifier - pointer into the LAG descriptor table.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_id, MLXSW_REG_SFD_BASE_LEN, 0, 10,
+ MLXSW_REG_SFD_REC_LEN, 0x0C, false);
+
+static inline void
+mlxsw_reg_sfd_uc_lag_pack(char *payload, int rec_index,
+ enum mlxsw_reg_sfd_rec_policy policy,
+ const char *mac, u16 vid,
+ enum mlxsw_reg_sfd_rec_action action,
+ u16 lag_id)
+{
+ mlxsw_reg_sfd_rec_pack(payload, rec_index,
+ MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG,
+ policy, mac, action);
+ mlxsw_reg_sfd_uc_lag_sub_port_set(payload, rec_index, 0);
+ mlxsw_reg_sfd_uc_lag_fid_vid_set(payload, rec_index, vid);
+ mlxsw_reg_sfd_uc_lag_lag_id_set(payload, rec_index, lag_id);
+}
+
+static inline void mlxsw_reg_sfd_uc_lag_unpack(char *payload, int rec_index,
+ char *mac, u16 *p_vid,
+ u16 *p_lag_id)
+{
+ mlxsw_reg_sfd_rec_mac_memcpy_from(payload, rec_index, mac);
+ *p_vid = mlxsw_reg_sfd_uc_lag_fid_vid_get(payload, rec_index);
+ *p_lag_id = mlxsw_reg_sfd_uc_lag_lag_id_get(payload, rec_index);
+}
+
/* SFN - Switch FDB Notification Register
* -------------------------------------------
* The switch provides notifications on newly learned FDB entries and
@@ -456,8 +519,12 @@ MLXSW_ITEM32_INDEXED(reg, sfn, rec_swid, MLXSW_REG_SFN_BASE_LEN, 24, 8,
enum mlxsw_reg_sfn_rec_type {
/* MAC addresses learned on a regular port. */
MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC = 0x5,
- /* Aged-out MAC address on a regular port */
+ /* MAC addresses learned on a LAG port. */
+ MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG = 0x6,
+ /* Aged-out MAC address on a regular port. */
MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC = 0x7,
+ /* Aged-out MAC address on a LAG port. */
+ MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG = 0x8,
};
/* reg_sfn_rec_type
@@ -505,6 +572,22 @@ static inline void mlxsw_reg_sfn_mac_unpack(char *payload, int rec_index,
*p_local_port = mlxsw_reg_sfn_mac_system_port_get(payload, rec_index);
}
+/* reg_sfn_mac_lag_lag_id
+ * LAG ID (pointer into the LAG descriptor table).
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, sfn, mac_lag_lag_id, MLXSW_REG_SFN_BASE_LEN, 0, 10,
+ MLXSW_REG_SFN_REC_LEN, 0x0C, false);
+
+static inline void mlxsw_reg_sfn_mac_lag_unpack(char *payload, int rec_index,
+ char *mac, u16 *p_vid,
+ u16 *p_lag_id)
+{
+ mlxsw_reg_sfn_rec_mac_memcpy_from(payload, rec_index, mac);
+ *p_vid = mlxsw_reg_sfn_mac_fid_get(payload, rec_index);
+ *p_lag_id = mlxsw_reg_sfn_mac_lag_lag_id_get(payload, rec_index);
+}
+
/* SPMS - Switch Port MSTP/RSTP State Register
* -------------------------------------------
* Configures the spanning tree state of a physical port.
@@ -865,6 +948,293 @@ static inline void mlxsw_reg_sftr_pack(char *payload,
mlxsw_reg_sftr_port_mask_set(payload, port, 1);
}
+/* SLDR - Switch LAG Descriptor Register
+ * -----------------------------------------
+ * The switch LAG descriptor register is populated by LAG descriptors.
+ * Each LAG descriptor is indexed by lag_id. The LAG ID runs from 0 to
+ * max_lag-1.
+ */
+#define MLXSW_REG_SLDR_ID 0x2014
+#define MLXSW_REG_SLDR_LEN 0x0C /* counting in only one port in list */
+
+static const struct mlxsw_reg_info mlxsw_reg_sldr = {
+ .id = MLXSW_REG_SLDR_ID,
+ .len = MLXSW_REG_SLDR_LEN,
+};
+
+enum mlxsw_reg_sldr_op {
+ /* Indicates a creation of a new LAG-ID, lag_id must be valid */
+ MLXSW_REG_SLDR_OP_LAG_CREATE,
+ MLXSW_REG_SLDR_OP_LAG_DESTROY,
+ /* Ports that appear in the list have the Distributor enabled */
+ MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST,
+ /* Removes ports from the disributor list */
+ MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST,
+};
+
+/* reg_sldr_op
+ * Operation.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sldr, op, 0x00, 29, 3);
+
+/* reg_sldr_lag_id
+ * LAG identifier. The lag_id is the index into the LAG descriptor table.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sldr, lag_id, 0x00, 0, 10);
+
+static inline void mlxsw_reg_sldr_lag_create_pack(char *payload, u8 lag_id)
+{
+ MLXSW_REG_ZERO(sldr, payload);
+ mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_CREATE);
+ mlxsw_reg_sldr_lag_id_set(payload, lag_id);
+}
+
+static inline void mlxsw_reg_sldr_lag_destroy_pack(char *payload, u8 lag_id)
+{
+ MLXSW_REG_ZERO(sldr, payload);
+ mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_DESTROY);
+ mlxsw_reg_sldr_lag_id_set(payload, lag_id);
+}
+
+/* reg_sldr_num_ports
+ * The number of member ports of the LAG.
+ * Reserved for Create / Destroy operations
+ * For Add / Remove operations - indicates the number of ports in the list.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sldr, num_ports, 0x04, 24, 8);
+
+/* reg_sldr_system_port
+ * System port.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sldr, system_port, 0x08, 0, 16, 4, 0, false);
+
+static inline void mlxsw_reg_sldr_lag_add_port_pack(char *payload, u8 lag_id,
+ u8 local_port)
+{
+ MLXSW_REG_ZERO(sldr, payload);
+ mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST);
+ mlxsw_reg_sldr_lag_id_set(payload, lag_id);
+ mlxsw_reg_sldr_num_ports_set(payload, 1);
+ mlxsw_reg_sldr_system_port_set(payload, 0, local_port);
+}
+
+static inline void mlxsw_reg_sldr_lag_remove_port_pack(char *payload, u8 lag_id,
+ u8 local_port)
+{
+ MLXSW_REG_ZERO(sldr, payload);
+ mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST);
+ mlxsw_reg_sldr_lag_id_set(payload, lag_id);
+ mlxsw_reg_sldr_num_ports_set(payload, 1);
+ mlxsw_reg_sldr_system_port_set(payload, 0, local_port);
+}
+
+/* SLCR - Switch LAG Configuration 2 Register
+ * -------------------------------------------
+ * The Switch LAG Configuration register is used for configuring the
+ * LAG properties of the switch.
+ */
+#define MLXSW_REG_SLCR_ID 0x2015
+#define MLXSW_REG_SLCR_LEN 0x10
+
+static const struct mlxsw_reg_info mlxsw_reg_slcr = {
+ .id = MLXSW_REG_SLCR_ID,
+ .len = MLXSW_REG_SLCR_LEN,
+};
+
+enum mlxsw_reg_slcr_pp {
+ /* Global Configuration (for all ports) */
+ MLXSW_REG_SLCR_PP_GLOBAL,
+ /* Per port configuration, based on local_port field */
+ MLXSW_REG_SLCR_PP_PER_PORT,
+};
+
+/* reg_slcr_pp
+ * Per Port Configuration
+ * Note: Reading at Global mode results in reading port 1 configuration.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, slcr, pp, 0x00, 24, 1);
+
+/* reg_slcr_local_port
+ * Local port number
+ * Supported from CPU port
+ * Not supported from router port
+ * Reserved when pp = Global Configuration
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, slcr, local_port, 0x00, 16, 8);
+
+enum mlxsw_reg_slcr_type {
+ MLXSW_REG_SLCR_TYPE_CRC, /* default */
+ MLXSW_REG_SLCR_TYPE_XOR,
+ MLXSW_REG_SLCR_TYPE_RANDOM,
+};
+
+/* reg_slcr_type
+ * Hash type
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, slcr, type, 0x00, 0, 4);
+
+/* Ingress port */
+#define MLXSW_REG_SLCR_LAG_HASH_IN_PORT BIT(0)
+/* SMAC - for IPv4 and IPv6 packets */
+#define MLXSW_REG_SLCR_LAG_HASH_SMAC_IP BIT(1)
+/* SMAC - for non-IP packets */
+#define MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP BIT(2)
+#define MLXSW_REG_SLCR_LAG_HASH_SMAC \
+ (MLXSW_REG_SLCR_LAG_HASH_SMAC_IP | \
+ MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP)
+/* DMAC - for IPv4 and IPv6 packets */
+#define MLXSW_REG_SLCR_LAG_HASH_DMAC_IP BIT(3)
+/* DMAC - for non-IP packets */
+#define MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP BIT(4)
+#define MLXSW_REG_SLCR_LAG_HASH_DMAC \
+ (MLXSW_REG_SLCR_LAG_HASH_DMAC_IP | \
+ MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP)
+/* Ethertype - for IPv4 and IPv6 packets */
+#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP BIT(5)
+/* Ethertype - for non-IP packets */
+#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP BIT(6)
+#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE \
+ (MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP | \
+ MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP)
+/* VLAN ID - for IPv4 and IPv6 packets */
+#define MLXSW_REG_SLCR_LAG_HASH_VLANID_IP BIT(7)
+/* VLAN ID - for non-IP packets */
+#define MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP BIT(8)
+#define MLXSW_REG_SLCR_LAG_HASH_VLANID \
+ (MLXSW_REG_SLCR_LAG_HASH_VLANID_IP | \
+ MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP)
+/* Source IP address (can be IPv4 or IPv6) */
+#define MLXSW_REG_SLCR_LAG_HASH_SIP BIT(9)
+/* Destination IP address (can be IPv4 or IPv6) */
+#define MLXSW_REG_SLCR_LAG_HASH_DIP BIT(10)
+/* TCP/UDP source port */
+#define MLXSW_REG_SLCR_LAG_HASH_SPORT BIT(11)
+/* TCP/UDP destination port*/
+#define MLXSW_REG_SLCR_LAG_HASH_DPORT BIT(12)
+/* IPv4 Protocol/IPv6 Next Header */
+#define MLXSW_REG_SLCR_LAG_HASH_IPPROTO BIT(13)
+/* IPv6 Flow label */
+#define MLXSW_REG_SLCR_LAG_HASH_FLOWLABEL BIT(14)
+/* SID - FCoE source ID */
+#define MLXSW_REG_SLCR_LAG_HASH_FCOE_SID BIT(15)
+/* DID - FCoE destination ID */
+#define MLXSW_REG_SLCR_LAG_HASH_FCOE_DID BIT(16)
+/* OXID - FCoE originator exchange ID */
+#define MLXSW_REG_SLCR_LAG_HASH_FCOE_OXID BIT(17)
+/* Destination QP number - for RoCE packets */
+#define MLXSW_REG_SLCR_LAG_HASH_ROCE_DQP BIT(19)
+
+/* reg_slcr_lag_hash
+ * LAG hashing configuration. This is a bitmask, in which each set
+ * bit includes the corresponding item in the LAG hash calculation.
+ * The default lag_hash contains SMAC, DMAC, VLANID and
+ * Ethertype (for all packet types).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, slcr, lag_hash, 0x04, 0, 20);
+
+static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash)
+{
+ MLXSW_REG_ZERO(slcr, payload);
+ mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL);
+ mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_XOR);
+ mlxsw_reg_slcr_lag_hash_set(payload, lag_hash);
+}
+
+/* SLCOR - Switch LAG Collector Register
+ * -------------------------------------
+ * The Switch LAG Collector register controls the Local Port membership
+ * in a LAG and enablement of the collector.
+ */
+#define MLXSW_REG_SLCOR_ID 0x2016
+#define MLXSW_REG_SLCOR_LEN 0x10
+
+static const struct mlxsw_reg_info mlxsw_reg_slcor = {
+ .id = MLXSW_REG_SLCOR_ID,
+ .len = MLXSW_REG_SLCOR_LEN,
+};
+
+enum mlxsw_reg_slcor_col {
+ /* Port is added with collector disabled */
+ MLXSW_REG_SLCOR_COL_LAG_ADD_PORT,
+ MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED,
+ MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_DISABLED,
+ MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT,
+};
+
+/* reg_slcor_col
+ * Collector configuration
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, slcor, col, 0x00, 30, 2);
+
+/* reg_slcor_local_port
+ * Local port number
+ * Not supported for CPU port
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, slcor, local_port, 0x00, 16, 8);
+
+/* reg_slcor_lag_id
+ * LAG Identifier. Index into the LAG descriptor table.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, slcor, lag_id, 0x00, 0, 10);
+
+/* reg_slcor_port_index
+ * Port index in the LAG list. Only valid on Add Port to LAG col.
+ * Valid range is from 0 to cap_max_lag_members-1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, slcor, port_index, 0x04, 0, 10);
+
+static inline void mlxsw_reg_slcor_pack(char *payload,
+ u8 local_port, u16 lag_id,
+ enum mlxsw_reg_slcor_col col)
+{
+ MLXSW_REG_ZERO(slcor, payload);
+ mlxsw_reg_slcor_col_set(payload, col);
+ mlxsw_reg_slcor_local_port_set(payload, local_port);
+ mlxsw_reg_slcor_lag_id_set(payload, lag_id);
+}
+
+static inline void mlxsw_reg_slcor_port_add_pack(char *payload,
+ u8 local_port, u16 lag_id,
+ u8 port_index)
+{
+ mlxsw_reg_slcor_pack(payload, local_port, lag_id,
+ MLXSW_REG_SLCOR_COL_LAG_ADD_PORT);
+ mlxsw_reg_slcor_port_index_set(payload, port_index);
+}
+
+static inline void mlxsw_reg_slcor_port_remove_pack(char *payload,
+ u8 local_port, u16 lag_id)
+{
+ mlxsw_reg_slcor_pack(payload, local_port, lag_id,
+ MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT);
+}
+
+static inline void mlxsw_reg_slcor_col_enable_pack(char *payload,
+ u8 local_port, u16 lag_id)
+{
+ mlxsw_reg_slcor_pack(payload, local_port, lag_id,
+ MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED);
+}
+
+static inline void mlxsw_reg_slcor_col_disable_pack(char *payload,
+ u8 local_port, u16 lag_id)
+{
+ mlxsw_reg_slcor_pack(payload, local_port, lag_id,
+ MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED);
+}
+
/* SPMLR - Switch Port MAC Learning Register
* -----------------------------------------
* Controls the Switch MAC learning policy per port.
@@ -2653,6 +3023,12 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id)
return "SFGC";
case MLXSW_REG_SFTR_ID:
return "SFTR";
+ case MLXSW_REG_SLDR_ID:
+ return "SLDR";
+ case MLXSW_REG_SLCR_ID:
+ return "SLCR";
+ case MLXSW_REG_SLCOR_ID:
+ return "SLCOR";
case MLXSW_REG_SPMLR_ID:
return "SPMLR";
case MLXSW_REG_SVFA_ID:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 14a9a9ff89ed..3ec07b9a458d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -417,6 +417,10 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb,
return NETDEV_TX_OK;
}
+static void mlxsw_sp_set_rx_mode(struct net_device *dev)
+{
+}
+
static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
@@ -725,6 +729,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
.ndo_open = mlxsw_sp_port_open,
.ndo_stop = mlxsw_sp_port_stop,
.ndo_start_xmit = mlxsw_sp_port_xmit,
+ .ndo_set_rx_mode = mlxsw_sp_set_rx_mode,
.ndo_set_mac_address = mlxsw_sp_port_set_mac_address,
.ndo_change_mtu = mlxsw_sp_port_change_mtu,
.ndo_get_stats64 = mlxsw_sp_port_get_stats64,
@@ -1707,6 +1712,22 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp)
return 0;
}
+static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
+{
+ char slcr_pl[MLXSW_REG_SLCR_LEN];
+
+ mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC |
+ MLXSW_REG_SLCR_LAG_HASH_DMAC |
+ MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE |
+ MLXSW_REG_SLCR_LAG_HASH_VLANID |
+ MLXSW_REG_SLCR_LAG_HASH_SIP |
+ MLXSW_REG_SLCR_LAG_HASH_DIP |
+ MLXSW_REG_SLCR_LAG_HASH_SPORT |
+ MLXSW_REG_SLCR_LAG_HASH_DPORT |
+ MLXSW_REG_SLCR_LAG_HASH_IPPROTO);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl);
+}
+
static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core,
const struct mlxsw_bus_info *mlxsw_bus_info)
{
@@ -1752,6 +1773,12 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core,
goto err_buffers_init;
}
+ err = mlxsw_sp_lag_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize LAG\n");
+ goto err_lag_init;
+ }
+
err = mlxsw_sp_switchdev_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize switchdev\n");
@@ -1761,6 +1788,7 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core,
return 0;
err_switchdev_init:
+err_lag_init:
err_buffers_init:
err_flood_init:
mlxsw_sp_traps_fini(mlxsw_sp);
@@ -1788,9 +1816,9 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = {
.used_max_vepa_channels = 1,
.max_vepa_channels = 0,
.used_max_lag = 1,
- .max_lag = 64,
+ .max_lag = MLXSW_SP_LAG_MAX,
.used_max_port_per_lag = 1,
- .max_port_per_lag = 16,
+ .max_port_per_lag = MLXSW_SP_PORT_PER_LAG_MAX,
.used_max_mid = 1,
.max_mid = 7000,
.used_max_pgt = 1,
@@ -1889,19 +1917,245 @@ static void mlxsw_sp_master_bridge_dec(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp->master_bridge.dev = NULL;
}
-static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
- unsigned long event, void *ptr)
+static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id)
+{
+ char sldr_pl[MLXSW_REG_SLDR_LEN];
+
+ mlxsw_reg_sldr_lag_create_pack(sldr_pl, lag_id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
+}
+
+static int mlxsw_sp_lag_destroy(struct mlxsw_sp *mlxsw_sp, u16 lag_id)
+{
+ char sldr_pl[MLXSW_REG_SLDR_LEN];
+
+ mlxsw_reg_sldr_lag_destroy_pack(sldr_pl, lag_id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
+}
+
+static int mlxsw_sp_lag_col_port_add(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 lag_id, u8 port_index)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char slcor_pl[MLXSW_REG_SLCOR_LEN];
+
+ mlxsw_reg_slcor_port_add_pack(slcor_pl, mlxsw_sp_port->local_port,
+ lag_id, port_index);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl);
+}
+
+static int mlxsw_sp_lag_col_port_remove(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 lag_id)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char slcor_pl[MLXSW_REG_SLCOR_LEN];
+
+ mlxsw_reg_slcor_port_remove_pack(slcor_pl, mlxsw_sp_port->local_port,
+ lag_id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl);
+}
+
+static int mlxsw_sp_lag_col_port_enable(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 lag_id)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char slcor_pl[MLXSW_REG_SLCOR_LEN];
+
+ mlxsw_reg_slcor_col_enable_pack(slcor_pl, mlxsw_sp_port->local_port,
+ lag_id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl);
+}
+
+static int mlxsw_sp_lag_col_port_disable(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 lag_id)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char slcor_pl[MLXSW_REG_SLCOR_LEN];
+
+ mlxsw_reg_slcor_col_disable_pack(slcor_pl, mlxsw_sp_port->local_port,
+ lag_id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl);
+}
+
+static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *lag_dev,
+ u16 *p_lag_id)
+{
+ struct mlxsw_sp_upper *lag;
+ int free_lag_id = -1;
+ int i;
+
+ for (i = 0; i < MLXSW_SP_LAG_MAX; i++) {
+ lag = mlxsw_sp_lag_get(mlxsw_sp, i);
+ if (lag->ref_count) {
+ if (lag->dev == lag_dev) {
+ *p_lag_id = i;
+ return 0;
+ }
+ } else if (free_lag_id < 0) {
+ free_lag_id = i;
+ }
+ }
+ if (free_lag_id < 0)
+ return -EBUSY;
+ *p_lag_id = free_lag_id;
+ return 0;
+}
+
+static bool
+mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *lag_dev,
+ struct netdev_lag_upper_info *lag_upper_info)
+{
+ u16 lag_id;
+
+ if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0)
+ return false;
+ if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH)
+ return false;
+ return true;
+}
+
+static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp,
+ u16 lag_id, u8 *p_port_index)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) {
+ if (!mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i)) {
+ *p_port_index = i;
+ return 0;
+ }
+ }
+ return -EBUSY;
+}
+
+static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct net_device *lag_dev)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_upper *lag;
+ u16 lag_id;
+ u8 port_index;
+ int err;
+
+ err = mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id);
+ if (err)
+ return err;
+ lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id);
+ if (!lag->ref_count) {
+ err = mlxsw_sp_lag_create(mlxsw_sp, lag_id);
+ if (err)
+ return err;
+ lag->dev = lag_dev;
+ }
+
+ err = mlxsw_sp_port_lag_index_get(mlxsw_sp, lag_id, &port_index);
+ if (err)
+ return err;
+ err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index);
+ if (err)
+ goto err_col_port_add;
+ err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, lag_id);
+ if (err)
+ goto err_col_port_enable;
+
+ mlxsw_core_lag_mapping_set(mlxsw_sp->core, lag_id, port_index,
+ mlxsw_sp_port->local_port);
+ mlxsw_sp_port->lag_id = lag_id;
+ mlxsw_sp_port->lagged = 1;
+ lag->ref_count++;
+ return 0;
+
+err_col_port_add:
+ if (!lag->ref_count)
+ mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
+err_col_port_enable:
+ mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id);
+ return err;
+}
+
+static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct net_device *lag_dev)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_upper *lag;
+ u16 lag_id = mlxsw_sp_port->lag_id;
+ int err;
+
+ if (!mlxsw_sp_port->lagged)
+ return 0;
+ lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id);
+ WARN_ON(lag->ref_count == 0);
+
+ err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id);
+ if (err)
+ return err;
+ mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id);
+ if (err)
+ return err;
+
+ if (lag->ref_count == 1) {
+ err = mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
+ if (err)
+ return err;
+ }
+
+ mlxsw_core_lag_mapping_clear(mlxsw_sp->core, lag_id,
+ mlxsw_sp_port->local_port);
+ mlxsw_sp_port->lagged = 0;
+ lag->ref_count--;
+ return 0;
+}
+
+static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 lag_id)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char sldr_pl[MLXSW_REG_SLDR_LEN];
+
+ mlxsw_reg_sldr_lag_add_port_pack(sldr_pl, lag_id,
+ mlxsw_sp_port->local_port);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
+}
+
+static int mlxsw_sp_lag_dist_port_remove(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 lag_id)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char sldr_pl[MLXSW_REG_SLDR_LEN];
+
+ mlxsw_reg_sldr_lag_remove_port_pack(sldr_pl, lag_id,
+ mlxsw_sp_port->local_port);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
+}
+
+static int mlxsw_sp_port_lag_tx_en_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ bool lag_tx_enabled)
+{
+ if (lag_tx_enabled)
+ return mlxsw_sp_lag_dist_port_add(mlxsw_sp_port,
+ mlxsw_sp_port->lag_id);
+ else
+ return mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port,
+ mlxsw_sp_port->lag_id);
+}
+
+static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct netdev_lag_lower_state_info *info)
+{
+ return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled);
+}
+
+static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev,
+ unsigned long event, void *ptr)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct netdev_notifier_changeupper_info *info;
struct mlxsw_sp_port *mlxsw_sp_port;
struct net_device *upper_dev;
struct mlxsw_sp *mlxsw_sp;
int err;
- if (!mlxsw_sp_port_dev_check(dev))
- return NOTIFY_DONE;
-
mlxsw_sp_port = netdev_priv(dev);
mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
info = ptr;
@@ -1909,16 +2163,22 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
switch (event) {
case NETDEV_PRECHANGEUPPER:
upper_dev = info->upper_dev;
+ if (!info->master || !info->linking)
+ break;
/* HW limitation forbids to put ports to multiple bridges. */
- if (info->master && info->linking &&
- netif_is_bridge_master(upper_dev) &&
+ if (netif_is_bridge_master(upper_dev) &&
!mlxsw_sp_master_bridge_check(mlxsw_sp, upper_dev))
return NOTIFY_BAD;
+ if (netif_is_lag_master(upper_dev) &&
+ !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev,
+ info->upper_info))
+ return NOTIFY_BAD;
break;
case NETDEV_CHANGEUPPER:
upper_dev = info->upper_dev;
- if (info->master &&
- netif_is_bridge_master(upper_dev)) {
+ if (!info->master)
+ break;
+ if (netif_is_bridge_master(upper_dev)) {
if (info->linking) {
err = mlxsw_sp_port_bridge_join(mlxsw_sp_port);
if (err)
@@ -1932,6 +2192,46 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
mlxsw_sp_port->bridged = 0;
mlxsw_sp_master_bridge_dec(mlxsw_sp, upper_dev);
}
+ } else if (netif_is_lag_master(upper_dev)) {
+ if (info->linking) {
+ err = mlxsw_sp_port_lag_join(mlxsw_sp_port,
+ upper_dev);
+ if (err) {
+ netdev_err(dev, "Failed to join link aggregation\n");
+ return NOTIFY_BAD;
+ }
+ } else {
+ err = mlxsw_sp_port_lag_leave(mlxsw_sp_port,
+ upper_dev);
+ if (err) {
+ netdev_err(dev, "Failed to leave link aggregation\n");
+ return NOTIFY_BAD;
+ }
+ }
+ }
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev,
+ unsigned long event, void *ptr)
+{
+ struct netdev_notifier_changelowerstate_info *info;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ int err;
+
+ mlxsw_sp_port = netdev_priv(dev);
+ info = ptr;
+
+ switch (event) {
+ case NETDEV_CHANGELOWERSTATE:
+ if (netif_is_lag_port(dev) && mlxsw_sp_port->lagged) {
+ err = mlxsw_sp_port_lag_changed(mlxsw_sp_port,
+ info->lower_state_info);
+ if (err)
+ netdev_err(dev, "Failed to reflect link aggregation lower state change\n");
}
break;
}
@@ -1939,6 +2239,52 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
return NOTIFY_DONE;
}
+static int mlxsw_sp_netdevice_port_event(struct net_device *dev,
+ unsigned long event, void *ptr)
+{
+ switch (event) {
+ case NETDEV_PRECHANGEUPPER:
+ case NETDEV_CHANGEUPPER:
+ return mlxsw_sp_netdevice_port_upper_event(dev, event, ptr);
+ case NETDEV_CHANGELOWERSTATE:
+ return mlxsw_sp_netdevice_port_lower_event(dev, event, ptr);
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev;
+ struct list_head *iter;
+ int ret;
+
+ netdev_for_each_lower_dev(lag_dev, dev, iter) {
+ if (mlxsw_sp_port_dev_check(dev)) {
+ ret = mlxsw_sp_netdevice_port_event(dev, event, ptr);
+ if (ret == NOTIFY_BAD)
+ return ret;
+ }
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (mlxsw_sp_port_dev_check(dev))
+ return mlxsw_sp_netdevice_port_event(dev, event, ptr);
+
+ if (netif_is_lag_master(dev))
+ return mlxsw_sp_netdevice_lag_event(dev, event, ptr);
+
+ return NOTIFY_DONE;
+}
+
static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = {
.notifier_call = mlxsw_sp_netdevice_event,
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 4365c8bccc6d..48be5a63b9b5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -46,9 +46,16 @@
#include "core.h"
#define MLXSW_SP_VFID_BASE VLAN_N_VID
+#define MLXSW_SP_LAG_MAX 64
+#define MLXSW_SP_PORT_PER_LAG_MAX 16
struct mlxsw_sp_port;
+struct mlxsw_sp_upper {
+ struct net_device *dev;
+ unsigned int ref_count;
+};
+
struct mlxsw_sp {
unsigned long active_vfids[BITS_TO_LONGS(VLAN_N_VID)];
unsigned long active_fids[BITS_TO_LONGS(VLAN_N_VID)];
@@ -63,12 +70,16 @@ struct mlxsw_sp {
} fdb_notify;
#define MLXSW_SP_DEFAULT_AGEING_TIME 300
u32 ageing_time;
- struct {
- struct net_device *dev;
- unsigned int ref_count;
- } master_bridge;
+ struct mlxsw_sp_upper master_bridge;
+ struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX];
};
+static inline struct mlxsw_sp_upper *
+mlxsw_sp_lag_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id)
+{
+ return &mlxsw_sp->lags[lag_id];
+}
+
struct mlxsw_sp_port_pcpu_stats {
u64 rx_packets;
u64 rx_bytes;
@@ -87,8 +98,10 @@ struct mlxsw_sp_port {
u8 learning:1,
learning_sync:1,
uc_flood:1,
- bridged:1;
+ bridged:1,
+ lagged:1;
u16 pvid;
+ u16 lag_id;
/* 802.1Q bridge VLANs */
unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
/* VLAN interfaces */
@@ -96,6 +109,18 @@ struct mlxsw_sp_port {
u16 nr_vfids;
};
+static inline struct mlxsw_sp_port *
+mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ u8 local_port;
+
+ local_port = mlxsw_core_lag_mapping_get(mlxsw_sp->core,
+ lag_id, port_index);
+ mlxsw_sp_port = mlxsw_sp->ports[local_port];
+ return mlxsw_sp_port && mlxsw_sp_port->lagged ? mlxsw_sp_port : NULL;
+}
+
enum mlxsw_sp_flood_table {
MLXSW_SP_FLOOD_TABLE_UC,
MLXSW_SP_FLOOD_TABLE_BM,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index f21e23983a1a..406dab2f6b17 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -490,32 +490,56 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
untagged_flag, pvid_flag);
}
-static int mlxsw_sp_port_fdb_op(struct mlxsw_sp_port *mlxsw_sp_port,
- const char *mac, u16 vid, bool adding,
- bool dynamic)
+static enum mlxsw_reg_sfd_rec_policy mlxsw_sp_sfd_rec_policy(bool dynamic)
{
- enum mlxsw_reg_sfd_rec_policy policy;
- enum mlxsw_reg_sfd_op op;
+ return dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS :
+ MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY;
+}
+
+static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding)
+{
+ return adding ? MLXSW_REG_SFD_OP_WRITE_EDIT :
+ MLXSW_REG_SFD_OP_WRITE_REMOVE;
+}
+
+static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp_port *mlxsw_sp_port,
+ const char *mac, u16 vid, bool adding,
+ bool dynamic)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
char *sfd_pl;
int err;
- if (!vid)
- vid = mlxsw_sp_port->pvid;
-
sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
if (!sfd_pl)
return -ENOMEM;
- policy = dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS :
- MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY;
- op = adding ? MLXSW_REG_SFD_OP_WRITE_EDIT :
- MLXSW_REG_SFD_OP_WRITE_REMOVE;
- mlxsw_reg_sfd_pack(sfd_pl, op, 0);
- mlxsw_reg_sfd_uc_pack(sfd_pl, 0, policy,
+ mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
+ mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic),
mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP,
mlxsw_sp_port->local_port);
- err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(sfd),
- sfd_pl);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
+ kfree(sfd_pl);
+
+ return err;
+}
+
+static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id,
+ const char *mac, u16 vid, bool adding,
+ bool dynamic)
+{
+ char *sfd_pl;
+ int err;
+
+ sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
+ if (!sfd_pl)
+ return -ENOMEM;
+
+ mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
+ mlxsw_reg_sfd_uc_lag_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic),
+ mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP,
+ lag_id);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
kfree(sfd_pl);
return err;
@@ -526,11 +550,21 @@ mlxsw_sp_port_fdb_static_add(struct mlxsw_sp_port *mlxsw_sp_port,
const struct switchdev_obj_port_fdb *fdb,
struct switchdev_trans *trans)
{
+ u16 vid = fdb->vid;
+
if (switchdev_trans_ph_prepare(trans))
return 0;
- return mlxsw_sp_port_fdb_op(mlxsw_sp_port, fdb->addr, fdb->vid,
- true, false);
+ if (!vid)
+ vid = mlxsw_sp_port->pvid;
+
+ if (!mlxsw_sp_port->lagged)
+ return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port,
+ fdb->addr, vid, true, false);
+ else
+ return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp,
+ mlxsw_sp_port->lag_id,
+ fdb->addr, vid, true, false);
}
static int mlxsw_sp_port_obj_add(struct net_device *dev,
@@ -645,8 +679,15 @@ static int
mlxsw_sp_port_fdb_static_del(struct mlxsw_sp_port *mlxsw_sp_port,
const struct switchdev_obj_port_fdb *fdb)
{
- return mlxsw_sp_port_fdb_op(mlxsw_sp_port, fdb->addr, fdb->vid,
- false, false);
+ if (!mlxsw_sp_port->lagged)
+ return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port,
+ fdb->addr, fdb->vid,
+ false, false);
+ else
+ return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp,
+ mlxsw_sp_port->lag_id,
+ fdb->addr, fdb->vid,
+ false, false);
}
static int mlxsw_sp_port_obj_del(struct net_device *dev,
@@ -672,14 +713,30 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev,
return err;
}
+static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp,
+ u16 lag_id)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ int i;
+
+ for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) {
+ mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i);
+ if (mlxsw_sp_port)
+ return mlxsw_sp_port;
+ }
+ return NULL;
+}
+
static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
struct switchdev_obj_port_fdb *fdb,
switchdev_obj_dump_cb_t *cb)
{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
char *sfd_pl;
char mac[ETH_ALEN];
u16 vid;
u8 local_port;
+ u16 lag_id;
u8 num_rec;
int stored_err = 0;
int i;
@@ -692,8 +749,7 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0);
do {
mlxsw_reg_sfd_num_rec_set(sfd_pl, MLXSW_REG_SFD_REC_MAX_COUNT);
- err = mlxsw_reg_query(mlxsw_sp_port->mlxsw_sp->core,
- MLXSW_REG(sfd), sfd_pl);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
if (err)
goto out;
@@ -718,6 +774,20 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
if (err)
stored_err = err;
}
+ break;
+ case MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG:
+ mlxsw_reg_sfd_uc_lag_unpack(sfd_pl, i,
+ mac, &vid, &lag_id);
+ if (mlxsw_sp_port ==
+ mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id)) {
+ ether_addr_copy(fdb->addr, mac);
+ fdb->ndm_state = NUD_REACHABLE;
+ fdb->vid = vid;
+ err = cb(&fdb->obj);
+ if (err)
+ stored_err = err;
+ }
+ break;
}
}
} while (num_rec == MLXSW_REG_SFD_REC_MAX_COUNT);
@@ -779,6 +849,21 @@ static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = {
.switchdev_port_obj_dump = mlxsw_sp_port_obj_dump,
};
+static void mlxsw_sp_fdb_call_notifiers(bool learning, bool learning_sync,
+ bool adding, char *mac, u16 vid,
+ struct net_device *dev)
+{
+ struct switchdev_notifier_fdb_info info;
+ unsigned long notifier_type;
+
+ if (learning && learning_sync) {
+ info.addr = mac;
+ info.vid = vid;
+ notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL;
+ call_switchdev_notifiers(notifier_type, dev, &info.info);
+ }
+}
+
static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
char *sfn_pl, int rec_index,
bool adding)
@@ -796,24 +881,49 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
return;
}
- err = mlxsw_sp_port_fdb_op(mlxsw_sp_port, mac, vid,
- adding && mlxsw_sp_port->learning, true);
+ err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, mac, vid,
+ adding && mlxsw_sp_port->learning, true);
if (err) {
if (net_ratelimit())
netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n");
return;
}
- if (mlxsw_sp_port->learning && mlxsw_sp_port->learning_sync) {
- struct switchdev_notifier_fdb_info info;
- unsigned long notifier_type;
+ mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning,
+ mlxsw_sp_port->learning_sync,
+ adding, mac, vid, mlxsw_sp_port->dev);
+}
- info.addr = mac;
- info.vid = vid;
- notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL;
- call_switchdev_notifiers(notifier_type, mlxsw_sp_port->dev,
- &info.info);
+static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp,
+ char *sfn_pl, int rec_index,
+ bool adding)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ char mac[ETH_ALEN];
+ u16 lag_id;
+ u16 vid;
+ int err;
+
+ mlxsw_reg_sfn_mac_lag_unpack(sfn_pl, rec_index, mac, &vid, &lag_id);
+ mlxsw_sp_port = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id);
+ if (!mlxsw_sp_port) {
+ dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Cannot find port representor for LAG\n");
+ return;
}
+
+ err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, vid,
+ adding && mlxsw_sp_port->learning,
+ true);
+ if (err) {
+ if (net_ratelimit())
+ netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n");
+ return;
+ }
+
+ mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning,
+ mlxsw_sp_port->learning_sync,
+ adding, mac, vid,
+ mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev);
}
static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp,
@@ -828,6 +938,14 @@ static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_fdb_notify_mac_process(mlxsw_sp, sfn_pl,
rec_index, false);
break;
+ case MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG:
+ mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl,
+ rec_index, true);
+ break;
+ case MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG:
+ mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl,
+ rec_index, false);
+ break;
}
}
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 651d35ea22c5..059c0f60a2b2 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -91,10 +91,24 @@ void team_modeop_port_change_dev_addr(struct team *team,
}
EXPORT_SYMBOL(team_modeop_port_change_dev_addr);
+static void team_lower_state_changed(struct team_port *port)
+{
+ struct netdev_lag_lower_state_info info;
+
+ info.link_up = port->linkup;
+ info.tx_enabled = team_port_enabled(port);
+ netdev_lower_state_changed(port->dev, &info);
+}
+
static void team_refresh_port_linkup(struct team_port *port)
{
- port->linkup = port->user.linkup_enabled ? port->user.linkup :
- port->state.linkup;
+ bool new_linkup = port->user.linkup_enabled ? port->user.linkup :
+ port->state.linkup;
+
+ if (port->linkup != new_linkup) {
+ port->linkup = new_linkup;
+ team_lower_state_changed(port);
+ }
}
@@ -932,6 +946,7 @@ static void team_port_enable(struct team *team,
team->ops.port_enabled(team, port);
team_notify_peers(team);
team_mcast_rejoin(team);
+ team_lower_state_changed(port);
}
static void __reconstruct_port_hlist(struct team *team, int rm_index)
@@ -963,6 +978,7 @@ static void team_port_disable(struct team *team,
team_adjust_ops(team);
team_notify_peers(team);
team_mcast_rejoin(team);
+ team_lower_state_changed(port);
}
#define TEAM_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \
@@ -1078,23 +1094,24 @@ static void team_port_disable_netpoll(struct team_port *port)
}
#endif
-static int team_upper_dev_link(struct net_device *dev,
- struct net_device *port_dev)
+static int team_upper_dev_link(struct team *team, struct team_port *port)
{
+ struct netdev_lag_upper_info lag_upper_info;
int err;
- err = netdev_master_upper_dev_link(port_dev, dev);
+ lag_upper_info.tx_type = team->mode->lag_tx_type;
+ err = netdev_master_upper_dev_link(port->dev, team->dev, NULL,
+ &lag_upper_info);
if (err)
return err;
- port_dev->priv_flags |= IFF_TEAM_PORT;
+ port->dev->priv_flags |= IFF_TEAM_PORT;
return 0;
}
-static void team_upper_dev_unlink(struct net_device *dev,
- struct net_device *port_dev)
+static void team_upper_dev_unlink(struct team *team, struct team_port *port)
{
- netdev_upper_dev_unlink(port_dev, dev);
- port_dev->priv_flags &= ~IFF_TEAM_PORT;
+ netdev_upper_dev_unlink(port->dev, team->dev);
+ port->dev->priv_flags &= ~IFF_TEAM_PORT;
}
static void __team_port_change_port_added(struct team_port *port, bool linkup);
@@ -1194,7 +1211,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
goto err_handler_register;
}
- err = team_upper_dev_link(dev, port_dev);
+ err = team_upper_dev_link(team, port);
if (err) {
netdev_err(dev, "Device %s failed to set upper link\n",
portname);
@@ -1220,7 +1237,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
return 0;
err_option_port_add:
- team_upper_dev_unlink(dev, port_dev);
+ team_upper_dev_unlink(team, port);
err_set_upper_link:
netdev_rx_handler_unregister(port_dev);
@@ -1264,7 +1281,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev)
team_port_disable(team, port);
list_del_rcu(&port->list);
- team_upper_dev_unlink(dev, port_dev);
+ team_upper_dev_unlink(team, port);
netdev_rx_handler_unregister(port_dev);
team_port_disable_netpoll(port);
vlan_vids_del_by_dev(port_dev, dev);
@@ -2054,6 +2071,7 @@ static void team_setup(struct net_device *dev)
dev->flags |= IFF_MULTICAST;
dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
dev->priv_flags |= IFF_NO_QUEUE;
+ dev->priv_flags |= IFF_TEAM;
/*
* Indicate we support unicast address filtering. That way core won't
@@ -2420,9 +2438,13 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info)
struct nlattr *nl_option;
LIST_HEAD(opt_inst_list);
+ rtnl_lock();
+
team = team_nl_team_get(info);
- if (!team)
- return -EINVAL;
+ if (!team) {
+ err = -EINVAL;
+ goto rtnl_unlock;
+ }
err = -EINVAL;
if (!info->attrs[TEAM_ATTR_LIST_OPTION]) {
@@ -2549,7 +2571,8 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info)
team_put:
team_nl_team_put(team);
-
+rtnl_unlock:
+ rtnl_unlock();
return err;
}
diff --git a/drivers/net/team/team_mode_activebackup.c b/drivers/net/team/team_mode_activebackup.c
index 40fd3381b693..3f189823ba3b 100644
--- a/drivers/net/team/team_mode_activebackup.c
+++ b/drivers/net/team/team_mode_activebackup.c
@@ -127,6 +127,7 @@ static const struct team_mode ab_mode = {
.owner = THIS_MODULE,
.priv_size = sizeof(struct ab_priv),
.ops = &ab_mode_ops,
+ .lag_tx_type = NETDEV_LAG_TX_TYPE_ACTIVEBACKUP,
};
static int __init ab_init_module(void)
diff --git a/drivers/net/team/team_mode_broadcast.c b/drivers/net/team/team_mode_broadcast.c
index c366cd299c06..302ff35b0cbc 100644
--- a/drivers/net/team/team_mode_broadcast.c
+++ b/drivers/net/team/team_mode_broadcast.c
@@ -56,6 +56,7 @@ static const struct team_mode bc_mode = {
.kind = "broadcast",
.owner = THIS_MODULE,
.ops = &bc_mode_ops,
+ .lag_tx_type = NETDEV_LAG_TX_TYPE_BROADCAST,
};
static int __init bc_init_module(void)
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index a1536d0d83a9..cdb19b385d42 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -661,6 +661,7 @@ static const struct team_mode lb_mode = {
.priv_size = sizeof(struct lb_priv),
.port_priv_size = sizeof(struct lb_port_priv),
.ops = &lb_mode_ops,
+ .lag_tx_type = NETDEV_LAG_TX_TYPE_HASH,
};
static int __init lb_init_module(void)
diff --git a/drivers/net/team/team_mode_random.c b/drivers/net/team/team_mode_random.c
index cd2f692b8074..215f845782db 100644
--- a/drivers/net/team/team_mode_random.c
+++ b/drivers/net/team/team_mode_random.c
@@ -46,6 +46,7 @@ static const struct team_mode rnd_mode = {
.kind = "random",
.owner = THIS_MODULE,
.ops = &rnd_mode_ops,
+ .lag_tx_type = NETDEV_LAG_TX_TYPE_RANDOM,
};
static int __init rnd_init_module(void)
diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c
index 53665850b59e..0aa234118c03 100644
--- a/drivers/net/team/team_mode_roundrobin.c
+++ b/drivers/net/team/team_mode_roundrobin.c
@@ -58,6 +58,7 @@ static const struct team_mode rr_mode = {
.owner = THIS_MODULE,
.priv_size = sizeof(struct rr_priv),
.ops = &rr_mode_ops,
+ .lag_tx_type = NETDEV_LAG_TX_TYPE_ROUNDROBIN,
};
static int __init rr_init_module(void)
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index c2d54c4ed556..8944a49cda15 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -624,7 +624,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
goto out_fail;
}
- ret = netdev_master_upper_dev_link(port_dev, dev);
+ ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL);
if (ret < 0)
goto out_unregister;
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index a6aa970758a2..b84e49c3a738 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -164,6 +164,7 @@ struct team_mode {
size_t priv_size;
size_t port_priv_size;
const struct team_mode_ops *ops;
+ enum netdev_lag_tx_type lag_tx_type;
};
#define TEAM_PORT_HASHBITS 4
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 0b473cbfa7ef..7d3a85faefb7 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -251,6 +251,7 @@ enum mlx5_event {
MLX5_EVENT_TYPE_PAGE_REQUEST = 0xb,
MLX5_EVENT_TYPE_PAGE_FAULT = 0xc,
+ MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd,
};
enum {
@@ -520,6 +521,12 @@ struct mlx5_eqe_page_fault {
__be32 flags_qpn;
} __packed;
+struct mlx5_eqe_vport_change {
+ u8 rsvd0[2];
+ __be16 vport_num;
+ __be32 rsvd1[6];
+} __packed;
+
union ev_data {
__be32 raw[7];
struct mlx5_eqe_cmd cmd;
@@ -532,6 +539,7 @@ union ev_data {
struct mlx5_eqe_stall_vl stall_vl;
struct mlx5_eqe_page_req req_pages;
struct mlx5_eqe_page_fault page_fault;
+ struct mlx5_eqe_vport_change vport_change;
} __packed;
struct mlx5_eqe {
@@ -1067,6 +1075,12 @@ enum {
};
enum {
+ MLX5_ESW_VPORT_ADMIN_STATE_DOWN = 0x0,
+ MLX5_ESW_VPORT_ADMIN_STATE_UP = 0x1,
+ MLX5_ESW_VPORT_ADMIN_STATE_AUTO = 0x2,
+};
+
+enum {
MLX5_L3_PROT_TYPE_IPV4 = 0,
MLX5_L3_PROT_TYPE_IPV6 = 1,
};
@@ -1102,6 +1116,12 @@ enum {
MLX5_FLOW_CONTEXT_DEST_TYPE_TIR = 2,
};
+enum mlx5_list_type {
+ MLX5_NVPRT_LIST_TYPE_UC = 0x0,
+ MLX5_NVPRT_LIST_TYPE_MC = 0x1,
+ MLX5_NVPRT_LIST_TYPE_VLAN = 0x2,
+};
+
enum {
MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE = 0x0,
MLX5_RQC_RQ_TYPE_MEMORY_RQ_RPM = 0x1,
@@ -1124,6 +1144,8 @@ enum mlx5_cap_type {
MLX5_CAP_IPOIB_OFFLOADS,
MLX5_CAP_EOIB_OFFLOADS,
MLX5_CAP_FLOW_TABLE,
+ MLX5_CAP_ESWITCH_FLOW_TABLE,
+ MLX5_CAP_ESWITCH,
/* NUM OF CAP Types */
MLX5_CAP_NUM
};
@@ -1161,6 +1183,28 @@ enum mlx5_cap_type {
#define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \
MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap)
+#define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \
+ MLX5_GET(flow_table_eswitch_cap, \
+ mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
+
+#define MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, cap) \
+ MLX5_GET(flow_table_eswitch_cap, \
+ mdev->hca_caps_max[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
+
+#define MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) \
+ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_nic_esw_fdb.cap)
+
+#define MLX5_CAP_ESW_FLOWTABLE_FDB_MAX(mdev, cap) \
+ MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_nic_esw_fdb.cap)
+
+#define MLX5_CAP_ESW(mdev, cap) \
+ MLX5_GET(e_switch_cap, \
+ mdev->hca_caps_cur[MLX5_CAP_ESWITCH], cap)
+
+#define MLX5_CAP_ESW_MAX(mdev, cap) \
+ MLX5_GET(e_switch_cap, \
+ mdev->hca_caps_max[MLX5_CAP_ESWITCH], cap)
+
#define MLX5_CAP_ODP(mdev, cap)\
MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 5c857f2a20d7..ac098b6b97bf 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -426,11 +426,23 @@ struct mlx5_mr_table {
struct radix_tree_root tree;
};
+struct mlx5_vf_context {
+ int enabled;
+};
+
+struct mlx5_core_sriov {
+ struct mlx5_vf_context *vfs_ctx;
+ int num_vfs;
+ int enabled_vfs;
+};
+
struct mlx5_irq_info {
cpumask_var_t mask;
char name[MLX5_MAX_IRQ_NAME];
};
+struct mlx5_eswitch;
+
struct mlx5_priv {
char name[MLX5_MAX_NAME_LEN];
struct mlx5_eq_table eq_table;
@@ -447,6 +459,7 @@ struct mlx5_priv {
int fw_pages;
atomic_t reg_pages;
struct list_head free_list;
+ int vfs_pages;
struct mlx5_core_health health;
@@ -485,6 +498,10 @@ struct mlx5_priv {
struct list_head dev_list;
struct list_head ctx_list;
spinlock_t ctx_lock;
+
+ struct mlx5_eswitch *eswitch;
+ struct mlx5_core_sriov sriov;
+ unsigned long pci_dev_data;
};
enum mlx5_device_state {
@@ -739,6 +756,8 @@ void mlx5_pagealloc_init(struct mlx5_core_dev *dev);
void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
int mlx5_pagealloc_start(struct mlx5_core_dev *dev);
void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
+int mlx5_sriov_init(struct mlx5_core_dev *dev);
+int mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
s32 npages);
int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
@@ -884,6 +903,15 @@ struct mlx5_profile {
} mr_cache[MAX_MR_CACHE_ENTRIES];
};
+enum {
+ MLX5_PCI_DEV_IS_VF = 1 << 0,
+};
+
+static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev)
+{
+ return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
+}
+
static inline int mlx5_get_gid_table_len(u16 param)
{
if (param > 4) {
diff --git a/include/linux/mlx5/flow_table.h b/include/linux/mlx5/flow_table.h
index 5f922c6d4fc2..0f2a15cf3317 100644
--- a/include/linux/mlx5/flow_table.h
+++ b/include/linux/mlx5/flow_table.h
@@ -41,6 +41,15 @@ struct mlx5_flow_table_group {
u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)];
};
+struct mlx5_flow_destination {
+ enum mlx5_flow_destination_type type;
+ union {
+ u32 tir_num;
+ void *ft;
+ u32 vport_num;
+ };
+};
+
void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type,
u16 num_groups,
struct mlx5_flow_table_group *group);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1565324eb620..f5d94495758a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -447,6 +447,29 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
u8 reserved_3[0x7200];
};
+struct mlx5_ifc_flow_table_eswitch_cap_bits {
+ u8 reserved_0[0x200];
+
+ struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb;
+
+ struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_ingress;
+
+ struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_egress;
+
+ u8 reserved_1[0x7800];
+};
+
+struct mlx5_ifc_e_switch_cap_bits {
+ u8 vport_svlan_strip[0x1];
+ u8 vport_cvlan_strip[0x1];
+ u8 vport_svlan_insert[0x1];
+ u8 vport_cvlan_insert_if_not_exist[0x1];
+ u8 vport_cvlan_insert_overwrite[0x1];
+ u8 reserved_0[0x1b];
+
+ u8 reserved_1[0x7e0];
+};
+
struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
u8 csum_cap[0x1];
u8 vlan_cap[0x1];
@@ -665,7 +688,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_17[0x1];
u8 ets[0x1];
u8 nic_flow_table[0x1];
- u8 reserved_18[0x4];
+ u8 eswitch_flow_table[0x1];
+ u8 early_vf_enable;
+ u8 reserved_18[0x2];
u8 local_ca_ack_delay[0x5];
u8 reserved_19[0x6];
u8 port_type[0x2];
@@ -787,27 +812,36 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_60[0x1b];
u8 log_max_wq_sz[0x5];
- u8 reserved_61[0xa0];
-
+ u8 nic_vport_change_event[0x1];
+ u8 reserved_61[0xa];
+ u8 log_max_vlan_list[0x5];
u8 reserved_62[0x3];
+ u8 log_max_current_mc_list[0x5];
+ u8 reserved_63[0x3];
+ u8 log_max_current_uc_list[0x5];
+
+ u8 reserved_64[0x80];
+
+ u8 reserved_65[0x3];
u8 log_max_l2_table[0x5];
- u8 reserved_63[0x8];
+ u8 reserved_66[0x8];
u8 log_uar_page_sz[0x10];
- u8 reserved_64[0x100];
+ u8 reserved_67[0xe0];
- u8 reserved_65[0x1f];
+ u8 reserved_68[0x1f];
u8 cqe_zip[0x1];
u8 cqe_zip_timeout[0x10];
u8 cqe_zip_max_num[0x10];
- u8 reserved_66[0x220];
+ u8 reserved_69[0x220];
};
-enum {
- MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_FLOW_TABLE_ = 0x1,
- MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_TIR = 0x2,
+enum mlx5_flow_destination_type {
+ MLX5_FLOW_DESTINATION_TYPE_VPORT = 0x0,
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1,
+ MLX5_FLOW_DESTINATION_TYPE_TIR = 0x2,
};
struct mlx5_ifc_dest_format_struct_bits {
@@ -900,6 +934,13 @@ struct mlx5_ifc_mac_address_layout_bits {
u8 mac_addr_31_0[0x20];
};
+struct mlx5_ifc_vlan_layout_bits {
+ u8 reserved_0[0x14];
+ u8 vlan[0x0c];
+
+ u8 reserved_1[0x20];
+};
+
struct mlx5_ifc_cong_control_r_roce_ecn_np_bits {
u8 reserved_0[0xa0];
@@ -1829,6 +1870,8 @@ union mlx5_ifc_hca_cap_union_bits {
struct mlx5_ifc_roce_cap_bits roce_cap;
struct mlx5_ifc_per_protocol_networking_offload_caps_bits per_protocol_networking_offload_caps;
struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap;
+ struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap;
+ struct mlx5_ifc_e_switch_cap_bits e_switch_cap;
u8 reserved_0[0x8000];
};
@@ -2133,24 +2176,35 @@ struct mlx5_ifc_rmpc_bits {
struct mlx5_ifc_wq_bits wq;
};
-enum {
- MLX5_NIC_VPORT_CONTEXT_ALLOWED_LIST_TYPE_CURRENT_UC_MAC_ADDRESS = 0x0,
-};
-
struct mlx5_ifc_nic_vport_context_bits {
u8 reserved_0[0x1f];
u8 roce_en[0x1];
- u8 reserved_1[0x760];
+ u8 arm_change_event[0x1];
+ u8 reserved_1[0x1a];
+ u8 event_on_mtu[0x1];
+ u8 event_on_promisc_change[0x1];
+ u8 event_on_vlan_change[0x1];
+ u8 event_on_mc_address_change[0x1];
+ u8 event_on_uc_address_change[0x1];
- u8 reserved_2[0x5];
+ u8 reserved_2[0xf0];
+
+ u8 mtu[0x10];
+
+ u8 reserved_3[0x640];
+
+ u8 promisc_uc[0x1];
+ u8 promisc_mc[0x1];
+ u8 promisc_all[0x1];
+ u8 reserved_4[0x2];
u8 allowed_list_type[0x3];
- u8 reserved_3[0xc];
+ u8 reserved_5[0xc];
u8 allowed_list_size[0xc];
struct mlx5_ifc_mac_address_layout_bits permanent_address;
- u8 reserved_4[0x20];
+ u8 reserved_6[0x20];
u8 current_uc_mac_address[0][0x40];
};
@@ -2263,6 +2317,26 @@ struct mlx5_ifc_hca_vport_context_bits {
u8 reserved_6[0xca0];
};
+struct mlx5_ifc_esw_vport_context_bits {
+ u8 reserved_0[0x3];
+ u8 vport_svlan_strip[0x1];
+ u8 vport_cvlan_strip[0x1];
+ u8 vport_svlan_insert[0x1];
+ u8 vport_cvlan_insert[0x2];
+ u8 reserved_1[0x18];
+
+ u8 reserved_2[0x20];
+
+ u8 svlan_cfi[0x1];
+ u8 svlan_pcp[0x3];
+ u8 svlan_id[0xc];
+ u8 cvlan_cfi[0x1];
+ u8 cvlan_pcp[0x3];
+ u8 cvlan_id[0xc];
+
+ u8 reserved_3[0x7a0];
+};
+
enum {
MLX5_EQC_STATUS_OK = 0x0,
MLX5_EQC_STATUS_EQ_WRITE_FAILURE = 0xa,
@@ -2940,6 +3014,7 @@ struct mlx5_ifc_query_vport_state_out_bits {
enum {
MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT = 0x0,
+ MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT = 0x1,
};
struct mlx5_ifc_query_vport_state_in_bits {
@@ -3700,6 +3775,64 @@ struct mlx5_ifc_query_flow_group_in_bits {
u8 reserved_5[0x120];
};
+struct mlx5_ifc_query_esw_vport_context_out_bits {
+ u8 status[0x8];
+ u8 reserved_0[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_1[0x40];
+
+ struct mlx5_ifc_esw_vport_context_bits esw_vport_context;
+};
+
+struct mlx5_ifc_query_esw_vport_context_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_0[0x10];
+
+ u8 reserved_1[0x10];
+ u8 op_mod[0x10];
+
+ u8 other_vport[0x1];
+ u8 reserved_2[0xf];
+ u8 vport_number[0x10];
+
+ u8 reserved_3[0x20];
+};
+
+struct mlx5_ifc_modify_esw_vport_context_out_bits {
+ u8 status[0x8];
+ u8 reserved_0[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_1[0x40];
+};
+
+struct mlx5_ifc_esw_vport_context_fields_select_bits {
+ u8 reserved[0x1c];
+ u8 vport_cvlan_insert[0x1];
+ u8 vport_svlan_insert[0x1];
+ u8 vport_cvlan_strip[0x1];
+ u8 vport_svlan_strip[0x1];
+};
+
+struct mlx5_ifc_modify_esw_vport_context_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_0[0x10];
+
+ u8 reserved_1[0x10];
+ u8 op_mod[0x10];
+
+ u8 other_vport[0x1];
+ u8 reserved_2[0xf];
+ u8 vport_number[0x10];
+
+ struct mlx5_ifc_esw_vport_context_fields_select_bits field_select;
+
+ struct mlx5_ifc_esw_vport_context_bits esw_vport_context;
+};
+
struct mlx5_ifc_query_eq_out_bits {
u8 status[0x8];
u8 reserved_0[0x18];
@@ -4228,7 +4361,10 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits {
};
struct mlx5_ifc_modify_nic_vport_field_select_bits {
- u8 reserved_0[0x1c];
+ u8 reserved_0[0x19];
+ u8 mtu[0x1];
+ u8 change_event[0x1];
+ u8 promisc[0x1];
u8 permanent_address[0x1];
u8 addresses_list[0x1];
u8 roce_en[0x1];
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 967e0fd06e89..638f2ca7a527 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -34,9 +34,17 @@
#define __MLX5_VPORT_H__
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
-u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod);
-void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr);
+u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport);
+u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
+ u16 vport);
+int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
+ u16 vport, u8 state);
+int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
+ u16 vport, u8 *addr);
+int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev,
+ u16 vport, u8 *addr);
int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
u8 port_num, u16 vf_num, u16 gid_index,
union ib_gid *gid);
@@ -51,5 +59,30 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev,
u64 *sys_image_guid);
int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
u64 *node_guid);
+int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
+ u32 vport,
+ enum mlx5_list_type list_type,
+ u8 addr_list[][ETH_ALEN],
+ int *list_size);
+int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
+ enum mlx5_list_type list_type,
+ u8 addr_list[][ETH_ALEN],
+ int list_size);
+int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
+ u32 vport,
+ int *promisc_uc,
+ int *promisc_mc,
+ int *promisc_all);
+int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
+ int promisc_uc,
+ int promisc_mc,
+ int promisc_all);
+int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev,
+ u32 vport,
+ u16 vlans[],
+ int *size);
+int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
+ u16 vlans[],
+ int list_size);
#endif /* __MLX5_VPORT_H__ */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fcbc5259c630..fa84b59eb197 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1273,6 +1273,7 @@ struct net_device_ops {
* @IFF_NO_QUEUE: device can run without qdisc attached
* @IFF_OPENVSWITCH: device is a Open vSwitch master
* @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device
+ * @IFF_TEAM: device is a team device
*/
enum netdev_priv_flags {
IFF_802_1Q_VLAN = 1<<0,
@@ -1299,6 +1300,7 @@ enum netdev_priv_flags {
IFF_NO_QUEUE = 1<<21,
IFF_OPENVSWITCH = 1<<22,
IFF_L3MDEV_SLAVE = 1<<23,
+ IFF_TEAM = 1<<24,
};
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
@@ -1325,6 +1327,7 @@ enum netdev_priv_flags {
#define IFF_NO_QUEUE IFF_NO_QUEUE
#define IFF_OPENVSWITCH IFF_OPENVSWITCH
#define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE
+#define IFF_TEAM IFF_TEAM
/**
* struct net_device - The DEVICE structure.
@@ -2107,6 +2110,24 @@ struct pcpu_sw_netstats {
#define netdev_alloc_pcpu_stats(type) \
__netdev_alloc_pcpu_stats(type, GFP_KERNEL);
+enum netdev_lag_tx_type {
+ NETDEV_LAG_TX_TYPE_UNKNOWN,
+ NETDEV_LAG_TX_TYPE_RANDOM,
+ NETDEV_LAG_TX_TYPE_BROADCAST,
+ NETDEV_LAG_TX_TYPE_ROUNDROBIN,
+ NETDEV_LAG_TX_TYPE_ACTIVEBACKUP,
+ NETDEV_LAG_TX_TYPE_HASH,
+};
+
+struct netdev_lag_upper_info {
+ enum netdev_lag_tx_type tx_type;
+};
+
+struct netdev_lag_lower_state_info {
+ u8 link_up : 1,
+ tx_enabled : 1;
+};
+
#include <linux/notifier.h>
/* netdevice notifier chain. Please remember to update the rtnetlink
@@ -2142,6 +2163,7 @@ struct pcpu_sw_netstats {
#define NETDEV_CHANGEINFODATA 0x0018
#define NETDEV_BONDING_INFO 0x0019
#define NETDEV_PRECHANGEUPPER 0x001A
+#define NETDEV_CHANGELOWERSTATE 0x001B
int register_netdevice_notifier(struct notifier_block *nb);
int unregister_netdevice_notifier(struct notifier_block *nb);
@@ -2160,6 +2182,12 @@ struct netdev_notifier_changeupper_info {
struct net_device *upper_dev; /* new upper dev */
bool master; /* is upper dev master */
bool linking; /* is the nofication for link or unlink */
+ void *upper_info; /* upper dev info */
+};
+
+struct netdev_notifier_changelowerstate_info {
+ struct netdev_notifier_info info; /* must be first */
+ void *lower_state_info; /* is lower dev state */
};
static inline void netdev_notifier_info_init(struct netdev_notifier_info *info,
@@ -3616,15 +3644,15 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev);
int netdev_master_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev);
-int netdev_master_upper_dev_link_private(struct net_device *dev,
- struct net_device *upper_dev,
- void *private);
+ struct net_device *upper_dev,
+ void *upper_priv, void *upper_info);
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev);
void netdev_adjacent_rename_links(struct net_device *dev, char *oldname);
void *netdev_lower_dev_get_private(struct net_device *dev,
struct net_device *lower_dev);
+void netdev_lower_state_changed(struct net_device *lower_dev,
+ void *lower_state_info);
/* RSS keys are 40 or 52 bytes long */
#define NETDEV_RSS_KEY_LEN 52
@@ -3889,6 +3917,26 @@ static inline bool netif_is_ovs_master(const struct net_device *dev)
return dev->priv_flags & IFF_OPENVSWITCH;
}
+static inline bool netif_is_team_master(struct net_device *dev)
+{
+ return dev->priv_flags & IFF_TEAM;
+}
+
+static inline bool netif_is_team_port(struct net_device *dev)
+{
+ return dev->priv_flags & IFF_TEAM_PORT;
+}
+
+static inline bool netif_is_lag_master(struct net_device *dev)
+{
+ return netif_is_bond_master(dev) || netif_is_team_master(dev);
+}
+
+static inline bool netif_is_lag_port(struct net_device *dev)
+{
+ return netif_is_bond_slave(dev) || netif_is_team_port(dev);
+}
+
/* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
static inline void netif_keep_dst(struct net_device *dev)
{
diff --git a/include/net/bonding.h b/include/net/bonding.h
index c1740a2794a3..ee6c52053aa3 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -165,7 +165,8 @@ struct slave {
u8 backup:1, /* indicates backup slave. Value corresponds with
BOND_STATE_ACTIVE and BOND_STATE_BACKUP */
inactive:1, /* indicates inactive slave */
- should_notify:1; /* indicateds whether the state changed */
+ should_notify:1, /* indicates whether the state changed */
+ should_notify_link:1; /* indicates whether the link changed */
u8 duplex;
u32 original_mtu;
u32 link_failure_count;
@@ -246,6 +247,7 @@ struct bonding {
((struct slave *) rtnl_dereference(dev->rx_handler_data))
void bond_queue_slave_event(struct slave *slave);
+void bond_lower_state_changed(struct slave *slave);
struct bond_vlan_tag {
__be16 vlan_proto;
@@ -327,6 +329,7 @@ static inline void bond_set_active_slave(struct slave *slave)
if (slave->backup) {
slave->backup = 0;
bond_queue_slave_event(slave);
+ bond_lower_state_changed(slave);
rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC);
}
}
@@ -336,6 +339,7 @@ static inline void bond_set_backup_slave(struct slave *slave)
if (!slave->backup) {
slave->backup = 1;
bond_queue_slave_event(slave);
+ bond_lower_state_changed(slave);
rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC);
}
}
@@ -348,6 +352,7 @@ static inline void bond_set_slave_state(struct slave *slave,
slave->backup = slave_state;
if (notify) {
+ bond_lower_state_changed(slave);
rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC);
bond_queue_slave_event(slave);
slave->should_notify = 0;
@@ -379,6 +384,7 @@ static inline void bond_slave_state_notify(struct bonding *bond)
bond_for_each_slave(bond, tmp, iter) {
if (tmp->should_notify) {
+ bond_lower_state_changed(tmp);
rtmsg_ifinfo(RTM_NEWLINK, tmp->dev, 0, GFP_ATOMIC);
tmp->should_notify = 0;
}
@@ -504,10 +510,37 @@ static inline bool bond_is_slave_inactive(struct slave *slave)
return slave->inactive;
}
-static inline void bond_set_slave_link_state(struct slave *slave, int state)
+static inline void bond_set_slave_link_state(struct slave *slave, int state,
+ bool notify)
{
+ if (slave->link == state)
+ return;
+
slave->link = state;
- bond_queue_slave_event(slave);
+ if (notify) {
+ bond_queue_slave_event(slave);
+ bond_lower_state_changed(slave);
+ slave->should_notify_link = 0;
+ } else {
+ if (slave->should_notify_link)
+ slave->should_notify_link = 0;
+ else
+ slave->should_notify_link = 1;
+ }
+}
+
+static inline void bond_slave_link_notify(struct bonding *bond)
+{
+ struct list_head *iter;
+ struct slave *tmp;
+
+ bond_for_each_slave(bond, tmp, iter) {
+ if (tmp->should_notify_link) {
+ bond_queue_slave_event(tmp);
+ bond_lower_state_changed(tmp);
+ tmp->should_notify_link = 0;
+ }
+ }
}
static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local)
diff --git a/lib/netdev-notifier-error-inject.c b/lib/netdev-notifier-error-inject.c
index b2b856675d61..13e9c62e216f 100644
--- a/lib/netdev-notifier-error-inject.c
+++ b/lib/netdev-notifier-error-inject.c
@@ -18,6 +18,7 @@ static struct notifier_err_inject netdev_notifier_err_inject = {
{ NOTIFIER_ERR_INJECT_ACTION(NETDEV_POST_INIT) },
{ NOTIFIER_ERR_INJECT_ACTION(NETDEV_PRECHANGEMTU) },
{ NOTIFIER_ERR_INJECT_ACTION(NETDEV_PRECHANGEUPPER) },
+ { NOTIFIER_ERR_INJECT_ACTION(NETDEV_CHANGEUPPER) },
{}
}
};
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index f11345e163d7..aa8867e1d983 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -464,7 +464,8 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
hard_iface->soft_iface = soft_iface;
bat_priv = netdev_priv(hard_iface->soft_iface);
- ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface);
+ ret = netdev_master_upper_dev_link(hard_iface->net_dev,
+ soft_iface, NULL, NULL);
if (ret)
goto err_dev;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index ec02f5869a78..8d1d4a22c50d 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -493,7 +493,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
dev->priv_flags |= IFF_BRIDGE_PORT;
- err = netdev_master_upper_dev_link(dev, br->dev);
+ err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL);
if (err)
goto err5;
diff --git a/net/core/dev.c b/net/core/dev.c
index 5df6cbce727c..d1706e88fbeb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5421,7 +5421,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master,
- void *private)
+ void *upper_priv, void *upper_info)
{
struct netdev_notifier_changeupper_info changeupper_info;
struct netdev_adjacent *i, *j, *to_i, *to_j;
@@ -5445,6 +5445,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
changeupper_info.upper_dev = upper_dev;
changeupper_info.master = master;
changeupper_info.linking = true;
+ changeupper_info.upper_info = upper_info;
ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
&changeupper_info.info);
@@ -5452,7 +5453,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (ret)
return ret;
- ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
+ ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv,
master);
if (ret)
return ret;
@@ -5490,8 +5491,12 @@ static int __netdev_upper_dev_link(struct net_device *dev,
goto rollback_lower_mesh;
}
- call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
- &changeupper_info.info);
+ ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ &changeupper_info.info);
+ ret = notifier_to_errno(ret);
+ if (ret)
+ goto rollback_lower_mesh;
+
return 0;
rollback_lower_mesh:
@@ -5545,7 +5550,7 @@ rollback_mesh:
int netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev)
{
- return __netdev_upper_dev_link(dev, upper_dev, false, NULL);
+ return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
}
EXPORT_SYMBOL(netdev_upper_dev_link);
@@ -5553,6 +5558,8 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
* netdev_master_upper_dev_link - Add a master link to the upper device
* @dev: device
* @upper_dev: new upper device
+ * @upper_priv: upper device private
+ * @upper_info: upper info to be passed down via notifier
*
* Adds a link to device which is upper to this one. In this case, only
* one master upper device can be linked, although other non-master devices
@@ -5561,20 +5568,14 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
* counts are adjusted and the function returns zero.
*/
int netdev_master_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev)
+ struct net_device *upper_dev,
+ void *upper_priv, void *upper_info)
{
- return __netdev_upper_dev_link(dev, upper_dev, true, NULL);
+ return __netdev_upper_dev_link(dev, upper_dev, true,
+ upper_priv, upper_info);
}
EXPORT_SYMBOL(netdev_master_upper_dev_link);
-int netdev_master_upper_dev_link_private(struct net_device *dev,
- struct net_device *upper_dev,
- void *private)
-{
- return __netdev_upper_dev_link(dev, upper_dev, true, private);
-}
-EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
-
/**
* netdev_upper_dev_unlink - Removes a link to upper device
* @dev: device
@@ -5755,6 +5756,26 @@ int dev_get_nest_level(struct net_device *dev,
}
EXPORT_SYMBOL(dev_get_nest_level);
+/**
+ * netdev_lower_change - Dispatch event about lower device state change
+ * @lower_dev: device
+ * @lower_state_info: state to dispatch
+ *
+ * Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info.
+ * The caller must hold the RTNL lock.
+ */
+void netdev_lower_state_changed(struct net_device *lower_dev,
+ void *lower_state_info)
+{
+ struct netdev_notifier_changelowerstate_info changelowerstate_info;
+
+ ASSERT_RTNL();
+ changelowerstate_info.lower_state_info = lower_state_info;
+ call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
+ &changelowerstate_info.info);
+}
+EXPORT_SYMBOL(netdev_lower_state_changed);
+
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index b327368a3848..8f4dd4c39bfe 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -105,7 +105,7 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
rtnl_lock();
err = netdev_master_upper_dev_link(vport->dev,
- get_dpdev(vport->dp));
+ get_dpdev(vport->dp), NULL, NULL);
if (err)
goto error_unlock;