summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.c42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/selq.c231
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/selq.h51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h47
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c113
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c128
17 files changed, 480 insertions, 232 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index fcfd38fa9e6c..a7170ab3af97 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -28,7 +28,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \
en_selftest.o en/port.o en/monitor_stats.o en/health.o \
en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
- en/qos.o en/trap.o en/fs_tt_redirect.o
+ en/qos.o en/trap.o en/fs_tt_redirect.o en/selq.o
#
# Netdev extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index c14e06ca64d8..e29ac77e9bec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -59,6 +59,7 @@
#include "lib/hv_vhca.h"
#include "lib/clock.h"
#include "en/rx_res.h"
+#include "en/selq.h"
extern const struct net_device_ops mlx5e_netdev_ops;
struct page_pool;
@@ -172,8 +173,9 @@ struct page_pool;
#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\
ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT)
-#define MLX5E_MAX_KLM_PER_WQE \
- MLX5E_KLM_ENTRIES_PER_WQE(MLX5E_TX_MPW_MAX_NUM_DS << MLX5_MKEY_BSF_OCTO_SIZE)
+#define MLX5E_MAX_KLM_PER_WQE(mdev) \
+ MLX5E_KLM_ENTRIES_PER_WQE(mlx5e_get_sw_max_sq_mpw_wqebbs(mlx5e_get_max_sq_wqebbs(mdev)) \
+ << MLX5_MKEY_BSF_OCTO_SIZE)
#define MLX5E_MSG_LEVEL NETIF_MSG_LINK
@@ -221,6 +223,32 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS);
}
+/* The maximum WQE size can be retrieved by max_wqe_sz_sq in
+ * bytes units. Driver hardens the limitation to 1KB (16
+ * WQEBBs), unless firmware capability is stricter.
+ */
+static inline u16 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev)
+{
+ return min_t(u16, MLX5_SEND_WQE_MAX_WQEBBS,
+ MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB);
+}
+
+static inline u16 mlx5e_get_sw_max_sq_mpw_wqebbs(u16 max_sq_wqebbs)
+{
+/* The return value will be multiplied by MLX5_SEND_WQEBB_NUM_DS.
+ * Since max_sq_wqebbs may be up to MLX5_SEND_WQE_MAX_WQEBBS == 16,
+ * see mlx5e_get_max_sq_wqebbs(), the multiplication (16 * 4 == 64)
+ * overflows the 6-bit DS field of Ctrl Segment. Use a bound lower
+ * than MLX5_SEND_WQE_MAX_WQEBBS to let a full-session WQE be
+ * cache-aligned.
+ */
+#if L1_CACHE_BYTES < 128
+ return min_t(u16, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1);
+#else
+ return min_t(u16, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 2);
+#endif
+}
+
struct mlx5e_tx_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
struct mlx5_wqe_eth_seg eth;
@@ -427,12 +455,12 @@ struct mlx5e_txqsq {
struct netdev_queue *txq;
u32 sqn;
u16 stop_room;
+ u16 max_sq_mpw_wqebbs;
u8 min_inline_mode;
struct device *pdev;
__be32 mkey_be;
unsigned long state;
unsigned int hw_mtu;
- struct hwtstamp_config *tstamp;
struct mlx5_clock *clock;
struct net_device *netdev;
struct mlx5_core_dev *mdev;
@@ -446,6 +474,7 @@ struct mlx5e_txqsq {
struct work_struct recover_work;
struct mlx5e_ptpsq *ptpsq;
cqe_ts_to_ns ptp_cyc2time;
+ u16 max_sq_wqebbs;
} ____cacheline_aligned_in_smp;
struct mlx5e_dma_info {
@@ -540,6 +569,8 @@ struct mlx5e_xdpsq {
u32 sqn;
struct device *pdev;
__be32 mkey_be;
+ u16 stop_room;
+ u16 max_sq_mpw_wqebbs;
u8 min_inline_mode;
unsigned long state;
unsigned int hw_mtu;
@@ -547,6 +578,7 @@ struct mlx5e_xdpsq {
/* control path */
struct mlx5_wq_ctrl wq_ctrl;
struct mlx5e_channel *channel;
+ u16 max_sq_wqebbs;
} ____cacheline_aligned_in_smp;
struct mlx5e_ktls_resync_resp;
@@ -575,6 +607,7 @@ struct mlx5e_icosq {
/* control path */
struct mlx5_wq_ctrl wq_ctrl;
struct mlx5e_channel *channel;
+ u16 max_sq_wqebbs;
struct work_struct recover_work;
} ____cacheline_aligned_in_smp;
@@ -876,9 +909,8 @@ struct mlx5e_trap;
struct mlx5e_priv {
/* priv data path fields - start */
+ struct mlx5e_selq selq;
struct mlx5e_txqsq **txq2sq;
- int **channel_tc2realtxq;
- int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC];
#ifdef CONFIG_MLX5_CORE_EN_DCB
struct mlx5e_dcbx_dp dcbx_dp;
#endif
@@ -921,7 +953,6 @@ struct mlx5e_priv {
u16 drop_rq_q_counter;
struct notifier_block events_nb;
struct notifier_block blocking_events_nb;
- int num_tc_x_num_ch;
struct udp_tunnel_nic_info nic_info;
#ifdef CONFIG_MLX5_CORE_EN_DCB
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 66180ffb4606..d41936d65483 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -196,13 +196,13 @@ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par
u16 stop_room;
stop_room = mlx5e_tls_get_stop_room(mdev, params);
- stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+ stop_room += mlx5e_stop_room_for_max_wqe(mdev);
if (is_mpwqe)
/* A MPWQE can take up to the maximum-sized WQE + all the normal
* stop room can be taken if a new packet breaks the active
* MPWQE session and allocates its WQEs right away.
*/
- stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+ stop_room += mlx5e_stop_room_for_max_wqe(mdev);
return stop_room;
}
@@ -717,7 +717,7 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev,
int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
u32 wqebbs;
- max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE;
+ max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev);
max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr;
rest = max_hd_per_wqe % max_klm_per_umr;
@@ -774,10 +774,10 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev,
void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
mlx5e_build_sq_param_common(mdev, param);
- param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */
+ param->stop_room = mlx5e_stop_room_for_wqe(mdev, 1); /* for XSK NOP */
param->is_tls = mlx5e_accel_is_ktls_rx(mdev);
if (param->is_tls)
- param->stop_room += mlx5e_stop_room_for_wqe(1); /* for TLS RX resync NOP */
+ param->stop_room += mlx5e_stop_room_for_wqe(mdev, 1); /* for TLS RX resync NOP */
MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq));
MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 82baafd3c00c..335b20b6383b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -195,7 +195,6 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix,
int node;
sq->pdev = c->pdev;
- sq->tstamp = c->tstamp;
sq->clock = &mdev->clock;
sq->mkey_be = c->mkey_be;
sq->netdev = c->netdev;
@@ -449,7 +448,7 @@ static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev,
wq = MLX5_ADDR_OF(sqc, sqc, wq);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
- param->stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+ param->stop_room = mlx5e_stop_room_for_max_wqe(mdev);
mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
index c1e07496c89c..9db677e9ca9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
@@ -50,7 +50,6 @@ static int mlx5e_find_unused_qos_qid(struct mlx5e_priv *priv)
struct mlx5e_qos_node {
struct hlist_node hnode;
- struct rcu_head rcu;
struct mlx5e_qos_node *parent;
u64 rate;
u32 bw_share;
@@ -132,7 +131,11 @@ static void mlx5e_sw_node_delete(struct mlx5e_priv *priv, struct mlx5e_qos_node
__clear_bit(node->qid, priv->htb.qos_used_qids);
mlx5e_update_tx_netdev_queues(priv);
}
- kfree_rcu(node, rcu);
+ /* Make sure this qid is no longer selected by mlx5e_select_queue, so
+ * that mlx5e_reactivate_qos_sq can safely restart the netdev TX queue.
+ */
+ synchronize_net();
+ kfree(node);
}
/* TX datapath API */
@@ -273,10 +276,18 @@ err_free_sq:
static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node *node)
{
struct mlx5e_txqsq *sq;
+ u16 qid;
sq = mlx5e_get_qos_sq(priv, node->qid);
- WRITE_ONCE(priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, node->qid)], sq);
+ qid = mlx5e_qid_from_qos(&priv->channels, node->qid);
+
+ /* If it's a new queue, it will be marked as started at this point.
+ * Stop it before updating txq2sq.
+ */
+ mlx5e_tx_disable_queue(netdev_get_tx_queue(priv->netdev, qid));
+
+ priv->txq2sq[qid] = sq;
/* Make the change to txq2sq visible before the queue is started.
* As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
@@ -299,8 +310,13 @@ static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid);
mlx5e_deactivate_txqsq(sq);
- /* The queue is disabled, no synchronization with datapath is needed. */
priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL;
+
+ /* Make the change to txq2sq visible before the queue is started again.
+ * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
+ * which pairs with this barrier.
+ */
+ smp_wmb();
}
static void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid)
@@ -485,9 +501,11 @@ int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls,
opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
if (opened) {
+ mlx5e_selq_prepare(&priv->selq, &priv->channels.params, true);
+
err = mlx5e_qos_alloc_queues(priv, &priv->channels);
if (err)
- return err;
+ goto err_cancel_selq;
}
root = mlx5e_sw_node_create_root(priv);
@@ -508,6 +526,9 @@ int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls,
*/
smp_store_release(&priv->htb.maj_id, htb_maj_id);
+ if (opened)
+ mlx5e_selq_apply(&priv->selq);
+
return 0;
err_sw_node_delete:
@@ -516,6 +537,8 @@ err_sw_node_delete:
err_free_queues:
if (opened)
mlx5e_qos_close_all_queues(&priv->channels);
+err_cancel_selq:
+ mlx5e_selq_cancel(&priv->selq);
return err;
}
@@ -526,8 +549,15 @@ int mlx5e_htb_root_del(struct mlx5e_priv *priv)
qos_dbg(priv->mdev, "TC_HTB_DESTROY\n");
+ /* Wait until real_num_tx_queues is updated for mlx5e_select_queue,
+ * so that we can safely switch to its non-HTB non-PTP fastpath.
+ */
+ synchronize_net();
+
+ mlx5e_selq_prepare(&priv->selq, &priv->channels.params, false);
+ mlx5e_selq_apply(&priv->selq);
+
WRITE_ONCE(priv->htb.maj_id, 0);
- synchronize_rcu(); /* Sync with mlx5e_select_htb_queue and TX data path. */
root = mlx5e_sw_node_find(priv, MLX5E_HTB_CLASSID_ROOT);
if (!root) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
new file mode 100644
index 000000000000..d98a277eb7f8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "selq.h"
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
+#include "en.h"
+#include "en/ptp.h"
+
+struct mlx5e_selq_params {
+ unsigned int num_regular_queues;
+ unsigned int num_channels;
+ unsigned int num_tcs;
+ union {
+ u8 is_special_queues;
+ struct {
+ bool is_htb : 1;
+ bool is_ptp : 1;
+ };
+ };
+};
+
+int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock)
+{
+ struct mlx5e_selq_params *init_params;
+
+ selq->state_lock = state_lock;
+
+ selq->standby = kvzalloc(sizeof(*selq->standby), GFP_KERNEL);
+ if (!selq->standby)
+ return -ENOMEM;
+
+ init_params = kvzalloc(sizeof(*selq->active), GFP_KERNEL);
+ if (!init_params) {
+ kvfree(selq->standby);
+ selq->standby = NULL;
+ return -ENOMEM;
+ }
+ /* Assign dummy values, so that mlx5e_select_queue won't crash. */
+ *init_params = (struct mlx5e_selq_params) {
+ .num_regular_queues = 1,
+ .num_channels = 1,
+ .num_tcs = 1,
+ .is_htb = false,
+ .is_ptp = false,
+ };
+ rcu_assign_pointer(selq->active, init_params);
+
+ return 0;
+}
+
+void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
+{
+ WARN_ON_ONCE(selq->is_prepared);
+
+ kvfree(selq->standby);
+ selq->standby = NULL;
+ selq->is_prepared = true;
+
+ mlx5e_selq_apply(selq);
+
+ kvfree(selq->standby);
+ selq->standby = NULL;
+}
+
+void mlx5e_selq_prepare(struct mlx5e_selq *selq, struct mlx5e_params *params, bool htb)
+{
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(selq->is_prepared);
+
+ selq->is_prepared = true;
+
+ selq->standby->num_channels = params->num_channels;
+ selq->standby->num_tcs = mlx5e_get_dcb_num_tc(params);
+ selq->standby->num_regular_queues =
+ selq->standby->num_channels * selq->standby->num_tcs;
+ selq->standby->is_htb = htb;
+ selq->standby->is_ptp = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS);
+}
+
+void mlx5e_selq_apply(struct mlx5e_selq *selq)
+{
+ struct mlx5e_selq_params *old_params;
+
+ WARN_ON_ONCE(!selq->is_prepared);
+
+ selq->is_prepared = false;
+
+ old_params = rcu_replace_pointer(selq->active, selq->standby,
+ lockdep_is_held(selq->state_lock));
+ synchronize_net(); /* Wait until ndo_select_queue starts emitting correct values. */
+ selq->standby = old_params;
+}
+
+void mlx5e_selq_cancel(struct mlx5e_selq *selq)
+{
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(!selq->is_prepared);
+
+ selq->is_prepared = false;
+}
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+static int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb)
+{
+ int dscp_cp = 0;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+
+ return priv->dcbx_dp.dscp2prio[dscp_cp];
+}
+#endif
+
+static int mlx5e_get_up(struct mlx5e_priv *priv, struct sk_buff *skb)
+{
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (READ_ONCE(priv->dcbx_dp.trust_state) == MLX5_QPTS_TRUST_DSCP)
+ return mlx5e_get_dscp_up(priv, skb);
+#endif
+ if (skb_vlan_tag_present(skb))
+ return skb_vlan_tag_get_prio(skb);
+ return 0;
+}
+
+static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb,
+ struct mlx5e_selq_params *selq)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int up;
+
+ up = selq->num_tcs > 1 ? mlx5e_get_up(priv, skb) : 0;
+
+ return selq->num_regular_queues + up;
+}
+
+static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb)
+{
+ u16 classid;
+
+ /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */
+ if ((TC_H_MAJ(skb->priority) >> 16) == smp_load_acquire(&priv->htb.maj_id))
+ classid = TC_H_MIN(skb->priority);
+ else
+ classid = READ_ONCE(priv->htb.defcls);
+
+ if (!classid)
+ return 0;
+
+ return mlx5e_get_txq_by_classid(priv, classid);
+}
+
+u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_selq_params *selq;
+ int txq_ix, up;
+
+ selq = rcu_dereference_bh(priv->selq.active);
+
+ /* This is a workaround needed only for the mlx5e_netdev_change_profile
+ * flow that zeroes out the whole priv without unregistering the netdev
+ * and without preventing ndo_select_queue from being called.
+ */
+ if (unlikely(!selq))
+ return 0;
+
+ if (likely(!selq->is_special_queues)) {
+ /* No special queues, netdev_pick_tx returns one of the regular ones. */
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ if (selq->num_tcs <= 1)
+ return txq_ix;
+
+ up = mlx5e_get_up(priv, skb);
+
+ /* Normalize any picked txq_ix to [0, num_channels),
+ * So we can return a txq_ix that matches the channel and
+ * packet UP.
+ */
+ return mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels) +
+ up * selq->num_channels;
+ }
+
+ if (unlikely(selq->is_htb)) {
+ /* num_tcs == 1, shortcut for PTP */
+
+ txq_ix = mlx5e_select_htb_queue(priv, skb);
+ if (txq_ix > 0)
+ return txq_ix;
+
+ if (unlikely(selq->is_ptp && mlx5e_use_ptpsq(skb)))
+ return selq->num_channels;
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs.
+ * If they are selected, switch to regular queues.
+ * Driver to select these queues only at mlx5e_select_ptpsq()
+ * and mlx5e_select_htb_queue().
+ */
+ return mlx5e_txq_to_ch_ix_htb(txq_ix, selq->num_channels);
+ }
+
+ /* PTP is enabled */
+
+ if (mlx5e_use_ptpsq(skb))
+ return mlx5e_select_ptpsq(dev, skb, selq);
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ /* Normalize any picked txq_ix to [0, num_channels). Queues in range
+ * [0, num_regular_queues) will be mapped to the corresponding channel
+ * index, so that we can apply the packet's UP (if num_tcs > 1).
+ * If netdev_pick_tx() picks ptp_channel, switch to a regular queue,
+ * because driver should select the PTP only at mlx5e_select_ptpsq().
+ */
+ txq_ix = mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels);
+
+ if (selq->num_tcs <= 1)
+ return txq_ix;
+
+ up = mlx5e_get_up(priv, skb);
+
+ return txq_ix + up * selq->num_channels;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h
new file mode 100644
index 000000000000..6c070141d8f1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_SELQ_H__
+#define __MLX5_EN_SELQ_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_selq_params;
+
+struct mlx5e_selq {
+ struct mlx5e_selq_params __rcu *active;
+ struct mlx5e_selq_params *standby;
+ struct mutex *state_lock; /* points to priv->state_lock */
+ bool is_prepared;
+};
+
+struct mlx5e_params;
+struct net_device;
+struct sk_buff;
+
+int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock);
+void mlx5e_selq_cleanup(struct mlx5e_selq *selq);
+void mlx5e_selq_prepare(struct mlx5e_selq *selq, struct mlx5e_params *params, bool htb);
+void mlx5e_selq_apply(struct mlx5e_selq *selq);
+void mlx5e_selq_cancel(struct mlx5e_selq *selq);
+
+static inline u16 mlx5e_txq_to_ch_ix(u16 txq, u16 num_channels)
+{
+ while (unlikely(txq >= num_channels))
+ txq -= num_channels;
+ return txq;
+}
+
+static inline u16 mlx5e_txq_to_ch_ix_htb(u16 txq, u16 num_channels)
+{
+ if (unlikely(txq >= num_channels)) {
+ if (unlikely(txq >= num_channels << 3))
+ txq %= num_channels;
+ else
+ do
+ txq -= num_channels;
+ while (txq >= num_channels);
+ }
+ return txq;
+}
+
+u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev);
+
+#endif /* __MLX5_EN_SELQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index b789af07829c..210d23bf3701 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -9,19 +9,6 @@
#define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
-/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
- * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
- * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a
- * full-session WQE be cache-aligned.
- */
-#if L1_CACHE_BYTES < 128
-#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
-#else
-#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
-#endif
-
-#define MLX5E_TX_MPW_MAX_NUM_DS (MLX5E_TX_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS)
-
#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
@@ -68,8 +55,6 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
/* TX */
-u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev);
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
@@ -308,9 +293,9 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more);
void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq);
-static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session)
+static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
{
- return session->ds_count == MLX5E_TX_MPW_MAX_NUM_DS;
+ return session->ds_count == max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
}
static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
@@ -431,10 +416,10 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
}
}
-static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size)
-{
- BUILD_BUG_ON(PAGE_SIZE / MLX5_SEND_WQE_BB < MLX5_SEND_WQE_MAX_WQEBBS);
+#define MLX5E_STOP_ROOM(wqebbs) ((wqebbs) * 2 - 1)
+static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size)
+{
/* A WQE must not cross the page boundary, hence two conditions:
* 1. Its size must not exceed the page size.
* 2. If the WQE size is X, and the space remaining in a page is less
@@ -443,18 +428,28 @@ static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size)
* stop room of X-1 + X.
* WQE size is also limited by the hardware limit.
*/
+ WARN_ONCE(wqe_size > mlx5e_get_max_sq_wqebbs(mdev),
+ "wqe_size %u is greater than max SQ WQEBBs %u",
+ wqe_size, mlx5e_get_max_sq_wqebbs(mdev));
- if (__builtin_constant_p(wqe_size))
- BUILD_BUG_ON(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
- else
- WARN_ON_ONCE(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
- return wqe_size * 2 - 1;
+ return MLX5E_STOP_ROOM(wqe_size);
+}
+
+static inline u16 mlx5e_stop_room_for_max_wqe(struct mlx5_core_dev *mdev)
+{
+ return MLX5E_STOP_ROOM(mlx5e_get_max_sq_wqebbs(mdev));
}
static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size)
{
- u16 room = sq->reserved_room + mlx5e_stop_room_for_wqe(wqe_size);
+ u16 room = sq->reserved_room;
+
+ WARN_ONCE(wqe_size > sq->max_sq_wqebbs,
+ "wqe_size %u is greater than max SQ WQEBBs %u",
+ wqe_size, sq->max_sq_wqebbs);
+
+ room += MLX5E_STOP_ROOM(wqe_size);
return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 56e10c84a706..a7f020399370 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -199,7 +199,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
struct mlx5e_tx_wqe *wqe;
u16 pi;
- pi = mlx5e_xdpsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
+ pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
net_prefetchw(wqe->data);
@@ -245,10 +245,8 @@ enum {
INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
{
if (unlikely(!sq->mpwqe.wqe)) {
- const u16 stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
-
if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
- stop_room))) {
+ sq->stop_room))) {
/* SQ is full, ring doorbell */
mlx5e_xmit_xdp_doorbell(sq);
sq->stats->full++;
@@ -288,7 +286,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx
mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
- if (unlikely(mlx5e_xdp_mpqwe_is_full(session)))
+ if (unlikely(mlx5e_xdp_mpqwe_is_full(session, sq->max_sq_mpw_wqebbs)))
mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 8d991c3b7a50..c62f11d7ef6a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -123,12 +123,13 @@ static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur)
return cur;
}
-static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session)
+static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
{
if (session->inline_on)
return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT >
- MLX5E_TX_MPW_MAX_NUM_DS;
- return mlx5e_tx_mpwqe_is_full(session);
+ max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
+
+ return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs);
}
struct mlx5e_xdp_wqe_info {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 9ad3459fb63a..aaf11c66bf4c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -32,9 +32,9 @@ u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *pa
num_dumps = mlx5e_ktls_dumps_num_wqes(params, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE);
- stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS);
- stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS);
- stop_room += num_dumps * mlx5e_stop_room_for_wqe(MLX5E_KTLS_DUMP_WQEBBS);
+ stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS);
+ stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS);
+ stop_room += num_dumps * mlx5e_stop_room_for_wqe(mdev, MLX5E_KTLS_DUMP_WQEBBS);
return stop_room;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index 7a700f913582..a05580cea481 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -386,5 +386,5 @@ u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par
/* FPGA */
/* Resync SKB. */
- return mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+ return mlx5e_stop_room_for_max_wqe(mdev);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index a4c8d8d00d5a..d659fe07d464 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -1142,7 +1142,7 @@ static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context)
err = mlx5_set_trust_state(priv->mdev, *trust_state);
if (err)
return err;
- priv->dcbx_dp.trust_state = *trust_state;
+ WRITE_ONCE(priv->dcbx_dp.trust_state, *trust_state);
return 0;
}
@@ -1187,16 +1187,18 @@ static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio)
static int mlx5e_trust_initialize(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
+ u8 trust_state;
int err;
- priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP;
-
- if (!MLX5_DSCP_SUPPORTED(mdev))
+ if (!MLX5_DSCP_SUPPORTED(mdev)) {
+ WRITE_ONCE(priv->dcbx_dp.trust_state, MLX5_QPTS_TRUST_PCP);
return 0;
+ }
- err = mlx5_query_trust_state(priv->mdev, &priv->dcbx_dp.trust_state);
+ err = mlx5_query_trust_state(priv->mdev, &trust_state);
if (err)
return err;
+ WRITE_ONCE(priv->dcbx_dp.trust_state, trust_state);
mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params,
priv->dcbx_dp.trust_state);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bf80fb612449..b157c7aac4ca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -72,12 +72,13 @@
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
{
- bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) &&
- MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
- MLX5_CAP_ETH(mdev, reg_umr_sq);
- u16 max_wqe_sz_cap = MLX5_CAP_GEN(mdev, max_wqe_sz_sq);
- bool inline_umr = MLX5E_UMR_WQE_INLINE_SZ <= max_wqe_sz_cap;
+ bool striding_rq_umr, inline_umr;
+ u16 max_wqe_sz_cap;
+ striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
+ MLX5_CAP_ETH(mdev, reg_umr_sq);
+ max_wqe_sz_cap = mlx5e_get_max_sq_wqebbs(mdev) * MLX5_SEND_WQE_BB;
+ inline_umr = max_wqe_sz_cap >= MLX5E_UMR_WQE_INLINE_SZ;
if (!striding_rq_umr)
return false;
if (!inline_umr) {
@@ -1164,6 +1165,9 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
is_redirect ?
&c->priv->channel_stats[c->ix]->xdpsq :
&c->priv->channel_stats[c->ix]->rq_xdpsq;
+ sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
+ sq->stop_room = MLX5E_STOP_ROOM(sq->max_sq_wqebbs);
+ sq->max_sq_mpw_wqebbs = mlx5e_get_sw_max_sq_mpw_wqebbs(sq->max_sq_wqebbs);
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1238,6 +1242,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
sq->channel = c;
sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
sq->reserved_room = param->stop_room;
+ sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1313,7 +1318,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
int err;
sq->pdev = c->pdev;
- sq->tstamp = c->tstamp;
sq->clock = &mdev->clock;
sq->mkey_be = c->mkey_be;
sq->netdev = c->netdev;
@@ -1324,6 +1328,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
+ sq->max_sq_mpw_wqebbs = mlx5e_get_sw_max_sq_mpw_wqebbs(sq->max_sq_wqebbs);
INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
@@ -2677,39 +2683,41 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
struct mlx5e_txqsq *sq = &c->sq[tc];
priv->txq2sq[sq->txq_ix] = sq;
- priv->channel_tc2realtxq[i][tc] = i + tc * ch;
}
}
if (!priv->channels.ptp)
- return;
+ goto out;
if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state))
- return;
+ goto out;
for (tc = 0; tc < num_tc; tc++) {
struct mlx5e_ptp *c = priv->channels.ptp;
struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq;
priv->txq2sq[sq->txq_ix] = sq;
- priv->port_ptp_tc2realtxq[tc] = priv->num_tc_x_num_ch + tc;
}
-}
-static void mlx5e_update_num_tc_x_num_ch(struct mlx5e_priv *priv)
-{
- /* Sync with mlx5e_select_queue. */
- WRITE_ONCE(priv->num_tc_x_num_ch,
- mlx5e_get_dcb_num_tc(&priv->channels.params) * priv->channels.num);
+out:
+ /* Make the change to txq2sq visible before the queue is started.
+ * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
+ * which pairs with this barrier.
+ */
+ smp_wmb();
}
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
{
- mlx5e_update_num_tc_x_num_ch(priv);
mlx5e_build_txq_maps(priv);
mlx5e_activate_channels(&priv->channels);
mlx5e_qos_activate_queues(priv);
mlx5e_xdp_tx_enable(priv);
+
+ /* dev_watchdog() wants all TX queues to be started when the carrier is
+ * OK, including the ones in range real_num_tx_queues..num_tx_queues-1.
+ * Make it happy to avoid TX timeout false alarms.
+ */
netif_tx_start_all_queues(priv->netdev);
if (mlx5e_is_vport_rep(priv))
@@ -2729,11 +2737,13 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
if (mlx5e_is_vport_rep(priv))
mlx5e_remove_sqs_fwd_rules(priv);
- /* FIXME: This is a W/A only for tx timeout watch dog false alarm when
- * polling for inactive tx queues.
+ /* The results of ndo_select_queue are unreliable, while netdev config
+ * is being changed (real_num_tx_queues, num_tc). Stop all queues to
+ * prevent ndo_start_xmit from being called, so that it can assume that
+ * the selected queue is always valid.
*/
- netif_tx_stop_all_queues(priv->netdev);
netif_tx_disable(priv->netdev);
+
mlx5e_xdp_tx_disable(priv);
mlx5e_deactivate_channels(&priv->channels);
}
@@ -2793,6 +2803,7 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
mlx5e_close_channels(&old_chs);
priv->profile->update_rx(priv);
+ mlx5e_selq_apply(&priv->selq);
out:
mlx5e_activate_priv_channels(priv);
@@ -2816,13 +2827,24 @@ int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
return mlx5e_switch_priv_params(priv, params, preactivate, context);
new_chs.params = *params;
+
+ mlx5e_selq_prepare(&priv->selq, &new_chs.params, !!priv->htb.maj_id);
+
err = mlx5e_open_channels(priv, &new_chs);
if (err)
- return err;
+ goto err_cancel_selq;
+
err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context);
if (err)
- mlx5e_close_channels(&new_chs);
+ goto err_close;
+
+ return 0;
+
+err_close:
+ mlx5e_close_channels(&new_chs);
+err_cancel_selq:
+ mlx5e_selq_cancel(&priv->selq);
return err;
}
@@ -2862,6 +2884,8 @@ int mlx5e_open_locked(struct net_device *netdev)
struct mlx5e_priv *priv = netdev_priv(netdev);
int err;
+ mlx5e_selq_prepare(&priv->selq, &priv->channels.params, !!priv->htb.maj_id);
+
set_bit(MLX5E_STATE_OPENED, &priv->state);
err = mlx5e_open_channels(priv, &priv->channels);
@@ -2869,6 +2893,7 @@ int mlx5e_open_locked(struct net_device *netdev)
goto err_clear_state_opened_flag;
priv->profile->update_rx(priv);
+ mlx5e_selq_apply(&priv->selq);
mlx5e_activate_priv_channels(priv);
mlx5e_apply_traps(priv, true);
if (priv->profile->update_carrier)
@@ -2879,6 +2904,7 @@ int mlx5e_open_locked(struct net_device *netdev)
err_clear_state_opened_flag:
clear_bit(MLX5E_STATE_OPENED, &priv->state);
+ mlx5e_selq_cancel(&priv->selq);
return err;
}
@@ -4637,11 +4663,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
priv->max_nch);
mlx5e_params_mqprio_reset(params);
- /* Set an initial non-zero value, so that mlx5e_select_queue won't
- * divide by zero if called before first activating channels.
- */
- priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc;
-
/* SQ */
params->log_sq_size = is_kdump_kernel() ?
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
@@ -5194,7 +5215,8 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
struct net_device *netdev,
struct mlx5_core_dev *mdev)
{
- int nch, num_txqs, node, i;
+ int nch, num_txqs, node;
+ int err;
num_txqs = netdev->num_tx_queues;
nch = mlx5e_calc_max_nch(mdev, netdev, profile);
@@ -5211,6 +5233,11 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
return -ENOMEM;
mutex_init(&priv->state_lock);
+
+ err = mlx5e_selq_init(&priv->selq, &priv->state_lock);
+ if (err)
+ goto err_free_cpumask;
+
hash_init(priv->htb.qos_tc2node);
INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
@@ -5219,7 +5246,7 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
priv->wq = create_singlethread_workqueue("mlx5e");
if (!priv->wq)
- goto err_free_cpumask;
+ goto err_free_selq;
priv->txq2sq = kcalloc_node(num_txqs, sizeof(*priv->txq2sq), GFP_KERNEL, node);
if (!priv->txq2sq)
@@ -5229,36 +5256,21 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
if (!priv->tx_rates)
goto err_free_txq2sq;
- priv->channel_tc2realtxq =
- kcalloc_node(nch, sizeof(*priv->channel_tc2realtxq), GFP_KERNEL, node);
- if (!priv->channel_tc2realtxq)
- goto err_free_tx_rates;
-
- for (i = 0; i < nch; i++) {
- priv->channel_tc2realtxq[i] =
- kcalloc_node(profile->max_tc, sizeof(**priv->channel_tc2realtxq),
- GFP_KERNEL, node);
- if (!priv->channel_tc2realtxq[i])
- goto err_free_channel_tc2realtxq;
- }
-
priv->channel_stats =
kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node);
if (!priv->channel_stats)
- goto err_free_channel_tc2realtxq;
+ goto err_free_tx_rates;
return 0;
-err_free_channel_tc2realtxq:
- while (--i >= 0)
- kfree(priv->channel_tc2realtxq[i]);
- kfree(priv->channel_tc2realtxq);
err_free_tx_rates:
kfree(priv->tx_rates);
err_free_txq2sq:
kfree(priv->txq2sq);
err_destroy_workqueue:
destroy_workqueue(priv->wq);
+err_free_selq:
+ mlx5e_selq_cleanup(&priv->selq);
err_free_cpumask:
free_cpumask_var(priv->scratchpad.cpumask);
return -ENOMEM;
@@ -5275,12 +5287,12 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
for (i = 0; i < priv->stats_nch; i++)
kvfree(priv->channel_stats[i]);
kfree(priv->channel_stats);
- for (i = 0; i < priv->max_nch; i++)
- kfree(priv->channel_tc2realtxq[i]);
- kfree(priv->channel_tc2realtxq);
kfree(priv->tx_rates);
kfree(priv->txq2sq);
destroy_workqueue(priv->wq);
+ mutex_lock(&priv->state_lock);
+ mlx5e_selq_cleanup(&priv->selq);
+ mutex_unlock(&priv->state_lock);
free_cpumask_var(priv->scratchpad.cpumask);
for (i = 0; i < priv->htb.max_qos_sqs; i++)
@@ -5346,6 +5358,7 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *prof
}
netif_carrier_off(netdev);
+ netif_tx_disable(netdev);
dev_net_set(netdev, mlx5_core_net(mdev));
return netdev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 06d1f46f1688..a3f257623af4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -632,11 +632,6 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
params->mqprio.num_tc = 1;
params->tunneled_offload_en = false;
- /* Set an initial non-zero value, so that mlx5e_select_queue won't
- * divide by zero if called before first activating channels.
- */
- priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc;
-
mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index ee0a8f5206e3..91fdf957cd7c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -620,7 +620,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
struct mlx5e_icosq *sq = rq->icosq;
int i, err, max_klm_entries, len;
- max_klm_entries = MLX5E_MAX_KLM_PER_WQE;
+ max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev);
klm_entries = bitmap_find_window(shampo->bitmap,
shampo->hd_per_wqe,
shampo->hd_per_wq, shampo->pi);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index ee7ecb88adc1..2dc48406cd08 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -53,117 +53,6 @@ static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
}
}
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb)
-{
- int dscp_cp = 0;
-
- if (skb->protocol == htons(ETH_P_IP))
- dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
- else if (skb->protocol == htons(ETH_P_IPV6))
- dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
-
- return priv->dcbx_dp.dscp2prio[dscp_cp];
-}
-#endif
-
-static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb)
-{
- struct mlx5e_priv *priv = netdev_priv(dev);
- int up = 0;
-
- if (!netdev_get_num_tc(dev))
- goto return_txq;
-
-#ifdef CONFIG_MLX5_CORE_EN_DCB
- if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP)
- up = mlx5e_get_dscp_up(priv, skb);
- else
-#endif
- if (skb_vlan_tag_present(skb))
- up = skb_vlan_tag_get_prio(skb);
-
-return_txq:
- return priv->port_ptp_tc2realtxq[up];
-}
-
-static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb,
- u16 htb_maj_id)
-{
- u16 classid;
-
- if ((TC_H_MAJ(skb->priority) >> 16) == htb_maj_id)
- classid = TC_H_MIN(skb->priority);
- else
- classid = READ_ONCE(priv->htb.defcls);
-
- if (!classid)
- return 0;
-
- return mlx5e_get_txq_by_classid(priv, classid);
-}
-
-u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev)
-{
- struct mlx5e_priv *priv = netdev_priv(dev);
- int num_tc_x_num_ch;
- int txq_ix;
- int up = 0;
- int ch_ix;
-
- /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */
- num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch);
- if (unlikely(dev->real_num_tx_queues > num_tc_x_num_ch)) {
- struct mlx5e_ptp *ptp_channel;
-
- /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */
- u16 htb_maj_id = smp_load_acquire(&priv->htb.maj_id);
-
- if (unlikely(htb_maj_id)) {
- txq_ix = mlx5e_select_htb_queue(priv, skb, htb_maj_id);
- if (txq_ix > 0)
- return txq_ix;
- }
-
- ptp_channel = READ_ONCE(priv->channels.ptp);
- if (unlikely(ptp_channel &&
- test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) &&
- mlx5e_use_ptpsq(skb)))
- return mlx5e_select_ptpsq(dev, skb);
-
- txq_ix = netdev_pick_tx(dev, skb, NULL);
- /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs.
- * If they are selected, switch to regular queues.
- * Driver to select these queues only at mlx5e_select_ptpsq()
- * and mlx5e_select_htb_queue().
- */
- if (unlikely(txq_ix >= num_tc_x_num_ch))
- txq_ix %= num_tc_x_num_ch;
- } else {
- txq_ix = netdev_pick_tx(dev, skb, NULL);
- }
-
- if (!netdev_get_num_tc(dev))
- return txq_ix;
-
-#ifdef CONFIG_MLX5_CORE_EN_DCB
- if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP)
- up = mlx5e_get_dscp_up(priv, skb);
- else
-#endif
- if (skb_vlan_tag_present(skb))
- up = skb_vlan_tag_get_prio(skb);
-
- /* Normalize any picked txq_ix to [0, num_channels),
- * So we can return a txq_ix that matches the channel and
- * packet UP.
- */
- ch_ix = priv->txq2sq[txq_ix]->ch_ix;
-
- return priv->channel_tc2realtxq[ch_ix][up];
-}
-
static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
{
#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
@@ -544,7 +433,7 @@ static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe *wqe;
u16 pi;
- pi = mlx5e_txqsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
+ pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
net_prefetchw(wqe->data);
@@ -645,7 +534,7 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
mlx5e_tx_skb_update_hwts_flags(skb);
- if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) {
+ if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) {
/* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */
cseg = mlx5e_tx_mpwqe_session_complete(sq);
@@ -691,8 +580,21 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
struct mlx5e_txqsq *sq;
u16 pi;
+ /* All changes to txq2sq are performed in sync with mlx5e_xmit, when the
+ * queue being changed is disabled, and smp_wmb guarantees that the
+ * changes are visible before mlx5e_xmit tries to read from txq2sq. It
+ * guarantees that the value of txq2sq[qid] doesn't change while
+ * mlx5e_xmit is running on queue number qid. smb_wmb is paired with
+ * HARD_TX_LOCK around ndo_start_xmit, which serves as an ACQUIRE.
+ */
sq = priv->txq2sq[skb_get_queue_mapping(skb)];
if (unlikely(!sq)) {
+ /* Two cases when sq can be NULL:
+ * 1. The HTB node is registered, and mlx5e_select_queue
+ * selected its queue ID, but the SQ itself is not yet created.
+ * 2. HTB SQ creation failed. Similar to the previous case, but
+ * the SQ won't be created.
+ */
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}