diff options
17 files changed, 480 insertions, 232 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index fcfd38fa9e6c..a7170ab3af97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -28,7 +28,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ - en/qos.o en/trap.o en/fs_tt_redirect.o + en/qos.o en/trap.o en/fs_tt_redirect.o en/selq.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index c14e06ca64d8..e29ac77e9bec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -59,6 +59,7 @@ #include "lib/hv_vhca.h" #include "lib/clock.h" #include "en/rx_res.h" +#include "en/selq.h" extern const struct net_device_ops mlx5e_netdev_ops; struct page_pool; @@ -172,8 +173,9 @@ struct page_pool; #define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\ ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT) -#define MLX5E_MAX_KLM_PER_WQE \ - MLX5E_KLM_ENTRIES_PER_WQE(MLX5E_TX_MPW_MAX_NUM_DS << MLX5_MKEY_BSF_OCTO_SIZE) +#define MLX5E_MAX_KLM_PER_WQE(mdev) \ + MLX5E_KLM_ENTRIES_PER_WQE(mlx5e_get_sw_max_sq_mpw_wqebbs(mlx5e_get_max_sq_wqebbs(mdev)) \ + << MLX5_MKEY_BSF_OCTO_SIZE) #define MLX5E_MSG_LEVEL NETIF_MSG_LINK @@ -221,6 +223,32 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS); } +/* The maximum WQE size can be retrieved by max_wqe_sz_sq in + * bytes units. Driver hardens the limitation to 1KB (16 + * WQEBBs), unless firmware capability is stricter. + */ +static inline u16 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev) +{ + return min_t(u16, MLX5_SEND_WQE_MAX_WQEBBS, + MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB); +} + +static inline u16 mlx5e_get_sw_max_sq_mpw_wqebbs(u16 max_sq_wqebbs) +{ +/* The return value will be multiplied by MLX5_SEND_WQEBB_NUM_DS. + * Since max_sq_wqebbs may be up to MLX5_SEND_WQE_MAX_WQEBBS == 16, + * see mlx5e_get_max_sq_wqebbs(), the multiplication (16 * 4 == 64) + * overflows the 6-bit DS field of Ctrl Segment. Use a bound lower + * than MLX5_SEND_WQE_MAX_WQEBBS to let a full-session WQE be + * cache-aligned. + */ +#if L1_CACHE_BYTES < 128 + return min_t(u16, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1); +#else + return min_t(u16, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 2); +#endif +} + struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; @@ -427,12 +455,12 @@ struct mlx5e_txqsq { struct netdev_queue *txq; u32 sqn; u16 stop_room; + u16 max_sq_mpw_wqebbs; u8 min_inline_mode; struct device *pdev; __be32 mkey_be; unsigned long state; unsigned int hw_mtu; - struct hwtstamp_config *tstamp; struct mlx5_clock *clock; struct net_device *netdev; struct mlx5_core_dev *mdev; @@ -446,6 +474,7 @@ struct mlx5e_txqsq { struct work_struct recover_work; struct mlx5e_ptpsq *ptpsq; cqe_ts_to_ns ptp_cyc2time; + u16 max_sq_wqebbs; } ____cacheline_aligned_in_smp; struct mlx5e_dma_info { @@ -540,6 +569,8 @@ struct mlx5e_xdpsq { u32 sqn; struct device *pdev; __be32 mkey_be; + u16 stop_room; + u16 max_sq_mpw_wqebbs; u8 min_inline_mode; unsigned long state; unsigned int hw_mtu; @@ -547,6 +578,7 @@ struct mlx5e_xdpsq { /* control path */ struct mlx5_wq_ctrl wq_ctrl; struct mlx5e_channel *channel; + u16 max_sq_wqebbs; } ____cacheline_aligned_in_smp; struct mlx5e_ktls_resync_resp; @@ -575,6 +607,7 @@ struct mlx5e_icosq { /* control path */ struct mlx5_wq_ctrl wq_ctrl; struct mlx5e_channel *channel; + u16 max_sq_wqebbs; struct work_struct recover_work; } ____cacheline_aligned_in_smp; @@ -876,9 +909,8 @@ struct mlx5e_trap; struct mlx5e_priv { /* priv data path fields - start */ + struct mlx5e_selq selq; struct mlx5e_txqsq **txq2sq; - int **channel_tc2realtxq; - int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC]; #ifdef CONFIG_MLX5_CORE_EN_DCB struct mlx5e_dcbx_dp dcbx_dp; #endif @@ -921,7 +953,6 @@ struct mlx5e_priv { u16 drop_rq_q_counter; struct notifier_block events_nb; struct notifier_block blocking_events_nb; - int num_tc_x_num_ch; struct udp_tunnel_nic_info nic_info; #ifdef CONFIG_MLX5_CORE_EN_DCB diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 66180ffb4606..d41936d65483 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -196,13 +196,13 @@ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par u16 stop_room; stop_room = mlx5e_tls_get_stop_room(mdev, params); - stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + stop_room += mlx5e_stop_room_for_max_wqe(mdev); if (is_mpwqe) /* A MPWQE can take up to the maximum-sized WQE + all the normal * stop room can be taken if a new packet breaks the active * MPWQE session and allocates its WQEs right away. */ - stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + stop_room += mlx5e_stop_room_for_max_wqe(mdev); return stop_room; } @@ -717,7 +717,7 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev, int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); u32 wqebbs; - max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE; + max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev); max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param); max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr; rest = max_hd_per_wqe % max_klm_per_umr; @@ -774,10 +774,10 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev, void *wq = MLX5_ADDR_OF(sqc, sqc, wq); mlx5e_build_sq_param_common(mdev, param); - param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */ + param->stop_room = mlx5e_stop_room_for_wqe(mdev, 1); /* for XSK NOP */ param->is_tls = mlx5e_accel_is_ktls_rx(mdev); if (param->is_tls) - param->stop_room += mlx5e_stop_room_for_wqe(1); /* for TLS RX resync NOP */ + param->stop_room += mlx5e_stop_room_for_wqe(mdev, 1); /* for TLS RX resync NOP */ MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq)); MLX5_SET(wq, wq, log_wq_sz, log_wq_size); mlx5e_build_ico_cq_param(mdev, log_wq_size, ¶m->cqp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 82baafd3c00c..335b20b6383b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -195,7 +195,6 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, int node; sq->pdev = c->pdev; - sq->tstamp = c->tstamp; sq->clock = &mdev->clock; sq->mkey_be = c->mkey_be; sq->netdev = c->netdev; @@ -449,7 +448,7 @@ static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev, wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); - param->stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + param->stop_room = mlx5e_stop_room_for_max_wqe(mdev); mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index c1e07496c89c..9db677e9ca9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -50,7 +50,6 @@ static int mlx5e_find_unused_qos_qid(struct mlx5e_priv *priv) struct mlx5e_qos_node { struct hlist_node hnode; - struct rcu_head rcu; struct mlx5e_qos_node *parent; u64 rate; u32 bw_share; @@ -132,7 +131,11 @@ static void mlx5e_sw_node_delete(struct mlx5e_priv *priv, struct mlx5e_qos_node __clear_bit(node->qid, priv->htb.qos_used_qids); mlx5e_update_tx_netdev_queues(priv); } - kfree_rcu(node, rcu); + /* Make sure this qid is no longer selected by mlx5e_select_queue, so + * that mlx5e_reactivate_qos_sq can safely restart the netdev TX queue. + */ + synchronize_net(); + kfree(node); } /* TX datapath API */ @@ -273,10 +276,18 @@ err_free_sq: static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node *node) { struct mlx5e_txqsq *sq; + u16 qid; sq = mlx5e_get_qos_sq(priv, node->qid); - WRITE_ONCE(priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, node->qid)], sq); + qid = mlx5e_qid_from_qos(&priv->channels, node->qid); + + /* If it's a new queue, it will be marked as started at this point. + * Stop it before updating txq2sq. + */ + mlx5e_tx_disable_queue(netdev_get_tx_queue(priv->netdev, qid)); + + priv->txq2sq[qid] = sq; /* Make the change to txq2sq visible before the queue is started. * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, @@ -299,8 +310,13 @@ static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid) qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid); mlx5e_deactivate_txqsq(sq); - /* The queue is disabled, no synchronization with datapath is needed. */ priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL; + + /* Make the change to txq2sq visible before the queue is started again. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. + */ + smp_wmb(); } static void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid) @@ -485,9 +501,11 @@ int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls, opened = test_bit(MLX5E_STATE_OPENED, &priv->state); if (opened) { + mlx5e_selq_prepare(&priv->selq, &priv->channels.params, true); + err = mlx5e_qos_alloc_queues(priv, &priv->channels); if (err) - return err; + goto err_cancel_selq; } root = mlx5e_sw_node_create_root(priv); @@ -508,6 +526,9 @@ int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls, */ smp_store_release(&priv->htb.maj_id, htb_maj_id); + if (opened) + mlx5e_selq_apply(&priv->selq); + return 0; err_sw_node_delete: @@ -516,6 +537,8 @@ err_sw_node_delete: err_free_queues: if (opened) mlx5e_qos_close_all_queues(&priv->channels); +err_cancel_selq: + mlx5e_selq_cancel(&priv->selq); return err; } @@ -526,8 +549,15 @@ int mlx5e_htb_root_del(struct mlx5e_priv *priv) qos_dbg(priv->mdev, "TC_HTB_DESTROY\n"); + /* Wait until real_num_tx_queues is updated for mlx5e_select_queue, + * so that we can safely switch to its non-HTB non-PTP fastpath. + */ + synchronize_net(); + + mlx5e_selq_prepare(&priv->selq, &priv->channels.params, false); + mlx5e_selq_apply(&priv->selq); + WRITE_ONCE(priv->htb.maj_id, 0); - synchronize_rcu(); /* Sync with mlx5e_select_htb_queue and TX data path. */ root = mlx5e_sw_node_find(priv, MLX5E_HTB_CLASSID_ROOT); if (!root) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c new file mode 100644 index 000000000000..d98a277eb7f8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "selq.h" +#include <linux/slab.h> +#include <linux/netdevice.h> +#include <linux/rcupdate.h> +#include "en.h" +#include "en/ptp.h" + +struct mlx5e_selq_params { + unsigned int num_regular_queues; + unsigned int num_channels; + unsigned int num_tcs; + union { + u8 is_special_queues; + struct { + bool is_htb : 1; + bool is_ptp : 1; + }; + }; +}; + +int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock) +{ + struct mlx5e_selq_params *init_params; + + selq->state_lock = state_lock; + + selq->standby = kvzalloc(sizeof(*selq->standby), GFP_KERNEL); + if (!selq->standby) + return -ENOMEM; + + init_params = kvzalloc(sizeof(*selq->active), GFP_KERNEL); + if (!init_params) { + kvfree(selq->standby); + selq->standby = NULL; + return -ENOMEM; + } + /* Assign dummy values, so that mlx5e_select_queue won't crash. */ + *init_params = (struct mlx5e_selq_params) { + .num_regular_queues = 1, + .num_channels = 1, + .num_tcs = 1, + .is_htb = false, + .is_ptp = false, + }; + rcu_assign_pointer(selq->active, init_params); + + return 0; +} + +void mlx5e_selq_cleanup(struct mlx5e_selq *selq) +{ + WARN_ON_ONCE(selq->is_prepared); + + kvfree(selq->standby); + selq->standby = NULL; + selq->is_prepared = true; + + mlx5e_selq_apply(selq); + + kvfree(selq->standby); + selq->standby = NULL; +} + +void mlx5e_selq_prepare(struct mlx5e_selq *selq, struct mlx5e_params *params, bool htb) +{ + lockdep_assert_held(selq->state_lock); + WARN_ON_ONCE(selq->is_prepared); + + selq->is_prepared = true; + + selq->standby->num_channels = params->num_channels; + selq->standby->num_tcs = mlx5e_get_dcb_num_tc(params); + selq->standby->num_regular_queues = + selq->standby->num_channels * selq->standby->num_tcs; + selq->standby->is_htb = htb; + selq->standby->is_ptp = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS); +} + +void mlx5e_selq_apply(struct mlx5e_selq *selq) +{ + struct mlx5e_selq_params *old_params; + + WARN_ON_ONCE(!selq->is_prepared); + + selq->is_prepared = false; + + old_params = rcu_replace_pointer(selq->active, selq->standby, + lockdep_is_held(selq->state_lock)); + synchronize_net(); /* Wait until ndo_select_queue starts emitting correct values. */ + selq->standby = old_params; +} + +void mlx5e_selq_cancel(struct mlx5e_selq *selq) +{ + lockdep_assert_held(selq->state_lock); + WARN_ON_ONCE(!selq->is_prepared); + + selq->is_prepared = false; +} + +#ifdef CONFIG_MLX5_CORE_EN_DCB +static int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb) +{ + int dscp_cp = 0; + + if (skb->protocol == htons(ETH_P_IP)) + dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; + else if (skb->protocol == htons(ETH_P_IPV6)) + dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; + + return priv->dcbx_dp.dscp2prio[dscp_cp]; +} +#endif + +static int mlx5e_get_up(struct mlx5e_priv *priv, struct sk_buff *skb) +{ +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (READ_ONCE(priv->dcbx_dp.trust_state) == MLX5_QPTS_TRUST_DSCP) + return mlx5e_get_dscp_up(priv, skb); +#endif + if (skb_vlan_tag_present(skb)) + return skb_vlan_tag_get_prio(skb); + return 0; +} + +static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb, + struct mlx5e_selq_params *selq) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int up; + + up = selq->num_tcs > 1 ? mlx5e_get_up(priv, skb) : 0; + + return selq->num_regular_queues + up; +} + +static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb) +{ + u16 classid; + + /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */ + if ((TC_H_MAJ(skb->priority) >> 16) == smp_load_acquire(&priv->htb.maj_id)) + classid = TC_H_MIN(skb->priority); + else + classid = READ_ONCE(priv->htb.defcls); + + if (!classid) + return 0; + + return mlx5e_get_txq_by_classid(priv, classid); +} + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_selq_params *selq; + int txq_ix, up; + + selq = rcu_dereference_bh(priv->selq.active); + + /* This is a workaround needed only for the mlx5e_netdev_change_profile + * flow that zeroes out the whole priv without unregistering the netdev + * and without preventing ndo_select_queue from being called. + */ + if (unlikely(!selq)) + return 0; + + if (likely(!selq->is_special_queues)) { + /* No special queues, netdev_pick_tx returns one of the regular ones. */ + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + if (selq->num_tcs <= 1) + return txq_ix; + + up = mlx5e_get_up(priv, skb); + + /* Normalize any picked txq_ix to [0, num_channels), + * So we can return a txq_ix that matches the channel and + * packet UP. + */ + return mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels) + + up * selq->num_channels; + } + + if (unlikely(selq->is_htb)) { + /* num_tcs == 1, shortcut for PTP */ + + txq_ix = mlx5e_select_htb_queue(priv, skb); + if (txq_ix > 0) + return txq_ix; + + if (unlikely(selq->is_ptp && mlx5e_use_ptpsq(skb))) + return selq->num_channels; + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs. + * If they are selected, switch to regular queues. + * Driver to select these queues only at mlx5e_select_ptpsq() + * and mlx5e_select_htb_queue(). + */ + return mlx5e_txq_to_ch_ix_htb(txq_ix, selq->num_channels); + } + + /* PTP is enabled */ + + if (mlx5e_use_ptpsq(skb)) + return mlx5e_select_ptpsq(dev, skb, selq); + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + /* Normalize any picked txq_ix to [0, num_channels). Queues in range + * [0, num_regular_queues) will be mapped to the corresponding channel + * index, so that we can apply the packet's UP (if num_tcs > 1). + * If netdev_pick_tx() picks ptp_channel, switch to a regular queue, + * because driver should select the PTP only at mlx5e_select_ptpsq(). + */ + txq_ix = mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels); + + if (selq->num_tcs <= 1) + return txq_ix; + + up = mlx5e_get_up(priv, skb); + + return txq_ix + up * selq->num_channels; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h new file mode 100644 index 000000000000..6c070141d8f1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_SELQ_H__ +#define __MLX5_EN_SELQ_H__ + +#include <linux/kernel.h> + +struct mlx5e_selq_params; + +struct mlx5e_selq { + struct mlx5e_selq_params __rcu *active; + struct mlx5e_selq_params *standby; + struct mutex *state_lock; /* points to priv->state_lock */ + bool is_prepared; +}; + +struct mlx5e_params; +struct net_device; +struct sk_buff; + +int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock); +void mlx5e_selq_cleanup(struct mlx5e_selq *selq); +void mlx5e_selq_prepare(struct mlx5e_selq *selq, struct mlx5e_params *params, bool htb); +void mlx5e_selq_apply(struct mlx5e_selq *selq); +void mlx5e_selq_cancel(struct mlx5e_selq *selq); + +static inline u16 mlx5e_txq_to_ch_ix(u16 txq, u16 num_channels) +{ + while (unlikely(txq >= num_channels)) + txq -= num_channels; + return txq; +} + +static inline u16 mlx5e_txq_to_ch_ix_htb(u16 txq, u16 num_channels) +{ + if (unlikely(txq >= num_channels)) { + if (unlikely(txq >= num_channels << 3)) + txq %= num_channels; + else + do + txq -= num_channels; + while (txq >= num_channels); + } + return txq; +} + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev); + +#endif /* __MLX5_EN_SELQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index b789af07829c..210d23bf3701 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -9,19 +9,6 @@ #define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) -/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS - * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. - * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a - * full-session WQE be cache-aligned. - */ -#if L1_CACHE_BYTES < 128 -#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) -#else -#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) -#endif - -#define MLX5E_TX_MPW_MAX_NUM_DS (MLX5E_TX_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) - #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) #define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND) @@ -68,8 +55,6 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq); void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq); /* TX */ -u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev); netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); @@ -308,9 +293,9 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more); void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq); -static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session) +static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs) { - return session->ds_count == MLX5E_TX_MPW_MAX_NUM_DS; + return session->ds_count == max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS; } static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) @@ -431,10 +416,10 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, } } -static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size) -{ - BUILD_BUG_ON(PAGE_SIZE / MLX5_SEND_WQE_BB < MLX5_SEND_WQE_MAX_WQEBBS); +#define MLX5E_STOP_ROOM(wqebbs) ((wqebbs) * 2 - 1) +static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size) +{ /* A WQE must not cross the page boundary, hence two conditions: * 1. Its size must not exceed the page size. * 2. If the WQE size is X, and the space remaining in a page is less @@ -443,18 +428,28 @@ static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size) * stop room of X-1 + X. * WQE size is also limited by the hardware limit. */ + WARN_ONCE(wqe_size > mlx5e_get_max_sq_wqebbs(mdev), + "wqe_size %u is greater than max SQ WQEBBs %u", + wqe_size, mlx5e_get_max_sq_wqebbs(mdev)); - if (__builtin_constant_p(wqe_size)) - BUILD_BUG_ON(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS); - else - WARN_ON_ONCE(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS); - return wqe_size * 2 - 1; + return MLX5E_STOP_ROOM(wqe_size); +} + +static inline u16 mlx5e_stop_room_for_max_wqe(struct mlx5_core_dev *mdev) +{ + return MLX5E_STOP_ROOM(mlx5e_get_max_sq_wqebbs(mdev)); } static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size) { - u16 room = sq->reserved_room + mlx5e_stop_room_for_wqe(wqe_size); + u16 room = sq->reserved_room; + + WARN_ONCE(wqe_size > sq->max_sq_wqebbs, + "wqe_size %u is greater than max SQ WQEBBs %u", + wqe_size, sq->max_sq_wqebbs); + + room += MLX5E_STOP_ROOM(wqe_size); return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 56e10c84a706..a7f020399370 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -199,7 +199,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) struct mlx5e_tx_wqe *wqe; u16 pi; - pi = mlx5e_xdpsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS); + pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs); wqe = MLX5E_TX_FETCH_WQE(sq, pi); net_prefetchw(wqe->data); @@ -245,10 +245,8 @@ enum { INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq) { if (unlikely(!sq->mpwqe.wqe)) { - const u16 stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - stop_room))) { + sq->stop_room))) { /* SQ is full, ring doorbell */ mlx5e_xmit_xdp_doorbell(sq); sq->stats->full++; @@ -288,7 +286,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); - if (unlikely(mlx5e_xdp_mpqwe_is_full(session))) + if (unlikely(mlx5e_xdp_mpqwe_is_full(session, sq->max_sq_mpw_wqebbs))) mlx5e_xdp_mpwqe_complete(sq); mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 8d991c3b7a50..c62f11d7ef6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -123,12 +123,13 @@ static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur) return cur; } -static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session) +static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs) { if (session->inline_on) return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > - MLX5E_TX_MPW_MAX_NUM_DS; - return mlx5e_tx_mpwqe_is_full(session); + max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS; + + return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs); } struct mlx5e_xdp_wqe_info { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 9ad3459fb63a..aaf11c66bf4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -32,9 +32,9 @@ u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *pa num_dumps = mlx5e_ktls_dumps_num_wqes(params, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE); - stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS); - stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS); - stop_room += num_dumps * mlx5e_stop_room_for_wqe(MLX5E_KTLS_DUMP_WQEBBS); + stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS); + stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS); + stop_room += num_dumps * mlx5e_stop_room_for_wqe(mdev, MLX5E_KTLS_DUMP_WQEBBS); return stop_room; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c index 7a700f913582..a05580cea481 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -386,5 +386,5 @@ u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par /* FPGA */ /* Resync SKB. */ - return mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + return mlx5e_stop_room_for_max_wqe(mdev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index a4c8d8d00d5a..d659fe07d464 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1142,7 +1142,7 @@ static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context) err = mlx5_set_trust_state(priv->mdev, *trust_state); if (err) return err; - priv->dcbx_dp.trust_state = *trust_state; + WRITE_ONCE(priv->dcbx_dp.trust_state, *trust_state); return 0; } @@ -1187,16 +1187,18 @@ static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio) static int mlx5e_trust_initialize(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + u8 trust_state; int err; - priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP; - - if (!MLX5_DSCP_SUPPORTED(mdev)) + if (!MLX5_DSCP_SUPPORTED(mdev)) { + WRITE_ONCE(priv->dcbx_dp.trust_state, MLX5_QPTS_TRUST_PCP); return 0; + } - err = mlx5_query_trust_state(priv->mdev, &priv->dcbx_dp.trust_state); + err = mlx5_query_trust_state(priv->mdev, &trust_state); if (err) return err; + WRITE_ONCE(priv->dcbx_dp.trust_state, trust_state); mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params, priv->dcbx_dp.trust_state); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index bf80fb612449..b157c7aac4ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -72,12 +72,13 @@ bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { - bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && - MLX5_CAP_GEN(mdev, umr_ptr_rlky) && - MLX5_CAP_ETH(mdev, reg_umr_sq); - u16 max_wqe_sz_cap = MLX5_CAP_GEN(mdev, max_wqe_sz_sq); - bool inline_umr = MLX5E_UMR_WQE_INLINE_SZ <= max_wqe_sz_cap; + bool striding_rq_umr, inline_umr; + u16 max_wqe_sz_cap; + striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) && + MLX5_CAP_ETH(mdev, reg_umr_sq); + max_wqe_sz_cap = mlx5e_get_max_sq_wqebbs(mdev) * MLX5_SEND_WQE_BB; + inline_umr = max_wqe_sz_cap >= MLX5E_UMR_WQE_INLINE_SZ; if (!striding_rq_umr) return false; if (!inline_umr) { @@ -1164,6 +1165,9 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, is_redirect ? &c->priv->channel_stats[c->ix]->xdpsq : &c->priv->channel_stats[c->ix]->rq_xdpsq; + sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev); + sq->stop_room = MLX5E_STOP_ROOM(sq->max_sq_wqebbs); + sq->max_sq_mpw_wqebbs = mlx5e_get_sw_max_sq_mpw_wqebbs(sq->max_sq_wqebbs); param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1238,6 +1242,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, sq->channel = c; sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->reserved_room = param->stop_room; + sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev); param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1313,7 +1318,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int err; sq->pdev = c->pdev; - sq->tstamp = c->tstamp; sq->clock = &mdev->clock; sq->mkey_be = c->mkey_be; sq->netdev = c->netdev; @@ -1324,6 +1328,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev); + sq->max_sq_mpw_wqebbs = mlx5e_get_sw_max_sq_mpw_wqebbs(sq->max_sq_wqebbs); INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); @@ -2677,39 +2683,41 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv) struct mlx5e_txqsq *sq = &c->sq[tc]; priv->txq2sq[sq->txq_ix] = sq; - priv->channel_tc2realtxq[i][tc] = i + tc * ch; } } if (!priv->channels.ptp) - return; + goto out; if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state)) - return; + goto out; for (tc = 0; tc < num_tc; tc++) { struct mlx5e_ptp *c = priv->channels.ptp; struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq; priv->txq2sq[sq->txq_ix] = sq; - priv->port_ptp_tc2realtxq[tc] = priv->num_tc_x_num_ch + tc; } -} -static void mlx5e_update_num_tc_x_num_ch(struct mlx5e_priv *priv) -{ - /* Sync with mlx5e_select_queue. */ - WRITE_ONCE(priv->num_tc_x_num_ch, - mlx5e_get_dcb_num_tc(&priv->channels.params) * priv->channels.num); +out: + /* Make the change to txq2sq visible before the queue is started. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. + */ + smp_wmb(); } void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) { - mlx5e_update_num_tc_x_num_ch(priv); mlx5e_build_txq_maps(priv); mlx5e_activate_channels(&priv->channels); mlx5e_qos_activate_queues(priv); mlx5e_xdp_tx_enable(priv); + + /* dev_watchdog() wants all TX queues to be started when the carrier is + * OK, including the ones in range real_num_tx_queues..num_tx_queues-1. + * Make it happy to avoid TX timeout false alarms. + */ netif_tx_start_all_queues(priv->netdev); if (mlx5e_is_vport_rep(priv)) @@ -2729,11 +2737,13 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) if (mlx5e_is_vport_rep(priv)) mlx5e_remove_sqs_fwd_rules(priv); - /* FIXME: This is a W/A only for tx timeout watch dog false alarm when - * polling for inactive tx queues. + /* The results of ndo_select_queue are unreliable, while netdev config + * is being changed (real_num_tx_queues, num_tc). Stop all queues to + * prevent ndo_start_xmit from being called, so that it can assume that + * the selected queue is always valid. */ - netif_tx_stop_all_queues(priv->netdev); netif_tx_disable(priv->netdev); + mlx5e_xdp_tx_disable(priv); mlx5e_deactivate_channels(&priv->channels); } @@ -2793,6 +2803,7 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv, mlx5e_close_channels(&old_chs); priv->profile->update_rx(priv); + mlx5e_selq_apply(&priv->selq); out: mlx5e_activate_priv_channels(priv); @@ -2816,13 +2827,24 @@ int mlx5e_safe_switch_params(struct mlx5e_priv *priv, return mlx5e_switch_priv_params(priv, params, preactivate, context); new_chs.params = *params; + + mlx5e_selq_prepare(&priv->selq, &new_chs.params, !!priv->htb.maj_id); + err = mlx5e_open_channels(priv, &new_chs); if (err) - return err; + goto err_cancel_selq; + err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context); if (err) - mlx5e_close_channels(&new_chs); + goto err_close; + + return 0; + +err_close: + mlx5e_close_channels(&new_chs); +err_cancel_selq: + mlx5e_selq_cancel(&priv->selq); return err; } @@ -2862,6 +2884,8 @@ int mlx5e_open_locked(struct net_device *netdev) struct mlx5e_priv *priv = netdev_priv(netdev); int err; + mlx5e_selq_prepare(&priv->selq, &priv->channels.params, !!priv->htb.maj_id); + set_bit(MLX5E_STATE_OPENED, &priv->state); err = mlx5e_open_channels(priv, &priv->channels); @@ -2869,6 +2893,7 @@ int mlx5e_open_locked(struct net_device *netdev) goto err_clear_state_opened_flag; priv->profile->update_rx(priv); + mlx5e_selq_apply(&priv->selq); mlx5e_activate_priv_channels(priv); mlx5e_apply_traps(priv, true); if (priv->profile->update_carrier) @@ -2879,6 +2904,7 @@ int mlx5e_open_locked(struct net_device *netdev) err_clear_state_opened_flag: clear_bit(MLX5E_STATE_OPENED, &priv->state); + mlx5e_selq_cancel(&priv->selq); return err; } @@ -4637,11 +4663,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 priv->max_nch); mlx5e_params_mqprio_reset(params); - /* Set an initial non-zero value, so that mlx5e_select_queue won't - * divide by zero if called before first activating channels. - */ - priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc; - /* SQ */ params->log_sq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : @@ -5194,7 +5215,8 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, struct net_device *netdev, struct mlx5_core_dev *mdev) { - int nch, num_txqs, node, i; + int nch, num_txqs, node; + int err; num_txqs = netdev->num_tx_queues; nch = mlx5e_calc_max_nch(mdev, netdev, profile); @@ -5211,6 +5233,11 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, return -ENOMEM; mutex_init(&priv->state_lock); + + err = mlx5e_selq_init(&priv->selq, &priv->state_lock); + if (err) + goto err_free_cpumask; + hash_init(priv->htb.qos_tc2node); INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); @@ -5219,7 +5246,7 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, priv->wq = create_singlethread_workqueue("mlx5e"); if (!priv->wq) - goto err_free_cpumask; + goto err_free_selq; priv->txq2sq = kcalloc_node(num_txqs, sizeof(*priv->txq2sq), GFP_KERNEL, node); if (!priv->txq2sq) @@ -5229,36 +5256,21 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, if (!priv->tx_rates) goto err_free_txq2sq; - priv->channel_tc2realtxq = - kcalloc_node(nch, sizeof(*priv->channel_tc2realtxq), GFP_KERNEL, node); - if (!priv->channel_tc2realtxq) - goto err_free_tx_rates; - - for (i = 0; i < nch; i++) { - priv->channel_tc2realtxq[i] = - kcalloc_node(profile->max_tc, sizeof(**priv->channel_tc2realtxq), - GFP_KERNEL, node); - if (!priv->channel_tc2realtxq[i]) - goto err_free_channel_tc2realtxq; - } - priv->channel_stats = kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node); if (!priv->channel_stats) - goto err_free_channel_tc2realtxq; + goto err_free_tx_rates; return 0; -err_free_channel_tc2realtxq: - while (--i >= 0) - kfree(priv->channel_tc2realtxq[i]); - kfree(priv->channel_tc2realtxq); err_free_tx_rates: kfree(priv->tx_rates); err_free_txq2sq: kfree(priv->txq2sq); err_destroy_workqueue: destroy_workqueue(priv->wq); +err_free_selq: + mlx5e_selq_cleanup(&priv->selq); err_free_cpumask: free_cpumask_var(priv->scratchpad.cpumask); return -ENOMEM; @@ -5275,12 +5287,12 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv) for (i = 0; i < priv->stats_nch; i++) kvfree(priv->channel_stats[i]); kfree(priv->channel_stats); - for (i = 0; i < priv->max_nch; i++) - kfree(priv->channel_tc2realtxq[i]); - kfree(priv->channel_tc2realtxq); kfree(priv->tx_rates); kfree(priv->txq2sq); destroy_workqueue(priv->wq); + mutex_lock(&priv->state_lock); + mlx5e_selq_cleanup(&priv->selq); + mutex_unlock(&priv->state_lock); free_cpumask_var(priv->scratchpad.cpumask); for (i = 0; i < priv->htb.max_qos_sqs; i++) @@ -5346,6 +5358,7 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *prof } netif_carrier_off(netdev); + netif_tx_disable(netdev); dev_net_set(netdev, mlx5_core_net(mdev)); return netdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 06d1f46f1688..a3f257623af4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -632,11 +632,6 @@ static void mlx5e_build_rep_params(struct net_device *netdev) params->mqprio.num_tc = 1; params->tunneled_offload_en = false; - /* Set an initial non-zero value, so that mlx5e_select_queue won't - * divide by zero if called before first activating channels. - */ - priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc; - mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index ee0a8f5206e3..91fdf957cd7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -620,7 +620,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) struct mlx5e_icosq *sq = rq->icosq; int i, err, max_klm_entries, len; - max_klm_entries = MLX5E_MAX_KLM_PER_WQE; + max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev); klm_entries = bitmap_find_window(shampo->bitmap, shampo->hd_per_wqe, shampo->hd_per_wq, shampo->pi); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index ee7ecb88adc1..2dc48406cd08 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -53,117 +53,6 @@ static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) } } -#ifdef CONFIG_MLX5_CORE_EN_DCB -static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb) -{ - int dscp_cp = 0; - - if (skb->protocol == htons(ETH_P_IP)) - dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; - else if (skb->protocol == htons(ETH_P_IPV6)) - dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; - - return priv->dcbx_dp.dscp2prio[dscp_cp]; -} -#endif - -static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - int up = 0; - - if (!netdev_get_num_tc(dev)) - goto return_txq; - -#ifdef CONFIG_MLX5_CORE_EN_DCB - if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP) - up = mlx5e_get_dscp_up(priv, skb); - else -#endif - if (skb_vlan_tag_present(skb)) - up = skb_vlan_tag_get_prio(skb); - -return_txq: - return priv->port_ptp_tc2realtxq[up]; -} - -static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb, - u16 htb_maj_id) -{ - u16 classid; - - if ((TC_H_MAJ(skb->priority) >> 16) == htb_maj_id) - classid = TC_H_MIN(skb->priority); - else - classid = READ_ONCE(priv->htb.defcls); - - if (!classid) - return 0; - - return mlx5e_get_txq_by_classid(priv, classid); -} - -u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - int num_tc_x_num_ch; - int txq_ix; - int up = 0; - int ch_ix; - - /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */ - num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch); - if (unlikely(dev->real_num_tx_queues > num_tc_x_num_ch)) { - struct mlx5e_ptp *ptp_channel; - - /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */ - u16 htb_maj_id = smp_load_acquire(&priv->htb.maj_id); - - if (unlikely(htb_maj_id)) { - txq_ix = mlx5e_select_htb_queue(priv, skb, htb_maj_id); - if (txq_ix > 0) - return txq_ix; - } - - ptp_channel = READ_ONCE(priv->channels.ptp); - if (unlikely(ptp_channel && - test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) && - mlx5e_use_ptpsq(skb))) - return mlx5e_select_ptpsq(dev, skb); - - txq_ix = netdev_pick_tx(dev, skb, NULL); - /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs. - * If they are selected, switch to regular queues. - * Driver to select these queues only at mlx5e_select_ptpsq() - * and mlx5e_select_htb_queue(). - */ - if (unlikely(txq_ix >= num_tc_x_num_ch)) - txq_ix %= num_tc_x_num_ch; - } else { - txq_ix = netdev_pick_tx(dev, skb, NULL); - } - - if (!netdev_get_num_tc(dev)) - return txq_ix; - -#ifdef CONFIG_MLX5_CORE_EN_DCB - if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP) - up = mlx5e_get_dscp_up(priv, skb); - else -#endif - if (skb_vlan_tag_present(skb)) - up = skb_vlan_tag_get_prio(skb); - - /* Normalize any picked txq_ix to [0, num_channels), - * So we can return a txq_ix that matches the channel and - * packet UP. - */ - ch_ix = priv->txq2sq[txq_ix]->ch_ix; - - return priv->channel_tc2realtxq[ch_ix][up]; -} - static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) { #define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN) @@ -544,7 +433,7 @@ static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe *wqe; u16 pi; - pi = mlx5e_txqsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS); + pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs); wqe = MLX5E_TX_FETCH_WQE(sq, pi); net_prefetchw(wqe->data); @@ -645,7 +534,7 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, mlx5e_tx_skb_update_hwts_flags(skb); - if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) { + if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) { /* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */ cseg = mlx5e_tx_mpwqe_session_complete(sq); @@ -691,8 +580,21 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) struct mlx5e_txqsq *sq; u16 pi; + /* All changes to txq2sq are performed in sync with mlx5e_xmit, when the + * queue being changed is disabled, and smp_wmb guarantees that the + * changes are visible before mlx5e_xmit tries to read from txq2sq. It + * guarantees that the value of txq2sq[qid] doesn't change while + * mlx5e_xmit is running on queue number qid. smb_wmb is paired with + * HARD_TX_LOCK around ndo_start_xmit, which serves as an ACQUIRE. + */ sq = priv->txq2sq[skb_get_queue_mapping(skb)]; if (unlikely(!sq)) { + /* Two cases when sq can be NULL: + * 1. The HTB node is registered, and mlx5e_select_queue + * selected its queue ID, but the SQ itself is not yet created. + * 2. HTB SQ creation failed. Similar to the previous case, but + * the SQ won't be created. + */ dev_kfree_skb_any(skb); return NETDEV_TX_OK; } |