From 082a9edf12fef88400172e7d1b131d65a3ed492e Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Sat, 1 Oct 2022 21:56:18 -0700 Subject: net/mlx5e: xsk: Flush RQ on XSK activation to save memory The regular RQ remains open after opening an XSK socket, in order to guarantee that closing the XSK socket never fails due to an error when reopening the regular RQ. To save memory, the regular RQ can be deactivated and flushed, releasing all pages, when an XSK socket is open. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c | 9 +++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 14 +++++++++----- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 6bc6472b98f2..9e6347a67fd2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1036,6 +1036,7 @@ struct mlx5e_rq_param; int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, int node, struct mlx5e_rq *rq); +#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); void mlx5e_close_rq(struct mlx5e_rq *rq); int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c index 9804ef15a4d6..8b09e2f58a4d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -126,6 +126,9 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, true); + mlx5e_deactivate_rq(&c->rq); + mlx5e_flush_rq(&c->rq, MLX5_RQC_STATE_RDY); + return 0; err_remove_pool: @@ -165,7 +168,13 @@ static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix) goto remove_pool; c = priv->channels.c[ix]; + + mlx5e_activate_rq(&c->rq); + mlx5e_trigger_napi_icosq(c); + mlx5e_wait_for_min_rx_wqes(&c->rq, MLX5E_RQ_WQES_TIMEOUT); + mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, false); + mlx5e_deactivate_xsk(c); mlx5e_close_xsk(c); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 21fe43406d88..10428ade96c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2408,10 +2408,11 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_icosq(&c->icosq); mlx5e_activate_icosq(&c->async_icosq); - mlx5e_activate_rq(&c->rq); if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) mlx5e_activate_xsk(c); + else + mlx5e_activate_rq(&c->rq); mlx5e_trigger_napi_icosq(c); } @@ -2422,8 +2423,9 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) mlx5e_deactivate_xsk(c); + else + mlx5e_deactivate_rq(&c->rq); - mlx5e_deactivate_rq(&c->rq); mlx5e_deactivate_icosq(&c->async_icosq); mlx5e_deactivate_icosq(&c->icosq); for (tc = 0; tc < c->num_tc; tc++) @@ -2515,8 +2517,6 @@ static void mlx5e_activate_channels(struct mlx5e_channels *chs) mlx5e_ptp_activate_channel(chs->ptp); } -#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */ - 
static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs) { int err = 0; @@ -2524,8 +2524,12 @@ static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs) for (i = 0; i < chs->num; i++) { int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT; + struct mlx5e_channel *c = chs->c[i]; + + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + continue; - err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, timeout); + err |= mlx5e_wait_for_min_rx_wqes(&c->rq, timeout); /* Don't wait on the XSK RQ, because the newer xdpsock sample * doesn't provide any Fill Ring entries at the setup stage. -- cgit v1.2.3 From a2740f529da2dab929e10bf565073f6659c863fc Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Sat, 1 Oct 2022 21:56:19 -0700 Subject: net/mlx5e: xsk: Set napi_id to support busy polling xdp_rxq_info_reg should get the actual napi_id, not 0, in order to support socket busy polling properly. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 10428ade96c1..3ee8295c2115 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -539,7 +539,7 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param if (err) return err; - return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0); + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id); } static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, -- cgit v1.2.3 From 1ca6492ec964325396d5822a26ff53876e466f71 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Sat, 1 Oct 2022 21:56:20 -0700 Subject: net/mlx5e: xsk: Include XSK skb_from_cqe callbacks in INDIRECT_CALL XSK is a performance-critical data path. To avoid an indirect function call with a retpoline, include XSK callbacks in the INDIRECT_CALL macro, so that they are called directly in XSK flows. 
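For illustration, a simplified userspace sketch of how INDIRECT_CALL-style wrappers avoid the retpolined indirect call (not the kernel's actual macro definitions; the handler names below are made up):

    #include <stdio.h>

    /* Compare the function pointer against known candidates; on a match the
     * compiler emits a direct call instead of a retpolined indirect call.
     */
    #define INDIRECT_CALL_1(f, f1, ...) \
            ((f) == (f1) ? f1(__VA_ARGS__) : (f)(__VA_ARGS__))
    #define INDIRECT_CALL_2(f, f2, f1, ...) \
            ((f) == (f2) ? f2(__VA_ARGS__) : INDIRECT_CALL_1(f, f1, __VA_ARGS__))
    #define INDIRECT_CALL_3(f, f3, f2, f1, ...) \
            ((f) == (f3) ? f3(__VA_ARGS__) : INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__))

    static int skb_linear(int len)     { return len; }
    static int skb_nonlinear(int len)  { return len + 1; }
    static int skb_xsk_linear(int len) { return len + 2; }

    int main(void)
    {
            int (*skb_from_cqe)(int) = skb_xsk_linear; /* chosen once per RQ */

            /* Listing the XSK handler as a candidate makes XSK flows take a
             * direct call on the fast path.
             */
            printf("%d\n", INDIRECT_CALL_3(skb_from_cqe, skb_linear,
                                           skb_nonlinear, skb_xsk_linear, 64));
            return 0;
    }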
Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 36eda4c958a0..5835d86be8d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1709,9 +1709,10 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto free_wqe; } - skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + skb = INDIRECT_CALL_3(rq->wqe.skb_from_cqe, mlx5e_skb_from_cqe_linear, mlx5e_skb_from_cqe_nonlinear, + mlx5e_xsk_skb_from_cqe_linear, rq, wi, cqe_bcnt); if (!skb) { /* probably for XDP */ @@ -2180,9 +2181,10 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); - skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq, + skb = INDIRECT_CALL_3(rq->mpwqe.skb_from_cqe_mpwrq, mlx5e_skb_from_cqe_mpwrq_linear, mlx5e_skb_from_cqe_mpwrq_nonlinear, + mlx5e_xsk_skb_from_cqe_mpwrq_linear, rq, wi, cqe_bcnt, head_offset, page_idx); if (!skb) goto mpwrq_cqe_out; -- cgit v1.2.3 From cfb4d09c30c9d5b2b4d09766ebff2ec7a0f669da Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Sat, 1 Oct 2022 21:56:21 -0700 Subject: net/mlx5e: xsk: Improve need_wakeup logic The XSK need_wakeup mechanism allows the driver to stop busy waiting for buffers when the fill ring is empty, yield to the application and signal that the driver needs to be woken up after the application refills the fill ring. Add protection against a race condition on the RX (refill) side: if the application refills buffers after xskrq->post_wqes is called, but before mlx5e_xsk_update_rx_wakeup, NAPI will exit without posting these buffers to the hardware WQ, and the application won't wake it up again. Optimize the whole need_wakeup logic, removing unneeded flows, to compensate for this new check.
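The ordering that closes the RX race can be modeled in userspace as follows (an illustrative sketch only, not the driver flow; fill_ring_entries and rx_need_wakeup stand in for the real fill ring and the flag the application polls):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_int fill_ring_entries;   /* produced by the application */
    static atomic_bool rx_need_wakeup;     /* observed by the application */

    /* Publish need_wakeup *before* the final attempt to consume the fill
     * ring, so a refill racing with the end of NAPI is either consumed now
     * or makes the application wake the driver later.
     */
    static bool driver_rx_refill(void)
    {
            atomic_store(&rx_need_wakeup, true);

            int n = atomic_exchange(&fill_ring_entries, 0);
            if (n > 0) {
                    atomic_store(&rx_need_wakeup, false);
                    printf("posted %d RX buffers\n", n);
                    return true;    /* keep polling */
            }
            return false;           /* idle until the application wakes us */
    }

    int main(void)
    {
            atomic_store(&fill_ring_entries, 8); /* refilled just in time */
            return driver_rx_refill() ? 0 : 1;
    }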
Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.h | 14 --------- .../net/ethernet/mellanox/mlx5/core/en/xsk/tx.h | 12 -------- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 33 ++++++++++++++-------- 4 files changed, 23 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h index 84a496a8d72f..087c943bd8e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -5,7 +5,6 @@ #define __MLX5_EN_XSK_RX_H__ #include "en.h" -#include /* RX data path */ @@ -21,17 +20,4 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); -static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err) -{ - if (!xsk_uses_need_wakeup(rq->xsk_pool)) - return alloc_err; - - if (unlikely(alloc_err)) - xsk_set_rx_need_wakeup(rq->xsk_pool); - else - xsk_clear_rx_need_wakeup(rq->xsk_pool); - - return false; -} - #endif /* __MLX5_EN_XSK_RX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h index a05085035f23..9c505158b975 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -5,7 +5,6 @@ #define __MLX5_EN_XSK_TX_H__ #include "en.h" -#include /* TX data path */ @@ -13,15 +12,4 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags); bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); -static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq) -{ - if (!xsk_uses_need_wakeup(sq->xsk_pool)) - return; - - if (sq->pc != sq->cc) - xsk_clear_tx_need_wakeup(sq->xsk_pool); - else - xsk_set_tx_need_wakeup(sq->xsk_pool); -} - #endif /* __MLX5_EN_XSK_TX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 5835d86be8d8..b61604d87701 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "en.h" #include "en/txrx.h" #include "en_tc.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 833be29170a1..9a458a5d9853 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -31,6 +31,7 @@ */ #include +#include #include "en.h" #include "en/txrx.h" #include "en/xdp.h" @@ -86,26 +87,36 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq) static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskrq) { + bool need_wakeup = xsk_uses_need_wakeup(xskrq->xsk_pool); bool busy_xsk = false, xsk_rx_alloc_err; - /* Handle the race between the application querying need_wakeup and the - * driver setting it: - * 1. Update need_wakeup both before and after the TX. If it goes to - * "yes", it can only happen with the first update. - * 2. If the application queried need_wakeup before we set it, the - * packets will be transmitted anyway, even w/o a wakeup. - * 3. Give a chance to clear need_wakeup after new packets were queued - * for TX. 
+ /* If SQ is empty, there are no TX completions to trigger NAPI, so set + * need_wakeup. Do it before queuing packets for TX to avoid race + * condition with userspace. */ - mlx5e_xsk_update_tx_wakeup(xsksq); + if (need_wakeup && xsksq->pc == xsksq->cc) + xsk_set_tx_need_wakeup(xsksq->xsk_pool); busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); - mlx5e_xsk_update_tx_wakeup(xsksq); + /* If we queued some packets for TX, no need for wakeup anymore. */ + if (need_wakeup && xsksq->pc != xsksq->cc) + xsk_clear_tx_need_wakeup(xsksq->xsk_pool); + /* If WQ is empty, RX won't trigger NAPI, so set need_wakeup. Do it + * before refilling to avoid race condition with userspace. + */ + if (need_wakeup && !mlx5e_rqwq_get_cur_sz(xskrq)) + xsk_set_rx_need_wakeup(xskrq->xsk_pool); xsk_rx_alloc_err = INDIRECT_CALL_2(xskrq->post_wqes, mlx5e_post_rx_mpwqes, mlx5e_post_rx_wqes, xskrq); - busy_xsk |= mlx5e_xsk_update_rx_wakeup(xskrq, xsk_rx_alloc_err); + /* Ask for wakeup if WQ is not full after refill. */ + if (!need_wakeup) + busy_xsk |= xsk_rx_alloc_err; + else if (xsk_rx_alloc_err) + xsk_set_rx_need_wakeup(xskrq->xsk_pool); + else + xsk_clear_rx_need_wakeup(xskrq->xsk_pool); return busy_xsk; } -- cgit v1.2.3 From 168723c1f8d6e1e823d4c6ad3cf64478cf58330a Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Sat, 1 Oct 2022 21:56:22 -0700 Subject: net/mlx5e: xsk: Use umr_mode to calculate striding RQ parameters Instead of passing the unaligned flag, pass an enum that indicates the UMR mode. The next commit will add the third mode (KLM for certain configurations of XSK), which will be added to this enum instead of adding another bool flag everywhere. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 9 +- .../net/ethernet/mellanox/mlx5/core/en/params.c | 126 ++++++++++++++------- .../net/ethernet/mellanox/mlx5/core/en/params.h | 24 ++-- .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 4 +- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 72 ++++++++---- include/linux/mlx5/driver.h | 4 + 7 files changed, 171 insertions(+), 72 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 9e6347a67fd2..a2d09f30acd1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -681,6 +681,11 @@ struct mlx5e_hw_gro_data { int second_ip_id; }; +enum mlx5e_mpwrq_umr_mode { + MLX5E_MPWRQ_UMR_MODE_ALIGNED, + MLX5E_MPWRQ_UMR_MODE_UNALIGNED, +}; + struct mlx5e_rq { /* data path */ union { @@ -708,7 +713,7 @@ struct mlx5e_rq { u8 pages_per_wqe; u8 umr_wqebbs; u8 mtts_per_wqe; - u8 unaligned; + u8 umr_mode; struct mlx5e_shampo_hd *shampo; } mpwqe; }; @@ -1008,7 +1013,7 @@ struct mlx5e_profile { void mlx5e_build_ptys2ethtool_map(void); bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, - bool unaligned); + enum mlx5e_mpwrq_umr_mode umr_mode); void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close); void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index ac4d70bb21e8..b57855bf7629 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -27,9 +27,48 @@ 
u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xs return max(req_page_shift, min_page_shift); } -u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned) +enum mlx5e_mpwrq_umr_mode +mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk) +{ + /* Different memory management schemes use different mechanisms to map + * user-mode memory. The stricter guarantees we have, the faster + * mechanisms we use: + * 1. MTT - direct mapping in page granularity. + * 2. KSM - indirect mapping to another MKey to arbitrary addresses, but + * all mappings have the same size. + */ + bool unaligned = xsk ? xsk->unaligned : false; + + /* XSK frames can start at arbitrary unaligned locations, but they all + * have the same size which is a power of two. It allows to optimize to + * one KSM per frame. + */ + if (unaligned) + return MLX5E_MPWRQ_UMR_MODE_UNALIGNED; + + /* XSK: frames are naturally aligned, MTT can be used. + * Non-XSK: Allocations happen in units of CPU pages, therefore, the + * mappings are naturally aligned. + */ + return MLX5E_MPWRQ_UMR_MODE_ALIGNED; +} + +u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode) { - u8 umr_entry_size = unaligned ? sizeof(struct mlx5_ksm) : sizeof(struct mlx5_mtt); + switch (mode) { + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: + return sizeof(struct mlx5_mtt); + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: + return sizeof(struct mlx5_ksm); + } + WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", mode); + return 0; +} + +u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); u8 max_pages_per_wqe, max_log_mpwqe_size; u16 max_wqe_size; @@ -44,9 +83,10 @@ u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, bool unalig return min_t(u8, max_log_mpwqe_size, MLX5_MPWRQ_MAX_LOG_WQE_SZ); } -u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned) +u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) { - u8 log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, unaligned); + u8 log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode); u8 pages_per_wqe; pages_per_wqe = log_wqe_sz > page_shift ? (1 << (log_wqe_sz - page_shift)) : 1; @@ -59,10 +99,11 @@ u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, bool una return pages_per_wqe; } -u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned) +u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) { - u8 umr_entry_size = unaligned ? 
sizeof(struct mlx5_ksm) : sizeof(struct mlx5_mtt); - u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, unaligned); + u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode); + u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); u16 umr_wqe_sz; umr_wqe_sz = sizeof(struct mlx5e_umr_wqe) + @@ -73,25 +114,30 @@ u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, bool unali return umr_wqe_sz; } -u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned) +u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) { - return DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(mdev, page_shift, unaligned), + return DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(mdev, page_shift, umr_mode), MLX5_SEND_WQE_BB); } -u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned) +u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) { + u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode); + /* Add another page as a buffer between WQEs. This page will absorb * write overflow by the hardware, when receiving packets larger than * MTU. These oversize packets are dropped by the driver at a later * stage. */ - return MLX5_ALIGN_MTTS(mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, unaligned) + 1); + return MLX5_ALIGN_MTTS(pages_per_wqe + 1); } -u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, bool unaligned) +u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, + enum mlx5e_mpwrq_umr_mode umr_mode) { - if (unaligned) + if (umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED) return min(MLX5E_MAX_RQ_NUM_KSMS, 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size)); @@ -99,18 +145,19 @@ u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, bool unaligned) } static u8 mlx5e_mpwrq_max_log_rq_size(struct mlx5_core_dev *mdev, u8 page_shift, - bool unaligned) + enum mlx5e_mpwrq_umr_mode umr_mode) { - u8 mtts_per_wqe = mlx5e_mpwrq_mtts_per_wqe(mdev, page_shift, unaligned); - u32 max_entries = mlx5e_mpwrq_max_num_entries(mdev, unaligned); + u8 mtts_per_wqe = mlx5e_mpwrq_mtts_per_wqe(mdev, page_shift, umr_mode); + u32 max_entries = mlx5e_mpwrq_max_num_entries(mdev, umr_mode); return ilog2(max_entries / mtts_per_wqe); } -u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned) +u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) { - return mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, unaligned) + - mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, unaligned) - + return mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode) + + mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) - MLX5E_ORDER2_MAX_PACKET_MTU; } @@ -171,10 +218,10 @@ static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk) { u32 linear_stride_sz = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); - bool unaligned = xsk ? 
xsk->unaligned : false; - return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, unaligned) - + return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) - order_base_2(linear_stride_sz); } @@ -200,10 +247,11 @@ bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev, static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, u8 log_stride_sz, u8 log_num_strides, - u8 page_shift, bool unaligned) + u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) { if (log_stride_sz + log_num_strides != - mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, unaligned)) + mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode)) return false; if (log_stride_sz < MLX5_MPWQE_LOG_STRIDE_SZ_BASE || @@ -223,8 +271,8 @@ bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); - bool unaligned = xsk ? xsk->unaligned : false; u8 log_num_strides; u8 log_stride_sz; u8 log_wqe_sz; @@ -233,7 +281,7 @@ bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, return false; log_stride_sz = order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true)); - log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, unaligned); + log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode); if (log_wqe_sz < log_stride_sz) return false; @@ -242,19 +290,19 @@ bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz, log_num_strides, page_shift, - unaligned); + umr_mode); } u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); u8 log_pkts_per_wqe, page_shift, max_log_rq_size; - bool unaligned = xsk ? xsk->unaligned : false; log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(mdev, params, xsk); page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); - max_log_rq_size = mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, unaligned); + max_log_rq_size = mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode); /* Numbers are unsigned, don't subtract to avoid underflow. */ if (params->log_rq_mtu_frames < @@ -308,10 +356,10 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); - bool unaligned = xsk ? 
xsk->unaligned : false; - return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, unaligned) - + return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) - mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); } @@ -460,9 +508,10 @@ bool slow_pci_heuristic(struct mlx5_core_dev *mdev) int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, NULL); u8 page_shift = mlx5e_mpwrq_page_shift(mdev, NULL); - if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, false)) + if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode)) return -EOPNOTSUPP; if (params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) @@ -474,11 +523,12 @@ int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); bool unaligned = xsk ? xsk->unaligned : false; u16 max_mtu_pkts; - if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, xsk->unaligned)) + if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode)) return -EOPNOTSUPP; if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) @@ -781,16 +831,16 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: { u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); - bool unaligned = xsk ? xsk->unaligned : false; if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size, log_wqe_num_of_strides, - page_shift, unaligned)) { + page_shift, umr_mode)) { mlx5_core_err(mdev, - "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u, unaligned %d\n", + "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u, umr_mode %d\n", log_wqe_stride_size, log_wqe_num_of_strides, - unaligned); + umr_mode); return -EINVAL; } @@ -974,11 +1024,11 @@ static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); - bool unaligned = xsk ? 
xsk->unaligned : false; u8 umr_wqebbs; - umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, unaligned); + umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode); return umr_wqebbs * (1 << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index a3952afdcbe4..034debd140bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -56,13 +56,23 @@ struct mlx5e_create_sq_param { /* Striding RQ dynamic parameters */ u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk); -u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned); -u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned); -u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned); -u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned); -u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned); -u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, bool unaligned); -u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned); +enum mlx5e_mpwrq_umr_mode +mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode); +u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, + enum mlx5e_mpwrq_umr_mode umr_mode); +u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); /* Parameter calculations */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index aebc1d5a9004..e12a856331b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -41,7 +41,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe)); - if (unlikely(rq->mpwqe.unaligned)) { + if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) { for (i = 0; i < batch; i++) { dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); @@ -67,7 +67,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); offset = ix * rq->mpwqe.mtts_per_wqe; - if (likely(!rq->mpwqe.unaligned)) + if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) offset = MLX5_ALIGNED_MTTS_OCTW(offset); umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 26f1ac4683e7..24aa25da482b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -314,7 +314,9 @@ void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, /* Limitation for regular RQ. XSK RQ may clamp the queue length in * mlx5e_mpwqe_get_log_rq_size. */ - u8 max_log_mpwrq_pkts = mlx5e_mpwrq_max_log_rq_pkts(priv->mdev, PAGE_SHIFT, false); + u8 max_log_mpwrq_pkts = mlx5e_mpwrq_max_log_rq_pkts(priv->mdev, + PAGE_SHIFT, + MLX5E_MPWRQ_UMR_MODE_ALIGNED); param->rx_max_pending = 1 << min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE, max_log_mpwrq_pkts); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3ee8295c2115..b5a416ff1603 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -69,7 +69,7 @@ #include "en/trap.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, - bool unaligned) + enum mlx5e_mpwrq_umr_mode umr_mode) { u16 umr_wqebbs, max_wqebbs; bool striding_rq_umr; @@ -79,7 +79,7 @@ bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_ if (!striding_rq_umr) return false; - umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, unaligned); + umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode); max_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); /* Sanity check; should never happen, because mlx5e_mpwrq_umr_wqebbs is * calculated from mlx5e_get_max_sq_aligned_wqebbs. @@ -203,6 +203,18 @@ static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv) mlx5_blocking_notifier_unregister(priv->mdev, &priv->blocking_events_nb); } +static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode) +{ + switch (umr_mode) { + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: + return MLX5_MTT_OCTW(entries); + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: + return MLX5_KSM_OCTW(entries); + } + WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode); + return 0; +} + static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *wqe) @@ -213,7 +225,7 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, u8 ds_cnt; ds_cnt = DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(rq->mdev, rq->mpwqe.page_shift, - rq->mpwqe.unaligned), + rq->mpwqe.umr_mode), MLX5_SEND_WQE_DS); cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | @@ -221,8 +233,7 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, cseg->umr_mkey = rq->mpwqe.umr_mkey_be; ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE; - octowords = rq->mpwqe.unaligned ? 
MLX5_KSM_OCTW(rq->mpwqe.pages_per_wqe) : - MLX5_MTT_OCTW(rq->mpwqe.pages_per_wqe); + octowords = mlx5e_mpwrq_umr_octowords(rq->mpwqe.pages_per_wqe, rq->mpwqe.umr_mode); ucseg->xlt_octowords = cpu_to_be16(octowords); ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } @@ -283,9 +294,23 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) return 0; } + +static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode) +{ + switch (umr_mode) { + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: + return MLX5_MKC_ACCESS_MODE_MTT; + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: + return MLX5_MKC_ACCESS_MODE_KSM; + } + WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode); + return 0; +} + static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, u32 npages, u8 page_shift, u32 *umr_mkey, - dma_addr_t filler_addr, bool unaligned) + dma_addr_t filler_addr, + enum mlx5e_mpwrq_umr_mode umr_mode) { struct mlx5_mtt *mtt; struct mlx5_ksm *ksm; @@ -296,14 +321,16 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, int err; int i; - if (unaligned && !MLX5_CAP_GEN(mdev, fixed_buffer_size)) { + if (umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED && + !MLX5_CAP_GEN(mdev, fixed_buffer_size)) { mlx5_core_warn(mdev, "Unaligned AF_XDP requires fixed_buffer_size capability\n"); return -EINVAL; } + octwords = mlx5e_mpwrq_umr_octowords(npages, umr_mode); + inlen = MLX5_FLEXIBLE_INLEN(mdev, MLX5_ST_SZ_BYTES(create_mkey_in), - unaligned ? sizeof(*ksm) : sizeof(*mtt), - npages); + MLX5_OCTWORD, octwords); if (inlen < 0) return inlen; @@ -311,16 +338,13 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, if (!in) return -ENOMEM; - octwords = unaligned ? MLX5_KSM_OCTW(npages) : MLX5_MTT_OCTW(npages); - mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, lw, 1); MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET(mkc, mkc, access_mode_1_0, - unaligned ? MLX5_MKC_ACCESS_MODE_KSM : MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, access_mode_1_0, mlx5e_mpwrq_access_mode(umr_mode)); mlx5e_mkey_set_relaxed_ordering(mdev, mkc); MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); @@ -335,19 +359,22 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, * the RQ's pool, while the gaps (wqe_overflow) remain mapped * to the default page. */ - if (unaligned) { + switch (umr_mode) { + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); for (i = 0; i < npages; i++) ksm[i] = (struct mlx5_ksm) { .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), .va = cpu_to_be64(filler_addr), }; - } else { + break; + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); for (i = 0; i < npages; i++) mtt[i] = (struct mlx5_mtt) { .ptag = cpu_to_be64(filler_addr), }; + break; } err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen); @@ -396,7 +423,7 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq u32 umr_mkey; int err; - max_num_entries = mlx5e_mpwrq_max_num_entries(mdev, rq->mpwqe.unaligned); + max_num_entries = mlx5e_mpwrq_max_num_entries(mdev, rq->mpwqe.umr_mode); /* Shouldn't overflow, the result is at most MLX5E_MAX_RQ_NUM_MTTS. 
*/ if (WARN_ON_ONCE(check_mul_overflow(wq_size, (u32)rq->mpwqe.mtts_per_wqe, @@ -408,7 +435,7 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq err = mlx5e_create_umr_mkey(mdev, num_entries, rq->mpwqe.page_shift, &umr_mkey, rq->wqe_overflow.addr, - rq->mpwqe.unaligned); + rq->mpwqe.umr_mode); rq->mpwqe.umr_mkey_be = cpu_to_be32(umr_mkey); return err; } @@ -644,16 +671,16 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); - rq->mpwqe.unaligned = xsk ? xsk->unaligned : false; + rq->mpwqe.umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); rq->mpwqe.pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, rq->mpwqe.page_shift, - rq->mpwqe.unaligned); + rq->mpwqe.umr_mode); rq->mpwqe.umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, rq->mpwqe.page_shift, - rq->mpwqe.unaligned); + rq->mpwqe.umr_mode); rq->mpwqe.mtts_per_wqe = mlx5e_mpwrq_mtts_per_wqe(mdev, rq->mpwqe.page_shift, - rq->mpwqe.unaligned); + rq->mpwqe.umr_mode); pool_size = rq->mpwqe.pages_per_wqe << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk); @@ -5012,7 +5039,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (!!MLX5_CAP_ETH(mdev, lro_cap) && !MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) && !MLX5_CAP_ETH(mdev, tunnel_lro_gre) && - mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, false)) + mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, + MLX5E_MPWRQ_UMR_MODE_ALIGNED)) netdev->vlan_features |= NETIF_F_LRO; netdev->hw_features = netdev->vlan_features; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f8ecb33105d3..285f301a6390 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1288,4 +1288,8 @@ static inline bool mlx5_get_roce_state(struct mlx5_core_dev *dev) return mlx5_is_roce_on(dev); } +enum { + MLX5_OCTWORD = 16, +}; + #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From 9f123f740428e96ef2eae8b5e2876b675b6a4605 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Sat, 1 Oct 2022 21:56:23 -0700 Subject: net/mlx5e: Improve MTT/KSM alignment Make mlx5e_mpwrq_mtts_per_wqe take into account that KSM requires smaller alignment than MTT. Ensure that there is always an even amount of MTTs in a UMR WQE, so that complete octwords are formed, and no garbage is mapped. Drop extra alignment in MLX5_MTT_OCTW that may cause setting too big ucseg->xlt_octowords, also leading to mapping garbage. Generalize some calculations by introducing the MLX5_OCTWORD constant. 
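A small worked example of the octword accounting (an illustrative sketch, not the driver helper; MLX5_OCTWORD = 16 and the 8-byte MTT size follow from the patch, the 16-byte KSM size and the entry counts are assumptions for illustration):

    #include <assert.h>
    #include <stdio.h>

    #define OCTWORD  16   /* MLX5_OCTWORD */
    #define MTT_SZ    8   /* assumed sizeof(struct mlx5_mtt) */
    #define KSM_SZ   16   /* assumed sizeof(struct mlx5_ksm) */

    /* translations_octword_size = entries * entry_size / 16; the entry count
     * must produce whole octwords, hence an even number of 8-byte MTTs.
     */
    static unsigned int umr_octowords(unsigned int entries, unsigned int entry_size)
    {
            assert(entries * entry_size % OCTWORD == 0);
            return entries * entry_size / OCTWORD;
    }

    int main(void)
    {
            printf("66 MTTs -> %u octwords\n", umr_octowords(66, MTT_SZ)); /* 33 */
            printf("64 KSMs -> %u octwords\n", umr_octowords(64, KSM_SZ)); /* 64 */
            return 0;
    }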
Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 +----- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 10 +++++++++- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 13 +++++-------- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 +- 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index a2d09f30acd1..93607db1dea4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -109,12 +109,8 @@ struct page_pool; #define MLX5_MPWRQ_MAX_PAGES_PER_WQE \ rounddown_pow_of_two(MLX5_UMR_MAX_MTT_SPACE / sizeof(struct mlx5_mtt)) -#define MLX5_ALIGN_MTTS(mtts) (ALIGN(mtts, 8)) -#define MLX5_ALIGNED_MTTS_OCTW(mtts) ((mtts) / 2) -#define MLX5_MTT_OCTW(mtts) (MLX5_ALIGNED_MTTS_OCTW(MLX5_ALIGN_MTTS(mtts))) -#define MLX5_KSM_OCTW(ksms) (ksms) #define MLX5E_MAX_RQ_NUM_MTTS \ - (ALIGN_DOWN(U16_MAX, 4) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */ + (ALIGN_DOWN(U16_MAX, 4) * 2) /* Fits into u16 and aligned by WQEBB. */ #define MLX5E_MAX_RQ_NUM_KSMS (U16_MAX - 1) /* So that num_ksms fits into u16. */ #define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index b57855bf7629..e8c3b8abf941 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -91,6 +91,13 @@ u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, pages_per_wqe = log_wqe_sz > page_shift ? (1 << (log_wqe_sz - page_shift)) : 1; + /* Two MTTs are needed to form an octword. The number of MTTs is encoded + * in octwords in a UMR WQE, so we need at least two to avoid mapping + * garbage addresses. + */ + if (WARN_ON_ONCE(pages_per_wqe < 2 && umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) + pages_per_wqe = 2; + /* Sanity check for further calculations to succeed. */ BUILD_BUG_ON(MLX5_MPWRQ_MAX_PAGES_PER_WQE > 64); if (WARN_ON_ONCE(pages_per_wqe > MLX5_MPWRQ_MAX_PAGES_PER_WQE)) @@ -131,7 +138,8 @@ u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, * MTU. These oversize packets are dropped by the driver at a later * stage. */ - return MLX5_ALIGN_MTTS(pages_per_wqe + 1); + return ALIGN(pages_per_wqe + 1, + MLX5_SEND_WQE_BB / mlx5e_mpwrq_umr_entry_size(umr_mode)); } u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index e12a856331b8..4b2df2895505 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -66,9 +66,10 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) umr_wqe->ctrl.opmod_idx_opcode = cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); + /* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. 
*/ offset = ix * rq->mpwqe.mtts_per_wqe; if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) - offset = MLX5_ALIGNED_MTTS_OCTW(offset); + offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD; umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b5a416ff1603..2093b6cc6c7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -205,14 +205,11 @@ static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv) static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode) { - switch (umr_mode) { - case MLX5E_MPWRQ_UMR_MODE_ALIGNED: - return MLX5_MTT_OCTW(entries); - case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: - return MLX5_KSM_OCTW(entries); - } - WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode); - return 0; + u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); + + WARN_ON_ONCE(entries * umr_entry_size % MLX5_OCTWORD); + + return entries * umr_entry_size / MLX5_OCTWORD; } static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index b61604d87701..58084650151f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -682,7 +682,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); - offset = MLX5_ALIGNED_MTTS_OCTW(ix * rq->mpwqe.mtts_per_wqe); + offset = (ix * rq->mpwqe.mtts_per_wqe) * sizeof(struct mlx5_mtt) / MLX5_OCTWORD; umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { -- cgit v1.2.3 From 139213451046eb6653a058a8922796f29b267b0f Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Sat, 1 Oct 2022 21:56:24 -0700 Subject: net/mlx5e: xsk: Use KLM to protect frame overrun in unaligned mode XSK RQs support striding RQ linear mode, but the stride size may be bigger than the XSK frame size, because: 1. The stride size must be a power of two. 2. The stride size must be equal to the UMR page size. Each XSK frame is treated as a separate page, because they aren't necessarily adjacent in physical memory, so the driver can't put more than one stride per page. 3. The minimal MTT page size is 4096 on older firmware. That means that if XSK frame size is 2048 or not a power of two, the strides may be bigger than XSK frames. Normally, it's not a problem if the hardware enforces the MTU. However, traffic between vports skips the hardware MTU check, and oversized packets may be received. If an oversized packet is bigger than the XSK frame but not bigger than the stride, it will cause overwriting of the adjacent UMEM region. If the packet takes more than one stride, they can be recycled for reuse, so it's not a problem when the XSK frame size matches the stride size. Work around the above issue by leveraging KLM to make a more fine-grained mapping. The beginning of each stride is mapped to the frame memory, and the padding up to the closest power of two is mapped to the overflow page that doesn't belong to UMEM. This way, application data corruption won't happen upon receiving packets bigger than MTU. 
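The resulting per-stride layout can be sketched in plain C (illustration only; klm_entry is a stand-in structure, and the sizes and addresses are arbitrary example values):

    #include <stdint.h>
    #include <stdio.h>

    struct klm_entry {
            uint64_t va;       /* mapped address */
            uint32_t bcount;   /* length of this mapping */
    };

    int main(void)
    {
            uint32_t stride_size  = 4096;      /* UMR page / stride size */
            uint32_t frame_size   = 2048;      /* XSK chunk size */
            uint64_t frame_addr   = 0x100000;  /* somewhere in the UMEM */
            uint64_t garbage_page = 0x200000;  /* overflow page outside the UMEM */

            /* Two variable-size mappings per stride: the frame itself, then a
             * pad that absorbs writes beyond the frame boundary.
             */
            struct klm_entry klm[2] = {
                    { .va = frame_addr,   .bcount = frame_size },
                    { .va = garbage_page, .bcount = stride_size - frame_size },
            };

            printf("stride = %u bytes to UMEM + %u bytes to garbage\n",
                   (unsigned)klm[0].bcount, (unsigned)klm[1].bcount);
            return 0;
    }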
Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en/params.c | 45 ++++++++++++++++++++-- .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 27 +++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 27 +++++++++++-- 4 files changed, 90 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 93607db1dea4..7c6861d6148d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -680,6 +680,7 @@ struct mlx5e_hw_gro_data { enum mlx5e_mpwrq_umr_mode { MLX5E_MPWRQ_UMR_MODE_ALIGNED, MLX5E_MPWRQ_UMR_MODE_UNALIGNED, + MLX5E_MPWRQ_UMR_MODE_OVERSIZED, }; struct mlx5e_rq { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index e8c3b8abf941..203448ee9594 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -36,8 +36,28 @@ mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk) * 1. MTT - direct mapping in page granularity. * 2. KSM - indirect mapping to another MKey to arbitrary addresses, but * all mappings have the same size. + * 3. KLM - indirect mapping to another MKey to arbitrary addresses, and + * mappings can have different sizes. */ + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); bool unaligned = xsk ? xsk->unaligned : false; + bool oversized = false; + + if (xsk) { + oversized = xsk->chunk_size < (1 << page_shift); + WARN_ON_ONCE(xsk->chunk_size > (1 << page_shift)); + } + + /* XSK frame size doesn't match the UMR page size, either because the + * frame size is not a power of two, or it's smaller than the minimal + * page size supported by the firmware. + * It's possible to receive packets bigger than MTU in certain setups. + * To avoid writing over the XSK frame boundary, the top region of each + * stride is mapped to a garbage page, resulting in two mappings of + * different sizes per frame. + */ + if (oversized) + return MLX5E_MPWRQ_UMR_MODE_OVERSIZED; /* XSK frames can start at arbitrary unaligned locations, but they all * have the same size which is a power of two. It allows to optimize to @@ -60,6 +80,8 @@ u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode) return sizeof(struct mlx5_mtt); case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: return sizeof(struct mlx5_ksm); + case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: + return sizeof(struct mlx5_klm) * 2; } WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", mode); return 0; @@ -145,11 +167,21 @@ u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, enum mlx5e_mpwrq_umr_mode umr_mode) { - if (umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED) - return min(MLX5E_MAX_RQ_NUM_KSMS, - 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size)); + /* Same limits apply to KSMs and KLMs. */ + u32 klm_limit = min(MLX5E_MAX_RQ_NUM_KSMS, + 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size)); - return MLX5E_MAX_RQ_NUM_MTTS; + switch (umr_mode) { + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: + return MLX5E_MAX_RQ_NUM_MTTS; + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: + return klm_limit; + case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: + /* Each entry is two KLMs. 
*/ + return klm_limit / 2; + } + WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode); + return 0; } static u8 mlx5e_mpwrq_max_log_rq_size(struct mlx5_core_dev *mdev, u8 page_shift, @@ -1084,6 +1116,11 @@ static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev, xsk.unaligned = true; max_xsk_wqebbs = max(max_xsk_wqebbs, mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); + + /* XSK unaligned mode, frame size is not equal to stride size. */ + xsk.chunk_size -= 1; + max_xsk_wqebbs = max(max_xsk_wqebbs, + mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); } wqebbs += max_xsk_wqebbs; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index 4b2df2895505..78d746704345 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -41,7 +41,15 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe)); - if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) { + if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) { + for (i = 0; i < batch; i++) { + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + + umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { + .ptag = cpu_to_be64(addr | MLX5_EN_WR), + }; + } + } else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) { for (i = 0; i < batch; i++) { dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); @@ -51,11 +59,22 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) }; } } else { + __be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) - + rq->xsk_pool->chunk_size); + __be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size); + for (i = 0; i < batch; i++) { dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); - umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { - .ptag = cpu_to_be64(addr | MLX5_EN_WR), + umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr), + .bcount = frame_size, + }; + umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) { + .key = rq->mkey_be, + .va = cpu_to_be64(rq->wqe_overflow.addr), + .bcount = pad_size, }; } } @@ -70,6 +89,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) offset = ix * rq->mpwqe.mtts_per_wqe; if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD; + else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED)) + offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD; umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2093b6cc6c7c..ae728745379d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -299,6 +299,8 @@ static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode) return MLX5_MKC_ACCESS_MODE_MTT; case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: return MLX5_MKC_ACCESS_MODE_KSM; + case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: + return MLX5_MKC_ACCESS_MODE_KLMS; } WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode); return 0; @@ -307,10 +309,12 @@ static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode) static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, u32 
npages, u8 page_shift, u32 *umr_mkey, dma_addr_t filler_addr, - enum mlx5e_mpwrq_umr_mode umr_mode) + enum mlx5e_mpwrq_umr_mode umr_mode, + u32 xsk_chunk_size) { struct mlx5_mtt *mtt; struct mlx5_ksm *ksm; + struct mlx5_klm *klm; u32 octwords; int inlen; void *mkc; @@ -347,7 +351,8 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); MLX5_SET64(mkc, mkc, len, npages << page_shift); MLX5_SET(mkc, mkc, translations_octword_size, octwords); - MLX5_SET(mkc, mkc, log_page_size, page_shift); + if (umr_mode != MLX5E_MPWRQ_UMR_MODE_OVERSIZED) + MLX5_SET(mkc, mkc, log_page_size, page_shift); MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octwords); /* Initialize the mkey with all MTTs pointing to a default @@ -357,6 +362,21 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, * to the default page. */ switch (umr_mode) { + case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: + klm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0; i < npages; i++) { + klm[i << 1] = (struct mlx5_klm) { + .va = cpu_to_be64(filler_addr), + .bcount = cpu_to_be32(xsk_chunk_size), + .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), + }; + klm[(i << 1) + 1] = (struct mlx5_klm) { + .va = cpu_to_be64(filler_addr), + .bcount = cpu_to_be32((1 << page_shift) - xsk_chunk_size), + .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), + }; + } + break; case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); for (i = 0; i < npages; i++) @@ -415,6 +435,7 @@ static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev, static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq) { + u32 xsk_chunk_size = rq->xsk_pool ? rq->xsk_pool->chunk_size : 0; u32 wq_size = mlx5_wq_ll_get_size(&rq->mpwqe.wq); u32 num_entries, max_num_entries; u32 umr_mkey; @@ -432,7 +453,7 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq err = mlx5e_create_umr_mkey(mdev, num_entries, rq->mpwqe.page_shift, &umr_mkey, rq->wqe_overflow.addr, - rq->mpwqe.umr_mode); + rq->mpwqe.umr_mode, xsk_chunk_size); rq->mpwqe.umr_mkey_be = cpu_to_be32(umr_mkey); return err; } -- cgit v1.2.3 From c6f0420468fb2e7bbe006ed492608d63a4ac9e28 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Sat, 1 Oct 2022 21:56:25 -0700 Subject: net/mlx5e: xsk: Print a warning in slow configurations On striding RQ, when the XSK frame size doesn't match the MKey page size, KLM is used for memory mappings, which is a slower mechanism than MTT or KSM. It may happen in two cases: 1. Frame size is not a power of two (only possible in the unaligned mode of XSK). 2. Frame size is 2048 bytes, and the firmware doesn't support MKey pages smaller than 4096 bytes. Depending on the case, print a warning and recommend to disable striding RQ or upgrade the firmware. 
Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c index 8b09e2f58a4d..ebada0c5af3c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -99,6 +99,15 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, mlx5e_build_xsk_param(pool, &xsk); + if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + mlx5e_mpwrq_umr_mode(priv->mdev, &xsk) == MLX5E_MPWRQ_UMR_MODE_OVERSIZED) { + const char *recommendation = is_power_of_2(xsk.chunk_size) ? + "Upgrade firmware" : "Disable striding RQ"; + + mlx5_core_warn(priv->mdev, "Expected slowdown with XSK frame size %u. %s for better performance.\n", + xsk.chunk_size, recommendation); + } + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { /* XSK objects will be created on open. */ goto validate_closed; -- cgit v1.2.3 From c2c9e31dfa4f23045f72f613c5809d5b030cd27f Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Sat, 1 Oct 2022 21:56:26 -0700 Subject: net/mlx5e: xsk: Optimize for unaligned mode with 3072-byte frames When XSK frame size is 3072 (or another power of two multiplied by 3), KLM mechanism for NIC virtual memory page mapping can be optimized by replacing it with KSM. Before this change, two KLM entries were needed to map an XSK frame that is not a power of two: one entry maps the UMEM memory up to the frame length, the other maps the rest of the stride to the garbage page. When the frame length divided by 3 is a power of two, it can be mapped using 3 KSM entries, and the fourth will map the rest of the stride to the garbage page. All 4 KSM entries are of the same size, which allows for a much faster lookup. Frame size 3072 is useful in certain use cases, because it allows packing 4 frames into 3 pages. Generally speaking, other frame sizes equal to PAGE_SIZE minus a power of two can be optimized in a similar way, but it will require many more KSMs per frame, which slows down UMRs a little bit, but more importantly may hit the limit for the maximum number of KSM entries. 
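The arithmetic behind the 3072-byte case, as a small sketch (numbers are for illustration; a 4096-byte stride is assumed):

    #include <stdio.h>

    int main(void)
    {
            unsigned int stride = 4096;          /* stride / UMR page size */
            unsigned int frame  = 3072;          /* XSK chunk size, 3 * 1024 */
            unsigned int ksm_sz = stride / 4;    /* all four mappings are equal: 1024 bytes */

            printf("%u KSMs cover the frame, 1 KSM of %u bytes maps the garbage page\n",
                   frame / ksm_sz, stride - frame);
            printf("4 frames = %u bytes = 3 pages of %u bytes\n",
                   4 * frame, stride);
            return 0;
    }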
Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en/params.c | 20 ++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 25 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 18 ++++++++++++++-- 4 files changed, 61 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 7c6861d6148d..26a23047f1f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -681,6 +681,7 @@ enum mlx5e_mpwrq_umr_mode { MLX5E_MPWRQ_UMR_MODE_ALIGNED, MLX5E_MPWRQ_UMR_MODE_UNALIGNED, MLX5E_MPWRQ_UMR_MODE_OVERSIZED, + MLX5E_MPWRQ_UMR_MODE_TRIPLE, }; struct mlx5e_rq { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 203448ee9594..29dd3a04c154 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -56,8 +56,16 @@ mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk) * stride is mapped to a garbage page, resulting in two mappings of * different sizes per frame. */ - if (oversized) + if (oversized) { + /* An optimization for frame sizes equal to 3 * power_of_two. + * 3 KSMs point to the frame, and one KSM points to the garbage + * page, which works faster than KLM. + */ + if (xsk->chunk_size % 3 == 0 && is_power_of_2(xsk->chunk_size / 3)) + return MLX5E_MPWRQ_UMR_MODE_TRIPLE; + return MLX5E_MPWRQ_UMR_MODE_OVERSIZED; + } /* XSK frames can start at arbitrary unaligned locations, but they all * have the same size which is a power of two. It allows to optimize to @@ -82,6 +90,8 @@ u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode) return sizeof(struct mlx5_ksm); case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: return sizeof(struct mlx5_klm) * 2; + case MLX5E_MPWRQ_UMR_MODE_TRIPLE: + return sizeof(struct mlx5_ksm) * 4; } WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", mode); return 0; @@ -179,6 +189,9 @@ u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: /* Each entry is two KLMs. */ return klm_limit / 2; + case MLX5E_MPWRQ_UMR_MODE_TRIPLE: + /* Each entry is four KSMs. */ + return klm_limit / 4; } WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode); return 0; @@ -1121,6 +1134,11 @@ static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev, xsk.chunk_size -= 1; max_xsk_wqebbs = max(max_xsk_wqebbs, mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); + + /* XSK unaligned mode, frame size is a triple power of two. 
*/ + xsk.chunk_size = (1 << frame_shift) / 4 * 3; + max_xsk_wqebbs = max(max_xsk_wqebbs, + mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); } wqebbs += max_xsk_wqebbs; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index 78d746704345..c91b54d9ff27 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -58,6 +58,29 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) .va = cpu_to_be64(addr), }; } + } else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) { + u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2); + + for (i = 0; i < batch; i++) { + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + + umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr), + }; + umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr + mapping_size), + }; + umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr + mapping_size * 2), + }; + umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(rq->wqe_overflow.addr), + }; + } } else { __be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) - rq->xsk_pool->chunk_size); @@ -91,6 +114,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD; else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED)) offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD; + else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) + offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD; umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ae728745379d..d4f03ff7b0e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -301,6 +301,8 @@ static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode) return MLX5_MKC_ACCESS_MODE_KSM; case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: return MLX5_MKC_ACCESS_MODE_KLMS; + case MLX5E_MPWRQ_UMR_MODE_TRIPLE: + return MLX5_MKC_ACCESS_MODE_KSM; } WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode); return 0; @@ -322,7 +324,8 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, int err; int i; - if (umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED && + if ((umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED || + umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE) && !MLX5_CAP_GEN(mdev, fixed_buffer_size)) { mlx5_core_warn(mdev, "Unaligned AF_XDP requires fixed_buffer_size capability\n"); return -EINVAL; @@ -351,7 +354,9 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); MLX5_SET64(mkc, mkc, len, npages << page_shift); MLX5_SET(mkc, mkc, translations_octword_size, octwords); - if (umr_mode != MLX5E_MPWRQ_UMR_MODE_OVERSIZED) + if (umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE) + MLX5_SET(mkc, mkc, log_page_size, page_shift - 2); + else if (umr_mode != MLX5E_MPWRQ_UMR_MODE_OVERSIZED) MLX5_SET(mkc, mkc, log_page_size, page_shift); MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octwords); @@ -392,6 +397,15 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, .ptag = 
cpu_to_be64(filler_addr), }; break; + case MLX5E_MPWRQ_UMR_MODE_TRIPLE: + ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0; i < npages * 4; i++) { + ksm[i] = (struct mlx5_ksm) { + .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), + .va = cpu_to_be64(filler_addr), + }; + } + break; } err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen); -- cgit v1.2.3 From 16ab85e78439bab1201ff26ba430231d1574b4ae Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sat, 1 Oct 2022 21:56:27 -0700 Subject: net/mlx5e: Expose rx_oversize_pkts_buffer counter Add the rx_oversize_pkts_buffer counter to ethtool statistics. This counter reports the number of received packets that were dropped because their length exceeded the software buffer size allocated by the device for incoming traffic on the RQ. A non-zero value may indicate that the device MTU is larger than the software buffer size. Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 21 ++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 4 ++++ include/linux/mlx5/mlx5_ifc.h | 8 ++++++-- 4 files changed, 32 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d4f03ff7b0e1..364f04309149 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3699,7 +3699,8 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->rx_length_errors = PPORT_802_3_GET(pstats, a_in_range_length_errors) + PPORT_802_3_GET(pstats, a_out_of_range_length_field) + - PPORT_802_3_GET(pstats, a_frame_too_long_errors); + PPORT_802_3_GET(pstats, a_frame_too_long_errors) + + VNIC_ENV_GET(&priv->stats.vnic, eth_wqe_too_small); stats->rx_crc_errors = PPORT_802_3_GET(pstats, a_frame_check_sequence_errors); stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 575717186912..03c1841970f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -641,17 +641,26 @@ static const struct counter_desc vnic_env_stats_dev_oob_desc[] = { VNIC_ENV_OFF(vport_env.internal_rq_out_of_buffer) }, }; +static const struct counter_desc vnic_env_stats_drop_desc[] = { + { "rx_oversize_pkts_buffer", + VNIC_ENV_OFF(vport_env.eth_wqe_too_small) }, +}; + #define NUM_VNIC_ENV_STEER_COUNTERS(dev) \ (MLX5_CAP_GEN(dev, nic_receive_steering_discard) ? \ ARRAY_SIZE(vnic_env_stats_steer_desc) : 0) #define NUM_VNIC_ENV_DEV_OOB_COUNTERS(dev) \ (MLX5_CAP_GEN(dev, vnic_env_int_rq_oob) ? \ ARRAY_SIZE(vnic_env_stats_dev_oob_desc) : 0) +#define NUM_VNIC_ENV_DROP_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, eth_wqe_too_small) ?
\ + ARRAY_SIZE(vnic_env_stats_drop_desc) : 0) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vnic_env) { return NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev) + - NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); + NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev) + + NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vnic_env) @@ -665,6 +674,11 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vnic_env) for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, vnic_env_stats_dev_oob_desc[i].format); + + for (i = 0; i < NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vnic_env_stats_drop_desc[i].format); + return idx; } @@ -679,6 +693,11 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vnic_env) for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++) data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out, vnic_env_stats_dev_oob_desc, i); + + for (i = 0; i < NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); i++) + data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out, + vnic_env_stats_drop_desc, i); + return idx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 99e321bfb744..9f781085be47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -273,6 +273,10 @@ struct mlx5e_qcounter_stats { u32 rx_if_down_packets; }; +#define VNIC_ENV_GET(vnic_env_stats, c) \ + MLX5_GET(query_vnic_env_out, (vnic_env_stats)->query_vnic_env_out, \ + vport_env.c) + struct mlx5e_vnic_env_stats { __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)]; }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1ad762e22d86..06574d430ff5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1491,7 +1491,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_120[0xa]; u8 log_max_ra_req_dc[0x6]; - u8 reserved_at_130[0x9]; + u8 reserved_at_130[0x2]; + u8 eth_wqe_too_small[0x1]; + u8 reserved_at_133[0x6]; u8 vnic_env_cq_overrun[0x1]; u8 log_max_ra_res_dc[0x6]; @@ -3537,7 +3539,9 @@ struct mlx5_ifc_vnic_diagnostic_statistics_bits { u8 cq_overrun[0x20]; - u8 reserved_at_220[0xde0]; + u8 eth_wqe_too_small[0x20]; + + u8 reserved_at_220[0xdc0]; }; struct mlx5_ifc_traffic_counter_bits { -- cgit v1.2.3 From 9b98d395b85dd042fe83fb696b1ac02e6c93a520 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sat, 1 Oct 2022 21:56:28 -0700 Subject: net/mlx5: Start health poll at earlier stage of driver load Start the health poll at an earlier stage, so that if a fw fatal issue occurs before or during initialization commands such as init_hca or set_hca_cap, the health poll can detect it and indicate that the driver is already in an error state.
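A toy sketch of the resulting load order (the functions below are stand-ins, not the real mlx5 API) shows the point of the reordering: any initialization command that fails after the poll is armed fails under its watch.

/* Toy model of the reordered setup path; all functions are stand-ins. */
#include <stdio.h>

static void start_health_poll(void) { puts("health poll armed"); }
static void stop_health_poll(void)  { puts("health poll stopped"); }
static void start_fw_log_work(void) { puts("fw log timestamp work queued"); }
static int enable_hca(void)  { puts("ENABLE_HCA"); return 0; }
static int init_hca(void)    { puts("INIT_HCA");   return -1; /* simulate a fw fatal error */ }

static int function_setup(void)
{
	start_health_poll();        /* armed before the first init command */
	if (enable_hca())
		goto err_stop_poll;
	if (init_hca())
		goto err_stop_poll;     /* the poll was already watching when fw failed */
	start_fw_log_work();        /* deferred until setup has succeeded */
	return 0;

err_stop_poll:
	stop_health_poll();
	return -1;
}

int main(void)
{
	return function_setup() ? 1 : 0;
}

In the previous order the poll was only started after the query-HCA step, so a fatal error during these early commands was not surfaced by the poll.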
Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 11 ++++++++--- drivers/net/ethernet/mellanox/mlx5/core/main.c | 17 ++++++++++------- include/linux/mlx5/driver.h | 1 + 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 59205ba2ef7b..5bfc54a10621 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -843,9 +843,6 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms); add_timer(&health->timer); - - if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc)) - queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0); } void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) @@ -862,6 +859,14 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) del_timer_sync(&health->timer); } +void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc)) + queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0); +} + void mlx5_drain_health_wq(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index daa7442f31c9..0b459d841c3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1092,7 +1092,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_devcom_unregister_device(dev->priv.devcom); } -static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout) +static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout) { int err; @@ -1130,10 +1130,12 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout) mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP); + mlx5_start_health_poll(dev); + err = mlx5_core_enable_hca(dev, 0); if (err) { mlx5_core_err(dev, "enable hca failed\n"); - goto err_cmd_cleanup; + goto stop_health_poll; } err = mlx5_core_set_issi(dev); @@ -1185,8 +1187,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout) mlx5_core_err(dev, "query hca failed\n"); goto reclaim_boot_pages; } - - mlx5_start_health_poll(dev); + mlx5_start_health_fw_log_up(dev); return 0; @@ -1194,6 +1195,8 @@ reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); err_disable_hca: mlx5_core_disable_hca(dev, 0); +stop_health_poll: + mlx5_stop_health_poll(dev, boot); err_cmd_cleanup: mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); @@ -1205,7 +1208,6 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) { int err; - mlx5_stop_health_poll(dev, boot); err = mlx5_cmd_teardown_hca(dev); if (err) { mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n"); @@ -1213,6 +1215,7 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) } mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev, 0); + mlx5_stop_health_poll(dev, boot); mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); @@ -1362,7 +1365,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev) mutex_lock(&dev->intf_state_mutex); dev->state = MLX5_DEVICE_STATE_UP; - err = mlx5_function_setup(dev, 
mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); + err = mlx5_function_setup(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); if (err) goto err_function; @@ -1450,7 +1453,7 @@ int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery) timeout = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT); else timeout = mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT); - err = mlx5_function_setup(dev, timeout); + err = mlx5_function_setup(dev, false, timeout); if (err) goto err_function; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 285f301a6390..a12929bc31b2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1017,6 +1017,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health); +void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev); void mlx5_drain_health_wq(struct mlx5_core_dev *dev); void mlx5_trigger_health_work(struct mlx5_core_dev *dev); int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, -- cgit v1.2.3 From 1330bd9884efc49f5b5ca854cf1185f1bec705d0 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Sat, 1 Oct 2022 21:56:29 -0700 Subject: net/mlx5: Set default grace period based on function type Currently, driver sets the same grace period for fw fatal health reporter to any type of function. Since the lower level functions are more vulnerable to fw fatal errors as a result of parent function closure/reload, set a smaller grace period for the lower level functions, as follows: 1. For ECPF: 180 seconds. 2. For PF: 60 seconds. 3. For VF/SF: 30 seconds. Signed-off-by: Maher Sanalla Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 5bfc54a10621..86ed87d704f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -702,11 +702,25 @@ static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { .dump = mlx5_fw_fatal_reporter_dump, }; -#define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000 +#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000 +#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000 +#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000 +#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD + static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; struct devlink *devlink = priv_to_devlink(dev); + u64 grace_period; + + if (mlx5_core_is_ecpf(dev)) { + grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD; + } else if (mlx5_core_is_pf(dev)) { + grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD; + } else { + /* VF or SF */ + grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD; + } health->fw_reporter = devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops, @@ -718,7 +732,7 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) health->fw_fatal_reporter = devlink_health_reporter_create(devlink, &mlx5_fw_fatal_reporter_ops, - MLX5_REPORTER_FW_GRACEFUL_PERIOD, + grace_period, dev); if (IS_ERR(health->fw_fatal_reporter)) mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = 
%ld\n", -- cgit v1.2.3 From 8c9cc1eb90c13ee3ec2a8a52af4e564a9b161047 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sat, 1 Oct 2022 21:56:30 -0700 Subject: net/mlx5: E-Switch, Allow offloading fwd dest flow table with vport Before this commit, a fwd dest flow table caused vport dests to be ignored, which is incorrect, since this combination is supported. With this commit, the dests can be a mix of flow table and vport dests. One limitation remains: there cannot be more than one flow table dest. Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index c98c6af21581..4e50df3139c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -483,25 +483,27 @@ esw_setup_dests(struct mlx5_flow_destination *dest, !(attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)) { esw_setup_sampler_dest(dest, flow_act, attr->sample_attr.sampler_id, *i); (*i)++; - } else if (attr->dest_ft) { - esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); - (*i)++; } else if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) { esw_setup_slow_path_dest(dest, flow_act, esw, *i); (*i)++; } else if (attr->flags & MLX5_ATTR_FLAG_ACCEPT) { esw_setup_accept_dest(dest, flow_act, chains, *i); (*i)++; - } else if (attr->dest_chain) { - err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, - 1, 0, *i); - (*i)++; } else if (esw_is_indir_table(esw, attr)) { err = esw_setup_indir_table(dest, flow_act, esw, attr, spec, true, i); } else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) { err = esw_setup_chain_src_port_rewrite(dest, flow_act, esw, chains, attr, i); } else { *i = esw_setup_vport_dests(dest, flow_act, esw, esw_attr, *i); + + if (attr->dest_ft) { + err = esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); + (*i)++; + } else if (attr->dest_chain) { + err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, - 1, 0, *i); + (*i)++; + } } return err; -- cgit v1.2.3 From 909ffe462a18041f064656b796999bb524c72a66 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Sat, 1 Oct 2022 21:56:31 -0700 Subject: net/mlx5: E-switch, Don't update group if qos is not enabled Currently, the qos group is updated and qos is enabled when unregistering a devlink port, even though there is no need to update the group if qos is not enabled. Add a check to prevent unnecessarily enabling and disabling qos for every port.
Signed-off-by: Chris Mi Reviewed-by: Dmytro Linkin Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 694c54066955..4f8a24d84a86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -924,12 +924,16 @@ int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, struct netlink_ext_ack *extack) { - int err; + int err = 0; mutex_lock(&esw->state_lock); + if (!vport->qos.enabled && !group) + goto unlock; + err = esw_qos_vport_enable(esw, vport, 0, 0, extack); if (!err) err = esw_qos_vport_update_group(esw, vport, group, extack); +unlock: mutex_unlock(&esw->state_lock); return err; } -- cgit v1.2.3 From 794131c40850a9c68ba9955aa7749e92b903d73f Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Sat, 1 Oct 2022 21:56:32 -0700 Subject: net/mlx5: E-Switch, Return EBUSY if can't get mode lock Return -EBUSY instead of -EAGAIN when the E-Switch mode lock cannot be taken, so that tc does not keep retrying while the device mode is changing. Signed-off-by: Jianbo Liu Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 0872a214d2a3..70a7a61f9708 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4463,7 +4463,7 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, int err = 0; if (!mlx5_esw_hold(priv->mdev)) - return -EAGAIN; + return -EBUSY; mlx5_esw_get(priv->mdev); -- cgit v1.2.3
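As an aside on the errno choice, this small userspace sketch (hypothetical caller, not tc or kernel code) illustrates the behavioural difference: a caller that retries on -EAGAIN would spin for the whole duration of a mode change, whereas -EBUSY makes the request fail immediately.

/* Userspace toy model only; not kernel or tc code. */
#include <errno.h>
#include <stdio.h>

/* Pretend the eswitch mode lock is unavailable while a mode change runs. */
static int configure_flower(void)
{
	return -EBUSY;	/* was -EAGAIN before this patch */
}

int main(void)
{
	int tries = 0;
	int ret;

	/* A retry-on-EAGAIN caller: with -EBUSY it stops after one attempt. */
	do {
		ret = configure_flower();
		tries++;
	} while (ret == -EAGAIN && tries < 1000);

	printf("gave up after %d attempt(s), ret=%d\n", tries, ret);
	return 0;
}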