From e1d68ea58c7e9ebacd9ad7a99b25a3578fa62182 Mon Sep 17 00:00:00 2001 From: William Tu Date: Sun, 9 Feb 2025 12:17:07 +0200 Subject: net/mlx5e: reduce the max log mpwrq sz for ECPF and reps For the ECPF and representors, reduce the max MPWRQ size from 256KB (18) to 128KB (17). This prepares the later patch for saving representor memory. With Striding RQ, there is a minimum of 4 MPWQEs. So with 128KB of max MPWRQ size, the minimal memory is 4 * 128KB = 512KB. When creating page pool, consider 1500 mtu, the minimal page pool size will be 512KB/4KB = 128 pages = 256 rx ring entries (2 entries per page). Before this patch, setting RX ringsize (ethtool -G rx) to 256 causes driver to allocate page pool size more than it needs due to max MPWRQ is 256KB (18). Ex: 4 * 256KB = 1MB, 1MB/4KB = 256 pages, but actually 128 pages is good enough. Reducing the max MPWRQ to 128KB fixes the limitation. Signed-off-by: William Tu Signed-off-by: Tariq Toukan Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250209101716.112774-7-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 -- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 15 +++++++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 979fc56205e1..534fdd27c8de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -95,8 +95,6 @@ struct page_pool; #define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \ MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD)) -#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18 - /* Keep in sync with mlx5e_mpwrq_log_wqe_sz. * These are theoretical maximums, which can be further restricted by * capabilities. These values are used for static resource allocations and diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 64b62ed17b07..e37d4c202bba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -10,6 +10,9 @@ #include #include +#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18 +#define MLX5_REP_MPWRQ_MAX_LOG_WQE_SZ 17 + static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev) { u8 min_page_shift = MLX5_CAP_GEN_2(mdev, log_min_mkey_entity_size); @@ -103,18 +106,22 @@ u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, enum mlx5e_mpwrq_umr_mode umr_mode) { u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); - u8 max_pages_per_wqe, max_log_mpwqe_size; + u8 max_pages_per_wqe, max_log_wqe_size_calc; + u8 max_log_wqe_size_cap; u16 max_wqe_size; /* Keep in sync with MLX5_MPWRQ_MAX_PAGES_PER_WQE. */ max_wqe_size = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB; max_pages_per_wqe = ALIGN_DOWN(max_wqe_size - sizeof(struct mlx5e_umr_wqe), MLX5_UMR_FLEX_ALIGNMENT) / umr_entry_size; - max_log_mpwqe_size = ilog2(max_pages_per_wqe) + page_shift; + max_log_wqe_size_calc = ilog2(max_pages_per_wqe) + page_shift; + + WARN_ON_ONCE(max_log_wqe_size_calc < MLX5E_ORDER2_MAX_PACKET_MTU); - WARN_ON_ONCE(max_log_mpwqe_size < MLX5E_ORDER2_MAX_PACKET_MTU); + max_log_wqe_size_cap = mlx5_core_is_ecpf(mdev) ? + MLX5_REP_MPWRQ_MAX_LOG_WQE_SZ : MLX5_MPWRQ_MAX_LOG_WQE_SZ; - return min_t(u8, max_log_mpwqe_size, MLX5_MPWRQ_MAX_LOG_WQE_SZ); + return min_t(u8, max_log_wqe_size_calc, max_log_wqe_size_cap); } u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, -- cgit v1.2.3 From b9cc8f9d700867aaa77aedddfea85e53d5e5d584 Mon Sep 17 00:00:00 2001 From: William Tu Date: Sun, 9 Feb 2025 12:17:08 +0200 Subject: net/mlx5e: reduce rep rxq depth to 256 for ECPF By experiments, a single queue representor netdev consumes kernel memory around 2.8MB, and 1.8MB out of the 2.8MB is due to page pool for the RXQ. Scaling to a thousand representors consumes 2.8GB, which becomes a memory pressure issue for embedded devices such as BlueField-2 16GB / BlueField-3 32GB memory. Since representor netdevs mostly handles miss traffic, and ideally, most of the traffic will be offloaded, reduce the default non-uplink rep netdev's RXQ default depth from 1024 to 256 if mdev is ecpf eswitch manager. This saves around 1MB of memory per regular RQ, (1024 - 256) * 2KB, allocated from page pool. With rxq depth of 256, the netlink page pool tool reports $./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \ --dump page-pool-get {'id': 277, 'ifindex': 9, 'inflight': 128, 'inflight-mem': 786432, 'napi-id': 775}] This is due to mtu 1500 + headroom consumes half pages, so 256 rxq entries consumes around 128 pages (thus create a page pool with size 128), shown above at inflight. Note that each netdev has multiple types of RQs, including Regular RQ, XSK, PTP, Drop, Trap RQ. Since non-uplink representor only supports regular rq, this patch only changes the regular RQ's default depth. Signed-off-by: William Tu Reviewed-by: Bodong Wang Reviewed-by: Saeed Mahameed Signed-off-by: Tariq Toukan Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250209101716.112774-8-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index fdff9fd8a89e..da399adc8854 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -65,6 +65,7 @@ #define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) #define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1 +#define MLX5E_REP_PARAMS_DEF_LOG_RQ_SIZE 0x8 static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; @@ -855,6 +856,8 @@ static void mlx5e_build_rep_params(struct net_device *netdev) /* RQ */ mlx5e_build_rq_params(mdev, params); + if (!mlx5e_is_uplink_rep(priv) && mlx5_core_is_ecpf(mdev)) + params->log_rq_mtu_frames = MLX5E_REP_PARAMS_DEF_LOG_RQ_SIZE; /* If netdev is already registered (e.g. move from nic profile to uplink, * RTNL lock must be held before triggering netdev notifiers. -- cgit v1.2.3 From a38cc5706fb9f7dc4ee3a443f61de13ce1e410ed Mon Sep 17 00:00:00 2001 From: William Tu Date: Sun, 9 Feb 2025 12:17:09 +0200 Subject: net/mlx5e: set the tx_queue_len for pfifo_fast By default, the mq netdev creates a pfifo_fast qdisc. On a system with 16 core, the pfifo_fast with 3 bands consumes 16 * 3 * 8 (size of pointer) * 1024 (default tx queue len) = 393KB. The patch sets the tx qlen to representor default value, 128 (1< Reviewed-by: Daniel Jurgens Signed-off-by: Tariq Toukan Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250209101716.112774-9-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index da399adc8854..07f38f472a27 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -889,6 +889,8 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev, netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; netdev->watchdog_timeo = 15 * HZ; + if (mlx5_core_is_ecpf(mdev)) + netdev->tx_queue_len = 1 << MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE; #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) netdev->hw_features |= NETIF_F_HW_TC; -- cgit v1.2.3 From 38b3d42e5afa30bb59943b5410edeeb2a55b169d Mon Sep 17 00:00:00 2001 From: Akiva Goldberger Date: Sun, 9 Feb 2025 12:17:10 +0200 Subject: net/mlx5: Rename and move mlx5_esw_query_vport_vhca_id Rename mlx5_esw_query_vport_vhca_id to mlx5_vport_get_vhca_id and move it to vport file. Also, add function declaration to mlx5_core header file. This better represents the function's usage and allows for it to be called from other parts of the mlx5_core driver. Signed-off-by: Akiva Goldberger Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250209101716.112774-10-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 29 ++-------------------- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/vport.c | 25 +++++++++++++++++++ 3 files changed, 29 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 20cc01ceee8a..0fa0333106a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -4157,37 +4157,12 @@ u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); -static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num, u16 *vhca_id) -{ - int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); - void *query_ctx; - void *hca_caps; - int err; - - *vhca_id = 0; - - query_ctx = kzalloc(query_out_sz, GFP_KERNEL); - if (!query_ctx) - return -ENOMEM; - - err = mlx5_vport_get_other_func_general_cap(esw->dev, vport_num, query_ctx); - if (err) - goto out_free; - - hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id); - -out_free: - kfree(query_ctx); - return err; -} - int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num) { u16 *old_entry, *vhca_map_entry, vhca_id; int err; - err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id); + err = mlx5_vport_get_vhca_id(esw->dev, vport_num, &vhca_id); if (err) { esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%u,err=%d)\n", vport_num, err); @@ -4213,7 +4188,7 @@ void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num) u16 *vhca_map_entry, vhca_id; int err; - err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id); + err = mlx5_vport_get_vhca_id(esw->dev, vport_num, &vhca_id); if (err) esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%hu,err=%d)\n", vport_num, err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 99de67c3aa74..6fef1005c469 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -346,6 +346,8 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap #define mlx5_vport_get_other_func_general_cap(dev, vport, out) \ mlx5_vport_get_other_func_cap(dev, vport, out, MLX5_CAP_GENERAL) +int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id); + static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 0d5f750faa45..d10d4c396040 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1199,6 +1199,31 @@ int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *ou } EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap); +int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id) +{ + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *query_ctx; + void *hca_caps; + int err; + + *vhca_id = 0; + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + err = mlx5_vport_get_other_func_general_cap(dev, vport, query_ctx); + if (err) + goto out_free; + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id); + +out_free: + kfree(query_ctx); + return err; +} + int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 vport, u16 opmod) { -- cgit v1.2.3 From b820864335c8b39b0e90d04ecbebdeed4d3d6dfd Mon Sep 17 00:00:00 2001 From: Akiva Goldberger Date: Sun, 9 Feb 2025 12:17:11 +0200 Subject: net/mlx5: Expose ICM consumption per function ICM is a portion of the host's memory assigned to a function by the OS through requests made by the NIC's firmware. PF ICM consumption can be accessed directly, while VF/SF ICM consumption can be accessed through their representors in switchdev mode. The value is exposed to the user in granularity of 4KB through the vnic health reporter as follows: $ devlink health diagnose pci/0000:08:00.0 reporter vnic vNIC env counters: total_error_queues: 0 send_queue_priority_update_flow: 0 comp_eq_overrun: 0 async_eq_overrun: 0 cq_overrun: 0 invalid_command: 0 quota_exceeded_command: 0 nic_receive_steering_discard: 0 icm_consumption: 1032 Signed-off-by: Akiva Goldberger Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250209101716.112774-11-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/networking/devlink/mlx5.rst | 4 ++ .../mellanox/mlx5/core/diag/reporter_vnic.c | 46 ++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst index 41618538fc70..7febe0aecd53 100644 --- a/Documentation/networking/devlink/mlx5.rst +++ b/Documentation/networking/devlink/mlx5.rst @@ -280,6 +280,10 @@ Description of the vnic counters: number of packets handled by the VNIC experiencing unexpected steering failure (at any point in steering flow owned by the VNIC, including the FDB for the eswitch owner). +- icm_consumption + amount of Interconnect Host Memory (ICM) consumed by the vnic in + granularity of 4KB. ICM is host memory allocated by SW upon HCA request + and is used for storing data structures that control HCA operation. User commands examples: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c index c7216e84ef8c..86253a89c24c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c @@ -13,6 +13,50 @@ struct mlx5_vnic_diag_stats { __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)]; }; +static void mlx5_reporter_vnic_diagnose_counter_icm(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg, + u16 vport_num, bool other_vport) +{ + u32 out_icm_reg[MLX5_ST_SZ_DW(vhca_icm_ctrl_reg)] = {}; + u32 in_icm_reg[MLX5_ST_SZ_DW(vhca_icm_ctrl_reg)] = {}; + u32 out_reg[MLX5_ST_SZ_DW(nic_cap_reg)] = {}; + u32 in_reg[MLX5_ST_SZ_DW(nic_cap_reg)] = {}; + u32 cur_alloc_icm; + int vhca_icm_ctrl; + u16 vhca_id; + int err; + + err = mlx5_core_access_reg(dev, in_reg, sizeof(in_reg), out_reg, + sizeof(out_reg), MLX5_REG_NIC_CAP, 0, 0); + if (err) { + mlx5_core_warn(dev, "Reading nic_cap_reg failed. err = %d\n", err); + return; + } + vhca_icm_ctrl = MLX5_GET(nic_cap_reg, out_reg, vhca_icm_ctrl); + if (!vhca_icm_ctrl) + return; + + MLX5_SET(vhca_icm_ctrl_reg, in_icm_reg, vhca_id_valid, other_vport); + if (other_vport) { + err = mlx5_vport_get_vhca_id(dev, vport_num, &vhca_id); + if (err) { + mlx5_core_warn(dev, "vport to vhca_id failed. vport_num = %d, err = %d\n", + vport_num, err); + return; + } + MLX5_SET(vhca_icm_ctrl_reg, in_icm_reg, vhca_id, vhca_id); + } + err = mlx5_core_access_reg(dev, in_icm_reg, sizeof(in_icm_reg), + out_icm_reg, sizeof(out_icm_reg), + MLX5_REG_VHCA_ICM_CTRL, 0, 0); + if (err) { + mlx5_core_warn(dev, "Reading vhca_icm_ctrl failed. err = %d\n", err); + return; + } + cur_alloc_icm = MLX5_GET(vhca_icm_ctrl_reg, out_icm_reg, cur_alloc_icm); + devlink_fmsg_u32_pair_put(fmsg, "icm_consumption", cur_alloc_icm); +} + void mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, struct devlink_fmsg *fmsg, u16 vport_num, bool other_vport) @@ -59,6 +103,8 @@ void mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail", VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail)); } + if (MLX5_CAP_GEN(dev, nic_cap_reg)) + mlx5_reporter_vnic_diagnose_counter_icm(dev, fmsg, vport_num, other_vport); devlink_fmsg_obj_nest_end(fmsg); devlink_fmsg_pair_nest_end(fmsg); -- cgit v1.2.3 From 913175b3f919eacba5e5d303897371f6058648a0 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Sun, 9 Feb 2025 12:17:12 +0200 Subject: net/mlx5e: Move RQs diagnose to a dedicated function Move rx reporter RQs diagnose from mlx5e_rx_reporter_diagnose() to a dedicated function. This change is a preparation for the following series which extends diagnose output for the rx reporter. While at it, also pass a mlx5e_priv pointer to mlx5e_rx_reporter_diagnose_common_config() as this is the argument the latter actually needs. Signed-off-by: Amir Tzin Reviewed-by: Aya Levin Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250209101716.112774-12-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/en/reporter_rx.c | 31 +++++++++++++--------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 25d751eba99b..9255ab662af9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -317,10 +317,8 @@ mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx } static void -mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg) +mlx5e_rx_reporter_diagnose_common_config(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { - struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq; struct mlx5e_ptp *ptp_ch = priv->channels.ptp; @@ -340,20 +338,11 @@ static void mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq, devlink_fmsg_obj_nest_end(fmsg); } -static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, - struct netlink_ext_ack *extack) +static void mlx5e_rx_reporter_diagnose_rqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { - struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); struct mlx5e_ptp *ptp_ch = priv->channels.ptp; int i; - mutex_lock(&priv->state_lock); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - goto unlock; - - mlx5e_rx_reporter_diagnose_common_config(reporter, fmsg); devlink_fmsg_arr_pair_nest_start(fmsg, "RQs"); for (i = 0; i < priv->channels.num; i++) { @@ -367,7 +356,23 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, } if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) mlx5e_rx_reporter_build_diagnose_output_ptp_rq(&ptp_ch->rq, fmsg); + devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + mlx5e_rx_reporter_diagnose_common_config(priv, fmsg); + mlx5e_rx_reporter_diagnose_rqs(priv, fmsg); unlock: mutex_unlock(&priv->state_lock); return 0; -- cgit v1.2.3 From 99c55284e85b3e2f28e1fd28e9946233424880d7 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Sun, 9 Feb 2025 12:17:13 +0200 Subject: net/mlx5e: Add direct TIRs to devlink rx reporter diagnose Add "RX resources" tag to the output of rx reporter diagnose callback. Underneath add tag for direct TIRs, for each TIR expose its tirn and the corresponding rqtn. $ sudo devlink health diagnose auxiliary/mlx5_core.eth.0/65535 reporter rx .... rx resources: Direct TIRs: ix: 0 tirn: 20 rqtn: 1 ix: 1 tirn: 21 rqtn: 2 ix: 2 tirn: 22 rqtn: 3 ix: 3 tirn: 23 rqtn: 4 ix: 4 tirn: 24 rqtn: 5 ix: 5 tirn: 25 rqtn: 6 ix: 6 tirn: 26 rqtn: 7 ix: 7 tirn: 27 rqtn: 8 ix: 8 tirn: 28 rqtn: 9 ix: 9 tirn: 29 rqtn: 10 ix: 10 tirn: 30 rqtn: 11 ix: 11 tirn: 31 rqtn: 12 ix: 12 tirn: 32 rqtn: 13 ix: 13 tirn: 33 rqtn: 14 ix: 14 tirn: 34 rqtn: 15 ix: 15 tirn: 35 rqtn: 16 ix: 16 tirn: 36 rqtn: 17 ix: 17 tirn: 37 rqtn: 18 ix: 18 tirn: 38 rqtn: 19 ix: 19 tirn: 39 rqtn: 20 ix: 20 tirn: 40 rqtn: 21 ix: 21 tirn: 41 rqtn: 22 ix: 22 tirn: 42 rqtn: 23 ix: 23 tirn: 43 rqtn: 24 Signed-off-by: Amir Tzin Reviewed-by: Aya Levin Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250209101716.112774-13-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/en/reporter_rx.c | 32 ++++++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en/rx_res.c | 7 ++++- .../net/ethernet/mellanox/mlx5/core/en/rx_res.h | 2 ++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 9255ab662af9..bb513a22dc66 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -338,6 +338,37 @@ static void mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq, devlink_fmsg_obj_nest_end(fmsg); } +static void mlx5e_rx_reporter_diagnose_rx_res_dir_tirns(struct mlx5e_rx_res *rx_res, + struct devlink_fmsg *fmsg) +{ + unsigned int max_nch = mlx5e_rx_res_get_max_nch(rx_res); + int i; + + devlink_fmsg_arr_pair_nest_start(fmsg, "Direct TIRs"); + + for (i = 0; i < max_nch; i++) { + devlink_fmsg_obj_nest_start(fmsg); + + devlink_fmsg_u32_pair_put(fmsg, "ix", i); + devlink_fmsg_u32_pair_put(fmsg, "tirn", mlx5e_rx_res_get_tirn_direct(rx_res, i)); + devlink_fmsg_u32_pair_put(fmsg, "rqtn", mlx5e_rx_res_get_rqtn_direct(rx_res, i)); + + devlink_fmsg_obj_nest_end(fmsg); + } + + devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rx_res(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_rx_res *rx_res = priv->rx_res; + + mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX resources"); + mlx5e_rx_reporter_diagnose_rx_res_dir_tirns(rx_res, fmsg); + mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + static void mlx5e_rx_reporter_diagnose_rqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { struct mlx5e_ptp *ptp_ch = priv->channels.ptp; @@ -373,6 +404,7 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, mlx5e_rx_reporter_diagnose_common_config(priv, fmsg); mlx5e_rx_reporter_diagnose_rqs(priv, fmsg); + mlx5e_rx_reporter_diagnose_rx_res(priv, fmsg); unlock: mutex_unlock(&priv->state_lock); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index a86eade9a9e0..4e301bb5e305 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -497,6 +497,11 @@ void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res) mlx5e_rx_res_free(res); } +unsigned int mlx5e_rx_res_get_max_nch(struct mlx5e_rx_res *res) +{ + return res->max_nch; +} + u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix) { return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir); @@ -522,7 +527,7 @@ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res) return mlx5e_tir_get_tirn(&res->ptp.tir); } -static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) +u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) { return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index 7b1a9f0f1874..391671b09c91 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -34,6 +34,8 @@ u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix); u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res); +u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix); +unsigned int mlx5e_rx_res_get_max_nch(struct mlx5e_rx_res *res); /* Activate/deactivate API */ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs); -- cgit v1.2.3 From 896c92aa7429e1edd1418610e0b73120a89c7b07 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Sun, 9 Feb 2025 12:17:14 +0200 Subject: net/mlx5e: Expose RSS via devlink rx reporter diagnose Underneath "rx resources" tag expose RSS diagnostic information. For each RSS expose its rqtn, TIRs and inner TIRs. $ devlink health diagnose auxiliary/mlx5_core.eth.0/65535 reporter rx ....... RSS: Index: 0 rqtn: 0 TIRs Numbers: tt: TT_IPV4_TCP tirn: 0 tt: TT_IPV6_TCP tirn: 1 tt: TT_IPV4_UDP tirn: 2 tt: TT_IPV6_UDP tirn: 3 tt: TT_IPV4_IPSEC_AH tirn: 4 tt: TT_IPV6_IPSEC_AH tirn: 5 tt: TT_IPV4_IPSEC_ESP tirn: 6 tt: TT_IPV6_IPSEC_ESP tirn: 7 tt: TT_IPV4 tirn: 8 tt: TT_IPV6 tirn: 9 Inner TIRs Numbers: tt: TT_IPV4_TCP tirn: 10 tt: TT_IPV6_TCP tirn: 11 tt: TT_IPV4_UDP tirn: 12 tt: TT_IPV6_UDP tirn: 13 tt: TT_IPV4_IPSEC_AH tirn: 14 tt: TT_IPV6_IPSEC_AH tirn: 15 tt: TT_IPV4_IPSEC_ESP tirn: 16 tt: TT_IPV6_IPSEC_ESP tirn: 17 tt: TT_IPV4 tirn: 18 tt: TT_IPV6 tirn: 19 Index: 2 rqtn: 27 TIRs Numbers: tt: TT_IPV6_TCP tirn: 46 Signed-off-by: Amir Tzin Reviewed-by: Aya Levin Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250209101716.112774-14-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/en/reporter_rx.c | 58 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en/rss.c | 15 ++++++ drivers/net/ethernet/mellanox/mlx5/core/en/rss.h | 3 ++ .../net/ethernet/mellanox/mlx5/core/en/rx_res.c | 2 - .../net/ethernet/mellanox/mlx5/core/en/rx_res.h | 3 ++ .../net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c | 19 +++++++ .../net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h | 1 + 7 files changed, 99 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index bb513a22dc66..e75759533ae0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -359,6 +359,63 @@ static void mlx5e_rx_reporter_diagnose_rx_res_dir_tirns(struct mlx5e_rx_res *rx_ devlink_fmsg_arr_pair_nest_end(fmsg); } +static void mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(struct mlx5e_rss *rss, bool inner, + struct devlink_fmsg *fmsg) +{ + bool found_valid_tir = false; + int tt; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + if (!mlx5e_rss_valid_tir(rss, tt, inner)) + continue; + + if (!found_valid_tir) { + char *tir_msg = inner ? "Inner TIRs Numbers" : "TIRs Numbers"; + + found_valid_tir = true; + devlink_fmsg_arr_pair_nest_start(fmsg, tir_msg); + } + + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_string_pair_put(fmsg, "tt", mlx5_ttc_get_name(tt)); + devlink_fmsg_u32_pair_put(fmsg, "tirn", mlx5e_rss_get_tirn(rss, tt, inner)); + devlink_fmsg_obj_nest_end(fmsg); + } + + if (found_valid_tir) + devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rx_res_rss_ix(struct mlx5e_rx_res *rx_res, u32 rss_idx, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_rss *rss = mlx5e_rx_res_rss_get(rx_res, rss_idx); + + if (!rss) + return; + + devlink_fmsg_obj_nest_start(fmsg); + + devlink_fmsg_u32_pair_put(fmsg, "Index", rss_idx); + devlink_fmsg_u32_pair_put(fmsg, "rqtn", mlx5e_rss_get_rqtn(rss)); + mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(rss, false, fmsg); + if (mlx5e_rss_get_inner_ft_support(rss)) + mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(rss, true, fmsg); + + devlink_fmsg_obj_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rx_res_rss(struct mlx5e_rx_res *rx_res, + struct devlink_fmsg *fmsg) +{ + int rss_ix; + + devlink_fmsg_arr_pair_nest_start(fmsg, "RSS"); + for (rss_ix = 0; rss_ix < MLX5E_MAX_NUM_RSS; rss_ix++) + mlx5e_rx_reporter_diagnose_rx_res_rss_ix(rx_res, rss_ix, fmsg); + devlink_fmsg_arr_pair_nest_end(fmsg); +} + static void mlx5e_rx_reporter_diagnose_rx_res(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { @@ -366,6 +423,7 @@ static void mlx5e_rx_reporter_diagnose_rx_res(struct mlx5e_priv *priv, mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX resources"); mlx5e_rx_reporter_diagnose_rx_res_dir_tirns(rx_res, fmsg); + mlx5e_rx_reporter_diagnose_rx_res_rss(rx_res, fmsg); mlx5e_health_fmsg_named_obj_nest_end(fmsg); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c index 5f742f896600..0d8ccc7b6c11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c @@ -81,6 +81,11 @@ struct mlx5e_rss { refcount_t refcnt; }; +bool mlx5e_rss_get_inner_ft_support(struct mlx5e_rss *rss) +{ + return rss->inner_ft_support; +} + void mlx5e_rss_params_indir_modify_actual_size(struct mlx5e_rss *rss, u32 num_channels) { rss->indir.actual_table_size = mlx5e_rqt_size(rss->mdev, num_channels); @@ -449,6 +454,16 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, return mlx5e_tir_get_tirn(tir); } +u32 mlx5e_rss_get_rqtn(struct mlx5e_rss *rss) +{ + return mlx5e_rqt_get_rqtn(&rss->rqt); +} + +bool mlx5e_rss_valid_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner) +{ + return !!rss_get_tir(rss, tt, inner); +} + /* Fill the "tirn" output parameter. * Create the requested TIR if it's its first usage. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h index d0df98963c8d..72089f5f473c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h @@ -32,8 +32,11 @@ void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss); void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss); unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss); +bool mlx5e_rss_get_inner_ft_support(struct mlx5e_rss *rss); u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner); +bool mlx5e_rss_valid_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner); +u32 mlx5e_rss_get_rqtn(struct mlx5e_rss *rss); int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, const struct mlx5e_packet_merge_param *init_pkt_merge_param, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index 4e301bb5e305..9d8b2f5f6c96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -5,8 +5,6 @@ #include "channels.h" #include "params.h" -#define MLX5E_MAX_NUM_RSS 16 - struct mlx5e_rx_res { struct mlx5_core_dev *mdev; /* primary */ enum mlx5e_rx_res_features features; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index 391671b09c91..05b438043bcb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -10,6 +10,8 @@ #include "fs.h" #include "rss.h" +#define MLX5E_MAX_NUM_RSS 16 + struct mlx5e_rx_res; struct mlx5e_channels; @@ -36,6 +38,7 @@ u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res); u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix); unsigned int mlx5e_rx_res_get_max_nch(struct mlx5e_rx_res *res); +bool mlx5_rx_res_rss_inner_ft_support(struct mlx5e_rx_res *res); /* Activate/deactivate API */ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c index 9f13cea16446..eb3bd9c7f66e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -61,6 +61,25 @@ static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc) } } +static const char *mlx5_traffic_types_names[MLX5_NUM_TT] = { + [MLX5_TT_IPV4_TCP] = "TT_IPV4_TCP", + [MLX5_TT_IPV6_TCP] = "TT_IPV6_TCP", + [MLX5_TT_IPV4_UDP] = "TT_IPV4_UDP", + [MLX5_TT_IPV6_UDP] = "TT_IPV6_UDP", + [MLX5_TT_IPV4_IPSEC_AH] = "TT_IPV4_IPSEC_AH", + [MLX5_TT_IPV6_IPSEC_AH] = "TT_IPV6_IPSEC_AH", + [MLX5_TT_IPV4_IPSEC_ESP] = "TT_IPV4_IPSEC_ESP", + [MLX5_TT_IPV6_IPSEC_ESP] = "TT_IPV6_IPSEC_ESP", + [MLX5_TT_IPV4] = "TT_IPV4", + [MLX5_TT_IPV6] = "TT_IPV6", + [MLX5_TT_ANY] = "TT_ANY" +}; + +const char *mlx5_ttc_get_name(enum mlx5_traffic_types tt) +{ + return mlx5_traffic_types_names[tt]; +} + struct mlx5_etype_proto { u16 etype; u8 proto; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h index 92eea6bea310..ab9434fe3ae6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h @@ -49,6 +49,7 @@ struct ttc_params { struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT]; }; +const char *mlx5_ttc_get_name(enum mlx5_traffic_types tt); struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc); struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, -- cgit v1.2.3 From 95b9606b15bb3ce1198d28d2393dd0e1f0a5f3e9 Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Sun, 9 Feb 2025 12:17:15 +0200 Subject: net/mlx5: Extend Ethtool loopback selftest to support non-linear SKB Current loopback test validation ignores non-linear SKB case in the SKB access, which can lead to failures in scenarios such as when HW GRO is enabled. Linearize the SKB so both cases will be handled. Signed-off-by: Alexei Lazar Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250209101716.112774-15-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 1d60465cc2ca..2f7a543feca6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -166,6 +166,9 @@ mlx5e_test_loopback_validate(struct sk_buff *skb, struct udphdr *udph; struct iphdr *iph; + if (skb_linearize(skb)) + goto out; + /* We are only going to peek, no need to clone the SKB */ if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb)) goto out; -- cgit v1.2.3 From 1a9304859b3a4119579524c293b902a8927180f3 Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Sun, 9 Feb 2025 12:17:16 +0200 Subject: net/mlx5: XDP, Enable TX side XDP multi-buffer support In XDP scenarios, fragmented packets can occur if the MTU is larger than the page size, even when the packet size fits within the linear part. If XDP multi-buffer support is disabled, the fragmented part won't be handled in the TX flow, leading to packet drops. Since XDP multi-buffer support is always available, this commit removes the conditional check for enabling it. This ensures that XDP multi-buffer support is always enabled, regardless of the `is_xdp_mb` parameter, and guarantees the handling of fragmented packets in such scenarios. Signed-off-by: Alexei Lazar Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250209101716.112774-16-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - .../net/ethernet/mellanox/mlx5/core/en/params.c | 1 - .../net/ethernet/mellanox/mlx5/core/en/params.h | 1 - .../ethernet/mellanox/mlx5/core/en/reporter_tx.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 49 ++++++++++------------ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 29 ------------- 6 files changed, 21 insertions(+), 61 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 534fdd27c8de..769e683f2488 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -384,7 +384,6 @@ enum { MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, MLX5E_SQ_STATE_PENDING_XSK_TX, MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, - MLX5E_SQ_STATE_XDP_MULTIBUF, MLX5E_NUM_SQ_STATES, /* Must be kept last */ }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index e37d4c202bba..aa36670d9a36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -1247,7 +1247,6 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, mlx5e_build_sq_param_common(mdev, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); - param->is_xdp_mb = !mlx5e_rx_is_linear_skb(mdev, params, xsk); mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 3f8986f9d862..bd5877acc5b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -33,7 +33,6 @@ struct mlx5e_sq_param { struct mlx5_wq_param wq; bool is_mpw; bool is_tls; - bool is_xdp_mb; u16 stop_room; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 09433b91be17..532c7fa94d17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -16,7 +16,6 @@ static const char * const sq_sw_state_type_name[] = { [MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline", [MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx", [MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync", - [MLX5E_SQ_STATE_XDP_MULTIBUF] = "xdp_multibuf", }; static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 3cc4d55613bf..6f3094a479e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -546,6 +546,7 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, bool inline_ok; bool linear; u16 pi; + int i; struct mlx5e_xdpsq_stats *stats = sq->stats; @@ -612,41 +613,33 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); - if (test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) { - int i; - - memset(&cseg->trailer, 0, sizeof(cseg->trailer)); - memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer)); - - eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); + memset(&cseg->trailer, 0, sizeof(cseg->trailer)); + memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer)); - for (i = 0; i < num_frags; i++) { - skb_frag_t *frag = &xdptxdf->sinfo->frags[i]; - dma_addr_t addr; + eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); - addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] : - page_pool_get_dma_addr(skb_frag_page(frag)) + - skb_frag_off(frag); + for (i = 0; i < num_frags; i++) { + skb_frag_t *frag = &xdptxdf->sinfo->frags[i]; + dma_addr_t addr; - dseg->addr = cpu_to_be64(addr); - dseg->byte_count = cpu_to_be32(skb_frag_size(frag)); - dseg->lkey = sq->mkey_be; - dseg++; - } + addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] : + page_pool_get_dma_addr(skb_frag_page(frag)) + + skb_frag_off(frag); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + dseg->addr = cpu_to_be64(addr); + dseg->byte_count = cpu_to_be32(skb_frag_size(frag)); + dseg->lkey = sq->mkey_be; + dseg++; + } - sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) { - .num_wqebbs = num_wqebbs, - .num_pkts = 1, - }; + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - sq->pc += num_wqebbs; - } else { - cseg->fm_ce_se = 0; + sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) { + .num_wqebbs = num_wqebbs, + .num_pkts = 1, + }; - sq->pc++; - } + sq->pc += num_wqebbs; xsk_tx_metadata_request(meta, &mlx5e_xsk_tx_metadata_ops, eseg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2fdc86432ac0..5d5e7b19c396 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2023,41 +2023,12 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.min_inline_mode = sq->min_inline_mode; set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - if (param->is_xdp_mb) - set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state); - err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); if (err) goto err_free_xdpsq; mlx5e_set_xmit_fp(sq, param->is_mpw); - if (!param->is_mpw && !test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) { - unsigned int ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1; - unsigned int inline_hdr_sz = 0; - int i; - - if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { - inline_hdr_sz = MLX5E_XDP_MIN_INLINE; - ds_cnt++; - } - - /* Pre initialize fixed WQE fields */ - for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) { - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - - sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) { - .num_wqebbs = 1, - .num_pkts = 1, - }; - - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); - } - } - return 0; err_free_xdpsq: -- cgit v1.2.3