diff options
72 files changed, 3425 insertions, 676 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index a7e0f11def8e..d36bee3f1f04 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10453,6 +10453,7 @@ M: Srujana Challa <schalla@marvell.com> L: linux-crypto@vger.kernel.org S: Maintained F: drivers/crypto/marvell/ +F: include/linux/soc/marvell/octeontx2/ MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2) M: Mirko Lindner <mlindner@marvell.com> @@ -10525,6 +10526,7 @@ M: hariprasad <hkelam@marvell.com> L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/marvell/octeontx2/nic/ +F: include/linux/soc/marvell/octeontx2/ MARVELL OCTEONTX2 RVU ADMIN FUNCTION DRIVER M: Sunil Goutham <sgoutham@marvell.com> diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index f9d4d234a2af..5bae5e859c81 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -689,6 +689,7 @@ struct hnae3_knic_private_info { struct hnae3_roce_private_info { struct net_device *netdev; void __iomem *roce_io_base; + void __iomem *roce_mem_base; int base_vector; int num_vectors; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 999a2aaad847..632ad4257b18 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3645,8 +3645,6 @@ map_ring_fail: static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv) { -#define HNS3_VECTOR_PF_MAX_NUM 64 - struct hnae3_handle *h = priv->ae_handle; struct hns3_enet_tqp_vector *tqp_vector; struct hnae3_vector_info *vector; @@ -3659,7 +3657,6 @@ static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv) /* RSS size, cpu online and vector_num should be the same */ /* Should consider 2p/4p later */ vector_num = min_t(u16, num_online_cpus(), tqp_num); - vector_num = min_t(u16, vector_num, HNS3_VECTOR_PF_MAX_NUM); vector = devm_kcalloc(&pdev->dev, vector_num, sizeof(*vector), GFP_KERNEL); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 5b7967c309b8..6d7ba2052848 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -307,6 +307,9 @@ enum hclge_opcode_type { #define HCLGE_TQP_REG_OFFSET 0x80000 #define HCLGE_TQP_REG_SIZE 0x200 +#define HCLGE_TQP_MAX_SIZE_DEV_V2 1024 +#define HCLGE_TQP_EXT_REG_OFFSET 0x100 + #define HCLGE_RCB_INIT_QUERY_TIMEOUT 10 #define HCLGE_RCB_INIT_FLAG_EN_B 0 #define HCLGE_RCB_INIT_FLAG_FINI_B 8 @@ -336,7 +339,9 @@ enum hclge_int_type { }; struct hclge_ctrl_vector_chain_cmd { - u8 int_vector_id; +#define HCLGE_VECTOR_ID_L_S 0 +#define HCLGE_VECTOR_ID_L_M GENMASK(7, 0) + u8 int_vector_id_l; u8 int_cause_num; #define HCLGE_INT_TYPE_S 0 #define HCLGE_INT_TYPE_M GENMASK(1, 0) @@ -346,7 +351,9 @@ struct hclge_ctrl_vector_chain_cmd { #define HCLGE_INT_GL_IDX_M GENMASK(14, 13) __le16 tqp_type_and_id[HCLGE_VECTOR_ELEMENTS_PER_CMD]; u8 vfid; - u8 rsv; +#define HCLGE_VECTOR_ID_H_S 8 +#define HCLGE_VECTOR_ID_H_M GENMASK(15, 8) + u8 int_vector_id_h; }; #define HCLGE_MAX_TC_NUM 8 @@ -470,16 +477,13 @@ struct hclge_pf_res_cmd { __le16 tqp_num; __le16 buf_size; __le16 msixcap_localid_ba_nic; - __le16 msixcap_localid_ba_rocee; -#define HCLGE_MSIX_OFT_ROCEE_S 0 -#define HCLGE_MSIX_OFT_ROCEE_M GENMASK(15, 0) -#define HCLGE_PF_VEC_NUM_S 0 -#define HCLGE_PF_VEC_NUM_M GENMASK(7, 0) - __le16 pf_intr_vector_number; + __le16 msixcap_localid_number_nic; + __le16 pf_intr_vector_number_roce; __le16 pf_own_fun_number; __le16 tx_buf_size; __le16 dv_buf_size; - __le32 rsv[2]; + __le16 ext_tqp_num; + u8 rsv[6]; }; #define HCLGE_CFG_OFFSET_S 0 @@ -643,7 +647,6 @@ struct hclge_config_mac_speed_dup_cmd { u8 rsv[22]; }; -#define HCLGE_RING_ID_MASK GENMASK(9, 0) #define HCLGE_TQP_ENABLE_B 0 #define HCLGE_MAC_CFG_AN_EN_B 0 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 16df050e72cf..bedbc118c4a3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -498,6 +498,9 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev) dev_info(&hdev->pdev->dev, "PG_P pg_id: %u\n", pg_shap_cfg_cmd->pg_id); dev_info(&hdev->pdev->dev, "PG_P pg_shapping: 0x%x\n", le32_to_cpu(pg_shap_cfg_cmd->pg_shapping_para)); + dev_info(&hdev->pdev->dev, "PG_P flag: %#x\n", pg_shap_cfg_cmd->flag); + dev_info(&hdev->pdev->dev, "PG_P pg_rate: %u(Mbps)\n", + le32_to_cpu(pg_shap_cfg_cmd->pg_rate)); cmd = HCLGE_OPC_TM_PORT_SHAPPING; hclge_cmd_setup_basic_desc(&desc, cmd, true); @@ -508,6 +511,9 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev) port_shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data; dev_info(&hdev->pdev->dev, "PORT port_shapping: 0x%x\n", le32_to_cpu(port_shap_cfg_cmd->port_shapping_para)); + dev_info(&hdev->pdev->dev, "PORT flag: %#x\n", port_shap_cfg_cmd->flag); + dev_info(&hdev->pdev->dev, "PORT port_rate: %u(Mbps)\n", + le32_to_cpu(port_shap_cfg_cmd->port_rate)); cmd = HCLGE_OPC_TM_PG_SCH_MODE_CFG; hclge_cmd_setup_basic_desc(&desc, cmd, true); @@ -655,6 +661,9 @@ static void hclge_dbg_dump_tm(struct hclge_dev *hdev) dev_info(&hdev->pdev->dev, "PRI_C pri_id: %u\n", shap_cfg_cmd->pri_id); dev_info(&hdev->pdev->dev, "PRI_C pri_shapping: 0x%x\n", le32_to_cpu(shap_cfg_cmd->pri_shapping_para)); + dev_info(&hdev->pdev->dev, "PRI_C flag: %#x\n", shap_cfg_cmd->flag); + dev_info(&hdev->pdev->dev, "PRI_C pri_rate: %u(Mbps)\n", + le32_to_cpu(shap_cfg_cmd->pri_rate)); cmd = HCLGE_OPC_TM_PRI_P_SHAPPING; hclge_cmd_setup_basic_desc(&desc, cmd, true); @@ -666,6 +675,9 @@ static void hclge_dbg_dump_tm(struct hclge_dev *hdev) dev_info(&hdev->pdev->dev, "PRI_P pri_id: %u\n", shap_cfg_cmd->pri_id); dev_info(&hdev->pdev->dev, "PRI_P pri_shapping: 0x%x\n", le32_to_cpu(shap_cfg_cmd->pri_shapping_para)); + dev_info(&hdev->pdev->dev, "PRI_P flag: %#x\n", shap_cfg_cmd->flag); + dev_info(&hdev->pdev->dev, "PRI_P pri_rate: %u(Mbps)\n", + le32_to_cpu(shap_cfg_cmd->pri_rate)); hclge_dbg_dump_tm_pg(hdev); @@ -681,14 +693,17 @@ static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev, { struct hclge_bp_to_qs_map_cmd *bp_to_qs_map_cmd; struct hclge_nq_to_qs_link_cmd *nq_to_qs_map; + u32 qset_mapping[HCLGE_BP_EXT_GRP_NUM]; struct hclge_qs_to_pri_link_cmd *map; struct hclge_tqp_tx_queue_tc_cmd *tc; enum hclge_opcode_type cmd; struct hclge_desc desc; int queue_id, group_id; - u32 qset_mapping[32]; int tc_id, qset_id; int pri_id, ret; + u16 qs_id_l; + u16 qs_id_h; + u8 grp_num; u32 i; ret = kstrtouint(cmd_buf, 0, &queue_id); @@ -701,7 +716,24 @@ static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev, ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) goto err_tm_map_cmd_send; - qset_id = le16_to_cpu(nq_to_qs_map->qset_id) & 0x3FF; + qset_id = le16_to_cpu(nq_to_qs_map->qset_id); + + /* convert qset_id to the following format, drop the vld bit + * | qs_id_h | vld | qs_id_l | + * qset_id: | 15 ~ 11 | 10 | 9 ~ 0 | + * \ \ / / + * \ \ / / + * qset_id: | 15 | 14 ~ 10 | 9 ~ 0 | + */ + qs_id_l = hnae3_get_field(qset_id, HCLGE_TM_QS_ID_L_MSK, + HCLGE_TM_QS_ID_L_S); + qs_id_h = hnae3_get_field(qset_id, HCLGE_TM_QS_ID_H_EXT_MSK, + HCLGE_TM_QS_ID_H_EXT_S); + qset_id = 0; + hnae3_set_field(qset_id, HCLGE_TM_QS_ID_L_MSK, HCLGE_TM_QS_ID_L_S, + qs_id_l); + hnae3_set_field(qset_id, HCLGE_TM_QS_ID_H_MSK, HCLGE_TM_QS_ID_H_S, + qs_id_h); cmd = HCLGE_OPC_TM_QS_TO_PRI_LINK; map = (struct hclge_qs_to_pri_link_cmd *)desc.data; @@ -731,9 +763,11 @@ static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev, return; } + grp_num = hdev->num_tqps <= HCLGE_TQP_MAX_SIZE_DEV_V2 ? + HCLGE_BP_GRP_NUM : HCLGE_BP_EXT_GRP_NUM; cmd = HCLGE_OPC_TM_BP_TO_QSET_MAPPING; bp_to_qs_map_cmd = (struct hclge_bp_to_qs_map_cmd *)desc.data; - for (group_id = 0; group_id < 32; group_id++) { + for (group_id = 0; group_id < grp_num; group_id++) { hclge_cmd_setup_basic_desc(&desc, cmd, true); bp_to_qs_map_cmd->tc_id = tc_id; bp_to_qs_map_cmd->qs_group_id = group_id; @@ -748,7 +782,7 @@ static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev, dev_info(&hdev->pdev->dev, "index | tm bp qset maping:\n"); i = 0; - for (group_id = 0; group_id < 4; group_id++) { + for (group_id = 0; group_id < grp_num / 8; group_id++) { dev_info(&hdev->pdev->dev, "%04d | %08x:%08x:%08x:%08x:%08x:%08x:%08x:%08x\n", group_id * 256, qset_mapping[(u32)(i + 7)], @@ -1379,6 +1413,7 @@ static void hclge_dbg_dump_qs_shaper_single(struct hclge_dev *hdev, u16 qsid) u8 ir_u, ir_b, ir_s, bs_b, bs_s; struct hclge_desc desc; u32 shapping_para; + u32 rate; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QCN_SHAPPING_CFG, true); @@ -1400,10 +1435,11 @@ static void hclge_dbg_dump_qs_shaper_single(struct hclge_dev *hdev, u16 qsid) ir_s = hclge_tm_get_field(shapping_para, IR_S); bs_b = hclge_tm_get_field(shapping_para, BS_B); bs_s = hclge_tm_get_field(shapping_para, BS_S); + rate = le32_to_cpu(shap_cfg_cmd->qs_rate); dev_info(&hdev->pdev->dev, - "qs%u ir_b:%u, ir_u:%u, ir_s:%u, bs_b:%u, bs_s:%u\n", - qsid, ir_b, ir_u, ir_s, bs_b, bs_s); + "qs%u ir_b:%u, ir_u:%u, ir_s:%u, bs_b:%u, bs_s:%u, flag:%#x, rate:%u(Mbps)\n", + qsid, ir_b, ir_u, ir_s, bs_b, bs_s, shap_cfg_cmd->flag, rate); } static void hclge_dbg_dump_qs_shaper_all(struct hclge_dev *hdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 710200119fe8..500cc19225f3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -556,7 +556,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle) hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_RX_STATS, true); - desc[0].data[0] = cpu_to_le32((tqp->index & 0x1ff)); + desc[0].data[0] = cpu_to_le32(tqp->index); ret = hclge_cmd_send(&hdev->hw, desc, 1); if (ret) { dev_err(&hdev->pdev->dev, @@ -576,7 +576,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle) HCLGE_OPC_QUERY_TX_STATS, true); - desc[0].data[0] = cpu_to_le32((tqp->index & 0x1ff)); + desc[0].data[0] = cpu_to_le32(tqp->index); ret = hclge_cmd_send(&hdev->hw, desc, 1); if (ret) { dev_err(&hdev->pdev->dev, @@ -886,7 +886,8 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) } req = (struct hclge_pf_res_cmd *)desc.data; - hdev->num_tqps = le16_to_cpu(req->tqp_num); + hdev->num_tqps = le16_to_cpu(req->tqp_num) + + le16_to_cpu(req->ext_tqp_num); hdev->pkt_buf_size = le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S; if (req->tx_buf_size) @@ -905,35 +906,24 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) hdev->dv_buf_size = roundup(hdev->dv_buf_size, HCLGE_BUF_SIZE_UNIT); + hdev->num_nic_msi = le16_to_cpu(req->msixcap_localid_number_nic); + if (hdev->num_nic_msi < HNAE3_MIN_VECTOR_NUM) { + dev_err(&hdev->pdev->dev, + "only %u msi resources available, not enough for pf(min:2).\n", + hdev->num_nic_msi); + return -EINVAL; + } + if (hnae3_dev_roce_supported(hdev)) { - hdev->roce_base_msix_offset = - hnae3_get_field(le16_to_cpu(req->msixcap_localid_ba_rocee), - HCLGE_MSIX_OFT_ROCEE_M, HCLGE_MSIX_OFT_ROCEE_S); hdev->num_roce_msi = - hnae3_get_field(le16_to_cpu(req->pf_intr_vector_number), - HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S); - - /* nic's msix numbers is always equals to the roce's. */ - hdev->num_nic_msi = hdev->num_roce_msi; + le16_to_cpu(req->pf_intr_vector_number_roce); /* PF should have NIC vectors and Roce vectors, * NIC vectors are queued before Roce vectors. */ - hdev->num_msi = hdev->num_roce_msi + - hdev->roce_base_msix_offset; + hdev->num_msi = hdev->num_nic_msi + hdev->num_roce_msi; } else { - hdev->num_msi = - hnae3_get_field(le16_to_cpu(req->pf_intr_vector_number), - HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S); - - hdev->num_nic_msi = hdev->num_msi; - } - - if (hdev->num_nic_msi < HNAE3_MIN_VECTOR_NUM) { - dev_err(&hdev->pdev->dev, - "Just %u msi resources, not enough for pf(min:2).\n", - hdev->num_nic_msi); - return -EINVAL; + hdev->num_msi = hdev->num_nic_msi; } return 0; @@ -1598,8 +1588,20 @@ static int hclge_alloc_tqps(struct hclge_dev *hdev) tqp->q.buf_size = hdev->rx_buf_len; tqp->q.tx_desc_num = hdev->num_tx_desc; tqp->q.rx_desc_num = hdev->num_rx_desc; - tqp->q.io_base = hdev->hw.io_base + HCLGE_TQP_REG_OFFSET + - i * HCLGE_TQP_REG_SIZE; + + /* need an extended offset to configure queues >= + * HCLGE_TQP_MAX_SIZE_DEV_V2 + */ + if (i < HCLGE_TQP_MAX_SIZE_DEV_V2) + tqp->q.io_base = hdev->hw.io_base + + HCLGE_TQP_REG_OFFSET + + i * HCLGE_TQP_REG_SIZE; + else + tqp->q.io_base = hdev->hw.io_base + + HCLGE_TQP_REG_OFFSET + + HCLGE_TQP_EXT_REG_OFFSET + + (i - HCLGE_TQP_MAX_SIZE_DEV_V2) * + HCLGE_TQP_REG_SIZE; tqp++; } @@ -2412,17 +2414,18 @@ static int hclge_init_roce_base_info(struct hclge_vport *vport) { struct hnae3_handle *roce = &vport->roce; struct hnae3_handle *nic = &vport->nic; + struct hclge_dev *hdev = vport->back; roce->rinfo.num_vectors = vport->back->num_roce_msi; - if (vport->back->num_msi_left < vport->roce.rinfo.num_vectors || - vport->back->num_msi_left == 0) + if (hdev->num_msi < hdev->num_nic_msi + hdev->num_roce_msi) return -EINVAL; - roce->rinfo.base_vector = vport->back->roce_base_vector; + roce->rinfo.base_vector = hdev->roce_base_vector; roce->rinfo.netdev = nic->kinfo.netdev; - roce->rinfo.roce_io_base = vport->back->hw.io_base; + roce->rinfo.roce_io_base = hdev->hw.io_base; + roce->rinfo.roce_mem_base = hdev->hw.mem_base; roce->pdev = nic->pdev; roce->ae_algo = nic->ae_algo; @@ -2456,7 +2459,7 @@ static int hclge_init_msi(struct hclge_dev *hdev) hdev->base_msi_vector = pdev->irq; hdev->roce_base_vector = hdev->base_msi_vector + - hdev->roce_base_msix_offset; + hdev->num_nic_msi; hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi, sizeof(u16), GFP_KERNEL); @@ -4129,6 +4132,30 @@ struct hclge_vport *hclge_get_vport(struct hnae3_handle *handle) return container_of(handle, struct hclge_vport, nic); } +static void hclge_get_vector_info(struct hclge_dev *hdev, u16 idx, + struct hnae3_vector_info *vector_info) +{ +#define HCLGE_PF_MAX_VECTOR_NUM_DEV_V2 64 + + vector_info->vector = pci_irq_vector(hdev->pdev, idx); + + /* need an extend offset to config vector >= 64 */ + if (idx - 1 < HCLGE_PF_MAX_VECTOR_NUM_DEV_V2) + vector_info->io_addr = hdev->hw.io_base + + HCLGE_VECTOR_REG_BASE + + (idx - 1) * HCLGE_VECTOR_REG_OFFSET; + else + vector_info->io_addr = hdev->hw.io_base + + HCLGE_VECTOR_EXT_REG_BASE + + (idx - 1) / HCLGE_PF_MAX_VECTOR_NUM_DEV_V2 * + HCLGE_VECTOR_REG_OFFSET_H + + (idx - 1) % HCLGE_PF_MAX_VECTOR_NUM_DEV_V2 * + HCLGE_VECTOR_REG_OFFSET; + + hdev->vector_status[idx] = hdev->vport[0].vport_id; + hdev->vector_irq[idx] = vector_info->vector; +} + static int hclge_get_vector(struct hnae3_handle *handle, u16 vector_num, struct hnae3_vector_info *vector_info) { @@ -4136,23 +4163,16 @@ static int hclge_get_vector(struct hnae3_handle *handle, u16 vector_num, struct hnae3_vector_info *vector = vector_info; struct hclge_dev *hdev = vport->back; int alloc = 0; - int i, j; + u16 i = 0; + u16 j; vector_num = min_t(u16, hdev->num_nic_msi - 1, vector_num); vector_num = min(hdev->num_msi_left, vector_num); for (j = 0; j < vector_num; j++) { - for (i = 1; i < hdev->num_msi; i++) { + while (++i < hdev->num_nic_msi) { if (hdev->vector_status[i] == HCLGE_INVALID_VPORT) { - vector->vector = pci_irq_vector(hdev->pdev, i); - vector->io_addr = hdev->hw.io_base + - HCLGE_VECTOR_REG_BASE + - (i - 1) * HCLGE_VECTOR_REG_OFFSET + - vport->vport_id * - HCLGE_VECTOR_VF_OFFSET; - hdev->vector_status[i] = vport->vport_id; - hdev->vector_irq[i] = vector->vector; - + hclge_get_vector_info(hdev, i, vector); vector++; alloc++; @@ -4701,7 +4721,12 @@ int hclge_bind_ring_with_vector(struct hclge_vport *vport, op = en ? HCLGE_OPC_ADD_RING_TO_VECTOR : HCLGE_OPC_DEL_RING_TO_VECTOR; hclge_cmd_setup_basic_desc(&desc, op, false); - req->int_vector_id = vector_id; + req->int_vector_id_l = hnae3_get_field(vector_id, + HCLGE_VECTOR_ID_L_M, + HCLGE_VECTOR_ID_L_S); + req->int_vector_id_h = hnae3_get_field(vector_id, + HCLGE_VECTOR_ID_H_M, + HCLGE_VECTOR_ID_H_S); i = 0; for (node = ring_chain; node; node = node->next) { @@ -4733,7 +4758,14 @@ int hclge_bind_ring_with_vector(struct hclge_vport *vport, hclge_cmd_setup_basic_desc(&desc, op, false); - req->int_vector_id = vector_id; + req->int_vector_id_l = + hnae3_get_field(vector_id, + HCLGE_VECTOR_ID_L_M, + HCLGE_VECTOR_ID_L_S); + req->int_vector_id_h = + hnae3_get_field(vector_id, + HCLGE_VECTOR_ID_H_M, + HCLGE_VECTOR_ID_H_S); } } @@ -6852,7 +6884,7 @@ static int hclge_tqp_enable(struct hclge_dev *hdev, unsigned int tqp_id, int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_COM_TQP_QUEUE, false); - req->tqp_id = cpu_to_le16(tqp_id & HCLGE_RING_ID_MASK); + req->tqp_id = cpu_to_le16(tqp_id); req->stream_id = cpu_to_le16(stream_id); if (enable) req->enable |= 1U << HCLGE_TQP_ENABLE_B; @@ -9314,7 +9346,7 @@ static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id, hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RESET_TQP_QUEUE, false); req = (struct hclge_reset_tqp_queue_cmd *)desc.data; - req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK); + req->tqp_id = cpu_to_le16(queue_id); if (enable) hnae3_set_bit(req->reset_req, HCLGE_TQP_RESET_B, 1U); @@ -9337,7 +9369,7 @@ static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id) hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RESET_TQP_QUEUE, true); req = (struct hclge_reset_tqp_queue_cmd *)desc.data; - req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK); + req->tqp_id = cpu_to_le16(queue_id); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { @@ -9877,6 +9909,28 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, } } +static int hclge_dev_mem_map(struct hclge_dev *hdev) +{ +#define HCLGE_MEM_BAR 4 + + struct pci_dev *pdev = hdev->pdev; + struct hclge_hw *hw = &hdev->hw; + + /* for device does not have device memory, return directly */ + if (!(pci_select_bars(pdev, IORESOURCE_MEM) & BIT(HCLGE_MEM_BAR))) + return 0; + + hw->mem_base = devm_ioremap_wc(&pdev->dev, + pci_resource_start(pdev, HCLGE_MEM_BAR), + pci_resource_len(pdev, HCLGE_MEM_BAR)); + if (!hw->mem_base) { + dev_err(&pdev->dev, "failed to map device memroy\n"); + return -EFAULT; + } + + return 0; +} + static int hclge_pci_init(struct hclge_dev *hdev) { struct pci_dev *pdev = hdev->pdev; @@ -9915,9 +9969,16 @@ static int hclge_pci_init(struct hclge_dev *hdev) goto err_clr_master; } + ret = hclge_dev_mem_map(hdev); + if (ret) + goto err_unmap_io_base; + hdev->num_req_vfs = pci_sriov_get_totalvfs(pdev); return 0; + +err_unmap_io_base: + pcim_iounmap(pdev, hdev->hw.io_base); err_clr_master: pci_clear_master(pdev); pci_release_regions(pdev); @@ -9931,6 +9992,9 @@ static void hclge_pci_uninit(struct hclge_dev *hdev) { struct pci_dev *pdev = hdev->pdev; + if (hdev->hw.mem_base) + devm_iounmap(&pdev->dev, hdev->hw.mem_base); + pcim_iounmap(pdev, hdev->hw.io_base); pci_free_irq_vectors(pdev); pci_clear_master(pdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 64e6afdb61b8..bd17685e4065 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -27,9 +27,11 @@ (HCLGE_PF_CFG_BLOCK_SIZE / HCLGE_CFG_RD_LEN_BYTES) #define HCLGE_VECTOR_REG_BASE 0x20000 +#define HCLGE_VECTOR_EXT_REG_BASE 0x30000 #define HCLGE_MISC_VECTOR_REG_BASE 0x20400 #define HCLGE_VECTOR_REG_OFFSET 0x4 +#define HCLGE_VECTOR_REG_OFFSET_H 0x1000 #define HCLGE_VECTOR_VF_OFFSET 0x100000 #define HCLGE_CMDQ_TX_ADDR_L_REG 0x27000 @@ -278,6 +280,7 @@ struct hclge_mac { struct hclge_hw { void __iomem *io_base; + void __iomem *mem_base; struct hclge_mac mac; int num_vec; struct hclge_cmq cmq; @@ -767,7 +770,6 @@ struct hclge_dev { u16 num_msi; u16 num_msi_left; u16 num_msi_used; - u16 roce_base_msix_offset; u32 base_msi_vector; u16 *vector_status; int *vector_irq; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index e8495f58a1a8..54767b06ca6b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -302,12 +302,30 @@ static int hclge_tm_q_to_qs_map_cfg(struct hclge_dev *hdev, { struct hclge_nq_to_qs_link_cmd *map; struct hclge_desc desc; + u16 qs_id_l; + u16 qs_id_h; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_NQ_TO_QS_LINK, false); map = (struct hclge_nq_to_qs_link_cmd *)desc.data; map->nq_id = cpu_to_le16(q_id); + + /* convert qs_id to the following format to support qset_id >= 1024 + * qs_id: | 15 | 14 ~ 10 | 9 ~ 0 | + * / / \ \ + * / / \ \ + * qset_id: | 15 ~ 11 | 10 | 9 ~ 0 | + * | qs_id_h | vld | qs_id_l | + */ + qs_id_l = hnae3_get_field(qs_id, HCLGE_TM_QS_ID_L_MSK, + HCLGE_TM_QS_ID_L_S); + qs_id_h = hnae3_get_field(qs_id, HCLGE_TM_QS_ID_H_MSK, + HCLGE_TM_QS_ID_H_S); + hnae3_set_field(qs_id, HCLGE_TM_QS_ID_L_MSK, HCLGE_TM_QS_ID_L_S, + qs_id_l); + hnae3_set_field(qs_id, HCLGE_TM_QS_ID_H_EXT_MSK, HCLGE_TM_QS_ID_H_EXT_S, + qs_id_h); map->qset_id = cpu_to_le16(qs_id | HCLGE_TM_Q_QS_LINK_VLD_MSK); return hclge_cmd_send(&hdev->hw, &desc, 1); @@ -377,7 +395,7 @@ static u32 hclge_tm_get_shapping_para(u8 ir_b, u8 ir_u, u8 ir_s, static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev, enum hclge_shap_bucket bucket, u8 pg_id, - u32 shapping_para) + u32 shapping_para, u32 rate) { struct hclge_pg_shapping_cmd *shap_cfg_cmd; enum hclge_opcode_type opcode; @@ -393,6 +411,10 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev, shap_cfg_cmd->pg_shapping_para = cpu_to_le32(shapping_para); + hnae3_set_bit(shap_cfg_cmd->flag, HCLGE_TM_RATE_VLD, 1); + + shap_cfg_cmd->pg_rate = cpu_to_le32(rate); + return hclge_cmd_send(&hdev->hw, &desc, 1); } @@ -420,12 +442,16 @@ static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev) shap_cfg_cmd->port_shapping_para = cpu_to_le32(shapping_para); + hnae3_set_bit(shap_cfg_cmd->flag, HCLGE_TM_RATE_VLD, 1); + + shap_cfg_cmd->port_rate = cpu_to_le32(hdev->hw.mac.speed); + return hclge_cmd_send(&hdev->hw, &desc, 1); } static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev, enum hclge_shap_bucket bucket, u8 pri_id, - u32 shapping_para) + u32 shapping_para, u32 rate) { struct hclge_pri_shapping_cmd *shap_cfg_cmd; enum hclge_opcode_type opcode; @@ -442,6 +468,10 @@ static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev, shap_cfg_cmd->pri_shapping_para = cpu_to_le32(shapping_para); + hnae3_set_bit(shap_cfg_cmd->flag, HCLGE_TM_RATE_VLD, 1); + + shap_cfg_cmd->pri_rate = cpu_to_le32(rate); + return hclge_cmd_send(&hdev->hw, &desc, 1); } @@ -543,6 +573,9 @@ int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate) shap_cfg_cmd->qs_id = cpu_to_le16(vport->qs_offset + i); shap_cfg_cmd->qs_shapping_para = cpu_to_le32(shaper_para); + hnae3_set_bit(shap_cfg_cmd->flag, HCLGE_TM_RATE_VLD, 1); + shap_cfg_cmd->qs_rate = cpu_to_le32(max_tx_rate); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, @@ -744,9 +777,10 @@ static int hclge_tm_pg_shaper_cfg(struct hclge_dev *hdev) /* Pg to pri */ for (i = 0; i < hdev->tm_info.num_pg; i++) { + u32 rate = hdev->tm_info.pg_info[i].bw_limit; + /* Calc shaper para */ - ret = hclge_shaper_para_calc(hdev->tm_info.pg_info[i].bw_limit, - HCLGE_SHAPER_LVL_PG, + ret = hclge_shaper_para_calc(rate, HCLGE_SHAPER_LVL_PG, &ir_para, max_tm_rate); if (ret) return ret; @@ -756,7 +790,7 @@ static int hclge_tm_pg_shaper_cfg(struct hclge_dev *hdev) HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pg_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET, i, - shaper_para); + shaper_para, rate); if (ret) return ret; @@ -767,7 +801,7 @@ static int hclge_tm_pg_shaper_cfg(struct hclge_dev *hdev) HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pg_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET, i, - shaper_para); + shaper_para, rate); if (ret) return ret; } @@ -873,8 +907,9 @@ static int hclge_tm_pri_tc_base_shaper_cfg(struct hclge_dev *hdev) u32 i; for (i = 0; i < hdev->tm_info.num_tc; i++) { - ret = hclge_shaper_para_calc(hdev->tm_info.tc_info[i].bw_limit, - HCLGE_SHAPER_LVL_PRI, + u32 rate = hdev->tm_info.tc_info[i].bw_limit; + + ret = hclge_shaper_para_calc(rate, HCLGE_SHAPER_LVL_PRI, &ir_para, max_tm_rate); if (ret) return ret; @@ -883,7 +918,7 @@ static int hclge_tm_pri_tc_base_shaper_cfg(struct hclge_dev *hdev) HCLGE_SHAPER_BS_U_DEF, HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET, i, - shaper_para); + shaper_para, rate); if (ret) return ret; @@ -893,7 +928,7 @@ static int hclge_tm_pri_tc_base_shaper_cfg(struct hclge_dev *hdev) HCLGE_SHAPER_BS_U_DEF, HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET, i, - shaper_para); + shaper_para, rate); if (ret) return ret; } @@ -918,7 +953,8 @@ static int hclge_tm_pri_vnet_base_shaper_pri_cfg(struct hclge_vport *vport) HCLGE_SHAPER_BS_U_DEF, HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET, - vport->vport_id, shaper_para); + vport->vport_id, shaper_para, + vport->bw_limit); if (ret) return ret; @@ -927,7 +963,8 @@ static int hclge_tm_pri_vnet_base_shaper_pri_cfg(struct hclge_vport *vport) HCLGE_SHAPER_BS_U_DEF, HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET, - vport->vport_id, shaper_para); + vport->vport_id, shaper_para, + vport->bw_limit); if (ret) return ret; @@ -1296,15 +1333,23 @@ static int hclge_pfc_setup_hw(struct hclge_dev *hdev) hdev->tm_info.pfc_en); } -/* Each Tc has a 1024 queue sets to backpress, it divides to - * 32 group, each group contains 32 queue sets, which can be - * represented by u32 bitmap. +/* for the queues that use for backpress, divides to several groups, + * each group contains 32 queue sets, which can be represented by u32 bitmap. */ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc) { + u16 grp_id_shift = HCLGE_BP_GRP_ID_S; + u16 grp_id_mask = HCLGE_BP_GRP_ID_M; + u8 grp_num = HCLGE_BP_GRP_NUM; int i; - for (i = 0; i < HCLGE_BP_GRP_NUM; i++) { + if (hdev->num_tqps > HCLGE_TQP_MAX_SIZE_DEV_V2) { + grp_num = HCLGE_BP_EXT_GRP_NUM; + grp_id_mask = HCLGE_BP_EXT_GRP_ID_M; + grp_id_shift = HCLGE_BP_EXT_GRP_ID_S; + } + + for (i = 0; i < grp_num; i++) { u32 qs_bitmap = 0; int k, ret; @@ -1313,8 +1358,7 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc) u16 qs_id = vport->qs_offset + tc; u8 grp, sub_grp; - grp = hnae3_get_field(qs_id, HCLGE_BP_GRP_ID_M, - HCLGE_BP_GRP_ID_S); + grp = hnae3_get_field(qs_id, grp_id_mask, grp_id_shift); sub_grp = hnae3_get_field(qs_id, HCLGE_BP_SUB_GRP_ID_M, HCLGE_BP_SUB_GRP_ID_S); if (i == grp) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index bb2a2d8e9259..5498d73ed34b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -39,6 +39,12 @@ struct hclge_nq_to_qs_link_cmd { __le16 nq_id; __le16 rsvd; #define HCLGE_TM_Q_QS_LINK_VLD_MSK BIT(10) +#define HCLGE_TM_QS_ID_L_MSK GENMASK(9, 0) +#define HCLGE_TM_QS_ID_L_S 0 +#define HCLGE_TM_QS_ID_H_MSK GENMASK(14, 10) +#define HCLGE_TM_QS_ID_H_S 10 +#define HCLGE_TM_QS_ID_H_EXT_S 11 +#define HCLGE_TM_QS_ID_H_EXT_MSK GENMASK(15, 11) __le16 qset_id; }; @@ -86,22 +92,34 @@ enum hclge_shap_bucket { HCLGE_TM_SHAP_P_BUCKET, }; +/* set bit HCLGE_TM_RATE_VLD to 1 means use 'rate' to config shaping */ +#define HCLGE_TM_RATE_VLD 0 + struct hclge_pri_shapping_cmd { u8 pri_id; u8 rsvd[3]; __le32 pri_shapping_para; + u8 flag; + u8 rsvd1[3]; + __le32 pri_rate; }; struct hclge_pg_shapping_cmd { u8 pg_id; u8 rsvd[3]; __le32 pg_shapping_para; + u8 flag; + u8 rsvd1[3]; + __le32 pg_rate; }; struct hclge_qs_shapping_cmd { __le16 qs_id; u8 rsvd[2]; __le32 qs_shapping_para; + u8 flag; + u8 rsvd1[3]; + __le32 qs_rate; }; #define HCLGE_BP_GRP_NUM 32 @@ -109,6 +127,11 @@ struct hclge_qs_shapping_cmd { #define HCLGE_BP_SUB_GRP_ID_M GENMASK(4, 0) #define HCLGE_BP_GRP_ID_S 5 #define HCLGE_BP_GRP_ID_M GENMASK(9, 5) + +#define HCLGE_BP_EXT_GRP_NUM 40 +#define HCLGE_BP_EXT_GRP_ID_S 5 +#define HCLGE_BP_EXT_GRP_ID_M GENMASK(10, 5) + struct hclge_bp_to_qs_map_cmd { u8 tc_id; u8 rsvd[2]; @@ -139,6 +162,9 @@ struct hclge_pfc_stats_cmd { struct hclge_port_shapping_cmd { __le32 port_shapping_para; + u8 flag; + u8 rsvd[3]; + __le32 port_rate; }; struct hclge_shaper_ir_para { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h index f94f5d443ebc..8b34a632b65a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h @@ -111,6 +111,9 @@ enum hclgevf_opcode_type { #define HCLGEVF_TQP_REG_OFFSET 0x80000 #define HCLGEVF_TQP_REG_SIZE 0x200 +#define HCLGEVF_TQP_MAX_SIZE_DEV_V2 1024 +#define HCLGEVF_TQP_EXT_REG_OFFSET 0x100 + struct hclgevf_tqp_map { __le16 tqp_id; /* Absolute tqp id for in this pf */ u8 tqp_vf; /* VF id */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 71007e74e9d2..5d6b419b8a78 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -403,8 +403,20 @@ static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev) tqp->q.buf_size = hdev->rx_buf_len; tqp->q.tx_desc_num = hdev->num_tx_desc; tqp->q.rx_desc_num = hdev->num_rx_desc; - tqp->q.io_base = hdev->hw.io_base + HCLGEVF_TQP_REG_OFFSET + - i * HCLGEVF_TQP_REG_SIZE; + + /* need an extended offset to configure queues >= + * HCLGEVF_TQP_MAX_SIZE_DEV_V2. + */ + if (i < HCLGEVF_TQP_MAX_SIZE_DEV_V2) + tqp->q.io_base = hdev->hw.io_base + + HCLGEVF_TQP_REG_OFFSET + + i * HCLGEVF_TQP_REG_SIZE; + else + tqp->q.io_base = hdev->hw.io_base + + HCLGEVF_TQP_REG_OFFSET + + HCLGEVF_TQP_EXT_REG_OFFSET + + (i - HCLGEVF_TQP_MAX_SIZE_DEV_V2) * + HCLGEVF_TQP_REG_SIZE; tqp++; } @@ -2430,6 +2442,7 @@ static int hclgevf_init_roce_base_info(struct hclgevf_dev *hdev) roce->rinfo.netdev = nic->kinfo.netdev; roce->rinfo.roce_io_base = hdev->hw.io_base; + roce->rinfo.roce_mem_base = hdev->hw.mem_base; roce->pdev = nic->pdev; roce->ae_algo = nic->ae_algo; @@ -2875,6 +2888,29 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client, } } +static int hclgevf_dev_mem_map(struct hclgevf_dev *hdev) +{ +#define HCLGEVF_MEM_BAR 4 + + struct pci_dev *pdev = hdev->pdev; + struct hclgevf_hw *hw = &hdev->hw; + + /* for device does not have device memory, return directly */ + if (!(pci_select_bars(pdev, IORESOURCE_MEM) & BIT(HCLGEVF_MEM_BAR))) + return 0; + + hw->mem_base = devm_ioremap_wc(&pdev->dev, + pci_resource_start(pdev, + HCLGEVF_MEM_BAR), + pci_resource_len(pdev, HCLGEVF_MEM_BAR)); + if (!hw->mem_base) { + dev_err(&pdev->dev, "failed to map device memroy\n"); + return -EFAULT; + } + + return 0; +} + static int hclgevf_pci_init(struct hclgevf_dev *hdev) { struct pci_dev *pdev = hdev->pdev; @@ -2909,8 +2945,14 @@ static int hclgevf_pci_init(struct hclgevf_dev *hdev) goto err_clr_master; } + ret = hclgevf_dev_mem_map(hdev); + if (ret) + goto err_unmap_io_base; + return 0; +err_unmap_io_base: + pci_iounmap(pdev, hdev->hw.io_base); err_clr_master: pci_clear_master(pdev); pci_release_regions(pdev); @@ -2924,6 +2966,9 @@ static void hclgevf_pci_uninit(struct hclgevf_dev *hdev) { struct pci_dev *pdev = hdev->pdev; + if (hdev->hw.mem_base) + devm_iounmap(&pdev->dev, hdev->hw.mem_base); + pci_iounmap(pdev, hdev->hw.io_base); pci_clear_master(pdev); pci_release_regions(pdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index c5bcc3894fd5..1b183bc35604 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -164,6 +164,7 @@ struct hclgevf_mac { struct hclgevf_hw { void __iomem *io_base; + void __iomem *mem_base; int num_vec; struct hclgevf_cmq cmq; struct hclgevf_mac mac; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index da15913879f8..9fe43ab0496d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -84,8 +84,6 @@ static int ibmvnic_reset_crq(struct ibmvnic_adapter *); static int ibmvnic_send_crq_init(struct ibmvnic_adapter *); static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *); static int ibmvnic_send_crq(struct ibmvnic_adapter *, union ibmvnic_crq *); -static int send_subcrq(struct ibmvnic_adapter *adapter, u64 remote_handle, - union sub_crq *sub_crq); static int send_subcrq_indirect(struct ibmvnic_adapter *, u64, u64, u64); static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance); static int enable_scrq_irq(struct ibmvnic_adapter *, @@ -306,9 +304,11 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, int count = pool->size - atomic_read(&pool->available); u64 handle = adapter->rx_scrq[pool->index]->handle; struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ind_xmit_queue *ind_bufp; + struct ibmvnic_sub_crq_queue *rx_scrq; + union sub_crq *sub_crq; int buffers_added = 0; unsigned long lpar_rc; - union sub_crq sub_crq; struct sk_buff *skb; unsigned int offset; dma_addr_t dma_addr; @@ -320,8 +320,10 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, if (!pool->active) return; + rx_scrq = adapter->rx_scrq[pool->index]; + ind_bufp = &rx_scrq->ind_buf; for (i = 0; i < count; ++i) { - skb = alloc_skb(pool->buff_size, GFP_ATOMIC); + skb = netdev_alloc_skb(adapter->netdev, pool->buff_size); if (!skb) { dev_err(dev, "Couldn't replenish rx buff\n"); adapter->replenish_no_mem++; @@ -346,12 +348,13 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, pool->rx_buff[index].pool_index = pool->index; pool->rx_buff[index].size = pool->buff_size; - memset(&sub_crq, 0, sizeof(sub_crq)); - sub_crq.rx_add.first = IBMVNIC_CRQ_CMD; - sub_crq.rx_add.correlator = + sub_crq = &ind_bufp->indir_arr[ind_bufp->index++]; + memset(sub_crq, 0, sizeof(*sub_crq)); + sub_crq->rx_add.first = IBMVNIC_CRQ_CMD; + sub_crq->rx_add.correlator = cpu_to_be64((u64)&pool->rx_buff[index]); - sub_crq.rx_add.ioba = cpu_to_be32(dma_addr); - sub_crq.rx_add.map_id = pool->long_term_buff.map_id; + sub_crq->rx_add.ioba = cpu_to_be32(dma_addr); + sub_crq->rx_add.map_id = pool->long_term_buff.map_id; /* The length field of the sCRQ is defined to be 24 bits so the * buffer size needs to be left shifted by a byte before it is @@ -361,15 +364,20 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, #ifdef __LITTLE_ENDIAN__ shift = 8; #endif - sub_crq.rx_add.len = cpu_to_be32(pool->buff_size << shift); - - lpar_rc = send_subcrq(adapter, handle, &sub_crq); - if (lpar_rc != H_SUCCESS) - goto failure; - - buffers_added++; - adapter->replenish_add_buff_success++; + sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift); pool->next_free = (pool->next_free + 1) % pool->size; + if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS || + i == count - 1) { + lpar_rc = + send_subcrq_indirect(adapter, handle, + (u64)ind_bufp->indir_dma, + (u64)ind_bufp->index); + if (lpar_rc != H_SUCCESS) + goto failure; + buffers_added += ind_bufp->index; + adapter->replenish_add_buff_success += ind_bufp->index; + ind_bufp->index = 0; + } } atomic_add(buffers_added, &pool->available); return; @@ -377,13 +385,20 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, failure: if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED) dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n"); - pool->free_map[pool->next_free] = index; - pool->rx_buff[index].skb = NULL; - - dev_kfree_skb_any(skb); - adapter->replenish_add_buff_failure++; - atomic_add(buffers_added, &pool->available); + for (i = ind_bufp->index - 1; i >= 0; --i) { + struct ibmvnic_rx_buff *rx_buff; + pool->next_free = pool->next_free == 0 ? + pool->size - 1 : pool->next_free - 1; + sub_crq = &ind_bufp->indir_arr[i]; + rx_buff = (struct ibmvnic_rx_buff *) + be64_to_cpu(sub_crq->rx_add.correlator); + index = (int)(rx_buff - pool->rx_buff); + pool->free_map[pool->next_free] = index; + dev_kfree_skb_any(pool->rx_buff[index].skb); + pool->rx_buff[index].skb = NULL; + } + ind_bufp->index = 0; if (lpar_rc == H_CLOSED || adapter->failover_pending) { /* Disable buffer pool replenishment and report carrier off if * queue is closed or pending failover. @@ -483,7 +498,7 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter) if (rx_pool->buff_size != buff_size) { free_long_term_buff(adapter, &rx_pool->long_term_buff); - rx_pool->buff_size = buff_size; + rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES); rc = alloc_long_term_buff(adapter, &rx_pool->long_term_buff, rx_pool->size * @@ -577,7 +592,7 @@ static int init_rx_pools(struct net_device *netdev) rx_pool->size = adapter->req_rx_add_entries_per_subcrq; rx_pool->index = i; - rx_pool->buff_size = buff_size; + rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES); rx_pool->active = 1; rx_pool->free_map = kcalloc(rx_pool->size, sizeof(int), @@ -730,6 +745,7 @@ static int init_tx_pools(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); int tx_subcrqs; + u64 buff_size; int i, rc; tx_subcrqs = adapter->num_active_tx_scrqs; @@ -746,9 +762,11 @@ static int init_tx_pools(struct net_device *netdev) adapter->num_active_tx_pools = tx_subcrqs; for (i = 0; i < tx_subcrqs; i++) { + buff_size = adapter->req_mtu + VLAN_HLEN; + buff_size = ALIGN(buff_size, L1_CACHE_BYTES); rc = init_one_tx_pool(netdev, &adapter->tx_pool[i], adapter->req_tx_entries_per_subcrq, - adapter->req_mtu + VLAN_HLEN); + buff_size); if (rc) { release_tx_pools(adapter); return rc; @@ -1148,6 +1166,7 @@ static int __ibmvnic_open(struct net_device *netdev) if (prev_state == VNIC_CLOSED) enable_irq(adapter->tx_scrq[i]->irq); enable_scrq_irq(adapter, adapter->tx_scrq[i]); + netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i)); } rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP); @@ -1478,17 +1497,18 @@ static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, * L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect. */ -static void build_hdr_descs_arr(struct ibmvnic_tx_buff *txbuff, +static void build_hdr_descs_arr(struct sk_buff *skb, + union sub_crq *indir_arr, int *num_entries, u8 hdr_field) { int hdr_len[3] = {0, 0, 0}; + u8 hdr_data[140] = {0}; int tot_len; - u8 *hdr_data = txbuff->hdr_data; - tot_len = build_hdr_data(hdr_field, txbuff->skb, hdr_len, - txbuff->hdr_data); + tot_len = build_hdr_data(hdr_field, skb, hdr_len, + hdr_data); *num_entries += create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len, - txbuff->indir_arr + 1); + indir_arr + 1); } static int ibmvnic_xmit_workarounds(struct sk_buff *skb, @@ -1506,17 +1526,95 @@ static int ibmvnic_xmit_workarounds(struct sk_buff *skb, return 0; } +static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq) +{ + struct ibmvnic_ind_xmit_queue *ind_bufp; + struct ibmvnic_tx_buff *tx_buff; + struct ibmvnic_tx_pool *tx_pool; + union sub_crq tx_scrq_entry; + int queue_num; + int entries; + int index; + int i; + + ind_bufp = &tx_scrq->ind_buf; + entries = (u64)ind_bufp->index; + queue_num = tx_scrq->pool_index; + + for (i = entries - 1; i >= 0; --i) { + tx_scrq_entry = ind_bufp->indir_arr[i]; + if (tx_scrq_entry.v1.type != IBMVNIC_TX_DESC) + continue; + index = be32_to_cpu(tx_scrq_entry.v1.correlator); + if (index & IBMVNIC_TSO_POOL_MASK) { + tx_pool = &adapter->tso_pool[queue_num]; + index &= ~IBMVNIC_TSO_POOL_MASK; + } else { + tx_pool = &adapter->tx_pool[queue_num]; + } + tx_pool->free_map[tx_pool->consumer_index] = index; + tx_pool->consumer_index = tx_pool->consumer_index == 0 ? + tx_pool->num_buffers - 1 : + tx_pool->consumer_index - 1; + tx_buff = &tx_pool->tx_buff[index]; + adapter->netdev->stats.tx_packets--; + adapter->netdev->stats.tx_bytes -= tx_buff->skb->len; + adapter->tx_stats_buffers[queue_num].packets--; + adapter->tx_stats_buffers[queue_num].bytes -= + tx_buff->skb->len; + dev_kfree_skb_any(tx_buff->skb); + tx_buff->skb = NULL; + adapter->netdev->stats.tx_dropped++; + } + ind_bufp->index = 0; + if (atomic_sub_return(entries, &tx_scrq->used) <= + (adapter->req_tx_entries_per_subcrq / 2) && + __netif_subqueue_stopped(adapter->netdev, queue_num)) { + netif_wake_subqueue(adapter->netdev, queue_num); + netdev_dbg(adapter->netdev, "Started queue %d\n", + queue_num); + } +} + +static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq) +{ + struct ibmvnic_ind_xmit_queue *ind_bufp; + u64 dma_addr; + u64 entries; + u64 handle; + int rc; + + ind_bufp = &tx_scrq->ind_buf; + dma_addr = (u64)ind_bufp->indir_dma; + entries = (u64)ind_bufp->index; + handle = tx_scrq->handle; + + if (!entries) + return 0; + rc = send_subcrq_indirect(adapter, handle, dma_addr, entries); + if (rc) + ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq); + else + ind_bufp->index = 0; + return 0; +} + static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); int queue_num = skb_get_queue_mapping(skb); u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ind_xmit_queue *ind_bufp; struct ibmvnic_tx_buff *tx_buff = NULL; struct ibmvnic_sub_crq_queue *tx_scrq; struct ibmvnic_tx_pool *tx_pool; unsigned int tx_send_failed = 0; + netdev_tx_t ret = NETDEV_TX_OK; unsigned int tx_map_failed = 0; + union sub_crq indir_arr[16]; unsigned int tx_dropped = 0; unsigned int tx_packets = 0; unsigned int tx_bytes = 0; @@ -1529,8 +1627,10 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) unsigned char *dst; int index = 0; u8 proto = 0; - u64 handle; - netdev_tx_t ret = NETDEV_TX_OK; + + tx_scrq = adapter->tx_scrq[queue_num]; + txq = netdev_get_tx_queue(netdev, queue_num); + ind_bufp = &tx_scrq->ind_buf; if (test_bit(0, &adapter->resetting)) { if (!netif_subqueue_stopped(netdev, skb)) @@ -1540,6 +1640,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_send_failed++; tx_dropped++; ret = NETDEV_TX_OK; + ibmvnic_tx_scrq_flush(adapter, tx_scrq); goto out; } @@ -1547,6 +1648,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_dropped++; tx_send_failed++; ret = NETDEV_TX_OK; + ibmvnic_tx_scrq_flush(adapter, tx_scrq); goto out; } if (skb_is_gso(skb)) @@ -1554,10 +1656,6 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) else tx_pool = &adapter->tx_pool[queue_num]; - tx_scrq = adapter->tx_scrq[queue_num]; - txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb)); - handle = tx_scrq->handle; - index = tx_pool->free_map[tx_pool->consumer_index]; if (index == IBMVNIC_INVALID_MAP) { @@ -1565,6 +1663,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_send_failed++; tx_dropped++; ret = NETDEV_TX_OK; + ibmvnic_tx_scrq_flush(adapter, tx_scrq); goto out; } @@ -1600,11 +1699,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_buff = &tx_pool->tx_buff[index]; tx_buff->skb = skb; - tx_buff->data_dma[0] = data_dma_addr; - tx_buff->data_len[0] = skb->len; tx_buff->index = index; tx_buff->pool_index = queue_num; - tx_buff->last_frag = true; memset(&tx_crq, 0, sizeof(tx_crq)); tx_crq.v1.first = IBMVNIC_CRQ_CMD; @@ -1649,55 +1745,29 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); hdrs += 2; } - /* determine if l2/3/4 headers are sent to firmware */ - if ((*hdrs >> 7) & 1) { - build_hdr_descs_arr(tx_buff, &num_entries, *hdrs); - tx_crq.v1.n_crq_elem = num_entries; - tx_buff->num_entries = num_entries; - tx_buff->indir_arr[0] = tx_crq; - tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr, - sizeof(tx_buff->indir_arr), - DMA_TO_DEVICE); - if (dma_mapping_error(dev, tx_buff->indir_dma)) { - dev_kfree_skb_any(skb); - tx_buff->skb = NULL; - if (!firmware_has_feature(FW_FEATURE_CMO)) - dev_err(dev, "tx: unable to map descriptor array\n"); - tx_map_failed++; - tx_dropped++; - ret = NETDEV_TX_OK; - goto tx_err_out; - } - lpar_rc = send_subcrq_indirect(adapter, handle, - (u64)tx_buff->indir_dma, - (u64)num_entries); - dma_unmap_single(dev, tx_buff->indir_dma, - sizeof(tx_buff->indir_arr), DMA_TO_DEVICE); - } else { - tx_buff->num_entries = num_entries; - lpar_rc = send_subcrq(adapter, handle, - &tx_crq); - } - if (lpar_rc != H_SUCCESS) { - if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER) - dev_err_ratelimited(dev, "tx: send failed\n"); - dev_kfree_skb_any(skb); - tx_buff->skb = NULL; - if (lpar_rc == H_CLOSED || adapter->failover_pending) { - /* Disable TX and report carrier off if queue is closed - * or pending failover. - * Firmware guarantees that a signal will be sent to the - * driver, triggering a reset or some other action. - */ - netif_tx_stop_all_queues(netdev); - netif_carrier_off(netdev); - } + if ((*hdrs >> 7) & 1) + build_hdr_descs_arr(skb, indir_arr, &num_entries, *hdrs); - tx_send_failed++; - tx_dropped++; - ret = NETDEV_TX_OK; - goto tx_err_out; + tx_crq.v1.n_crq_elem = num_entries; + tx_buff->num_entries = num_entries; + /* flush buffer if current entry can not fit */ + if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) { + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + if (lpar_rc != H_SUCCESS) + goto tx_flush_err; + } + + indir_arr[0] = tx_crq; + memcpy(&ind_bufp->indir_arr[ind_bufp->index], &indir_arr[0], + num_entries * sizeof(struct ibmvnic_generic_scrq)); + ind_bufp->index += num_entries; + if (__netdev_tx_sent_queue(txq, skb->len, + netdev_xmit_more() && + ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) { + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + if (lpar_rc != H_SUCCESS) + goto tx_err; } if (atomic_add_return(num_entries, &tx_scrq->used) @@ -1712,14 +1782,26 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ret = NETDEV_TX_OK; goto out; -tx_err_out: - /* roll back consumer index and map array*/ - if (tx_pool->consumer_index == 0) - tx_pool->consumer_index = - tx_pool->num_buffers - 1; - else - tx_pool->consumer_index--; - tx_pool->free_map[tx_pool->consumer_index] = index; +tx_flush_err: + dev_kfree_skb_any(skb); + tx_buff->skb = NULL; + tx_pool->consumer_index = tx_pool->consumer_index == 0 ? + tx_pool->num_buffers - 1 : + tx_pool->consumer_index - 1; + tx_dropped++; +tx_err: + if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER) + dev_err_ratelimited(dev, "tx: send failed\n"); + + if (lpar_rc == H_CLOSED || adapter->failover_pending) { + /* Disable TX and report carrier off if queue is closed + * or pending failover. + * Firmware guarantees that a signal will be sent to the + * driver, triggering a reset or some other action. + */ + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + } out: netdev->stats.tx_dropped += tx_dropped; netdev->stats.tx_bytes += tx_bytes; @@ -2368,10 +2450,17 @@ static void remove_buff_from_pool(struct ibmvnic_adapter *adapter, static int ibmvnic_poll(struct napi_struct *napi, int budget) { - struct net_device *netdev = napi->dev; - struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int scrq_num = (int)(napi - adapter->napi); - int frames_processed = 0; + struct ibmvnic_sub_crq_queue *rx_scrq; + struct ibmvnic_adapter *adapter; + struct net_device *netdev; + int frames_processed; + int scrq_num; + + netdev = napi->dev; + adapter = netdev_priv(netdev); + scrq_num = (int)(napi - adapter->napi); + frames_processed = 0; + rx_scrq = adapter->rx_scrq[scrq_num]; restart_poll: while (frames_processed < budget) { @@ -2384,14 +2473,14 @@ restart_poll: if (unlikely(test_bit(0, &adapter->resetting) && adapter->reset_reason != VNIC_RESET_NON_FATAL)) { - enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]); + enable_scrq_irq(adapter, rx_scrq); napi_complete_done(napi, frames_processed); return frames_processed; } - if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num])) + if (!pending_scrq(adapter, rx_scrq)) break; - next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]); + next = ibmvnic_next_scrq(adapter, rx_scrq); rx_buff = (struct ibmvnic_rx_buff *)be64_to_cpu(next-> rx_comp.correlator); @@ -2448,16 +2537,21 @@ restart_poll: frames_processed++; } - if (adapter->state != VNIC_CLOSING) + if (adapter->state != VNIC_CLOSING && + ((atomic_read(&adapter->rx_pool[scrq_num].available) < + adapter->req_rx_add_entries_per_subcrq / 2) || + frames_processed < budget)) replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]); - if (frames_processed < budget) { - enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]); - napi_complete_done(napi, frames_processed); - if (pending_scrq(adapter, adapter->rx_scrq[scrq_num]) && - napi_reschedule(napi)) { - disable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]); - goto restart_poll; + if (napi_complete_done(napi, frames_processed)) { + enable_scrq_irq(adapter, rx_scrq); + if (pending_scrq(adapter, rx_scrq)) { + rmb(); + if (napi_reschedule(napi)) { + disable_scrq_irq(adapter, rx_scrq); + goto restart_poll; + } + } } } return frames_processed; @@ -2858,6 +2952,7 @@ static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter, memset(scrq->msgs, 0, 4 * PAGE_SIZE); atomic_set(&scrq->used, 0); scrq->cur = 0; + scrq->ind_buf.index = 0; rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token, 4 * PAGE_SIZE, &scrq->crq_num, &scrq->hw_irq); @@ -2909,6 +3004,11 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter, } } + dma_free_coherent(dev, + IBMVNIC_IND_ARR_SZ, + scrq->ind_buf.indir_arr, + scrq->ind_buf.indir_dma); + dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE, DMA_BIDIRECTIONAL); free_pages((unsigned long)scrq->msgs, 2); @@ -2955,6 +3055,17 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter scrq->adapter = adapter; scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs); + scrq->ind_buf.index = 0; + + scrq->ind_buf.indir_arr = + dma_alloc_coherent(dev, + IBMVNIC_IND_ARR_SZ, + &scrq->ind_buf.indir_dma, + GFP_KERNEL); + + if (!scrq->ind_buf.indir_arr) + goto indir_failed; + spin_lock_init(&scrq->lock); netdev_dbg(adapter->netdev, @@ -2963,6 +3074,12 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter return scrq; +indir_failed: + do { + rc = plpar_hcall_norets(H_FREE_SUB_CRQ, + adapter->vdev->unit_address, + scrq->crq_num); + } while (rc == H_BUSY || rc == H_IS_LONG_BUSY(rc)); reg_failed: dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE, DMA_BIDIRECTIONAL); @@ -3077,14 +3194,17 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, struct device *dev = &adapter->vdev->dev; struct ibmvnic_tx_pool *tx_pool; struct ibmvnic_tx_buff *txbuff; + struct netdev_queue *txq; union sub_crq *next; int index; - int i, j; + int i; restart_loop: while (pending_scrq(adapter, scrq)) { unsigned int pool = scrq->pool_index; int num_entries = 0; + int total_bytes = 0; + int num_packets = 0; next = ibmvnic_next_scrq(adapter, scrq); for (i = 0; i < next->tx_comp.num_comps; i++) { @@ -3102,21 +3222,16 @@ restart_loop: } txbuff = &tx_pool->tx_buff[index]; - - for (j = 0; j < IBMVNIC_MAX_FRAGS_PER_CRQ; j++) { - if (!txbuff->data_dma[j]) - continue; - - txbuff->data_dma[j] = 0; - } - - if (txbuff->last_frag) { - dev_kfree_skb_any(txbuff->skb); + num_packets++; + num_entries += txbuff->num_entries; + if (txbuff->skb) { + total_bytes += txbuff->skb->len; + dev_consume_skb_irq(txbuff->skb); txbuff->skb = NULL; + } else { + netdev_warn(adapter->netdev, + "TX completion received with NULL socket buffer\n"); } - - num_entries += txbuff->num_entries; - tx_pool->free_map[tx_pool->producer_index] = index; tx_pool->producer_index = (tx_pool->producer_index + 1) % @@ -3125,6 +3240,9 @@ restart_loop: /* remove tx_comp scrq*/ next->tx_comp.first = 0; + txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index); + netdev_tx_completed_queue(txq, num_packets, total_bytes); + if (atomic_sub_return(num_entries, &scrq->used) <= (adapter->req_tx_entries_per_subcrq / 2) && __netif_subqueue_stopped(adapter->netdev, @@ -3524,38 +3642,6 @@ static void print_subcrq_error(struct device *dev, int rc, const char *func) } } -static int send_subcrq(struct ibmvnic_adapter *adapter, u64 remote_handle, - union sub_crq *sub_crq) -{ - unsigned int ua = adapter->vdev->unit_address; - struct device *dev = &adapter->vdev->dev; - u64 *u64_crq = (u64 *)sub_crq; - int rc; - - netdev_dbg(adapter->netdev, - "Sending sCRQ %016lx: %016lx %016lx %016lx %016lx\n", - (unsigned long int)cpu_to_be64(remote_handle), - (unsigned long int)cpu_to_be64(u64_crq[0]), - (unsigned long int)cpu_to_be64(u64_crq[1]), - (unsigned long int)cpu_to_be64(u64_crq[2]), - (unsigned long int)cpu_to_be64(u64_crq[3])); - - /* Make sure the hypervisor sees the complete request */ - mb(); - - rc = plpar_hcall_norets(H_SEND_SUB_CRQ, ua, - cpu_to_be64(remote_handle), - cpu_to_be64(u64_crq[0]), - cpu_to_be64(u64_crq[1]), - cpu_to_be64(u64_crq[2]), - cpu_to_be64(u64_crq[3])); - - if (rc) - print_subcrq_error(dev, rc, __func__); - - return rc; -} - static int send_subcrq_indirect(struct ibmvnic_adapter *adapter, u64 remote_handle, u64 ioba, u64 num_entries) { diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 217dcc7ded70..9911d926dd7f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -31,6 +31,8 @@ #define IBMVNIC_BUFFS_PER_POOL 100 #define IBMVNIC_MAX_QUEUES 16 #define IBMVNIC_MAX_QUEUE_SZ 4096 +#define IBMVNIC_MAX_IND_DESCS 128 +#define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) #define IBMVNIC_TSO_BUF_SZ 65536 #define IBMVNIC_TSO_BUFS 64 @@ -224,8 +226,6 @@ struct ibmvnic_tx_comp_desc { #define IBMVNIC_TCP_CHKSUM 0x20 #define IBMVNIC_UDP_CHKSUM 0x08 -#define IBMVNIC_MAX_FRAGS_PER_CRQ 3 - struct ibmvnic_tx_desc { u8 first; u8 type; @@ -861,6 +861,12 @@ union sub_crq { struct ibmvnic_rx_buff_add_desc rx_add; }; +struct ibmvnic_ind_xmit_queue { + union sub_crq *indir_arr; + dma_addr_t indir_dma; + int index; +}; + struct ibmvnic_sub_crq_queue { union sub_crq *msgs; int size, cur; @@ -873,10 +879,11 @@ struct ibmvnic_sub_crq_queue { spinlock_t lock; struct sk_buff *rx_skb_top; struct ibmvnic_adapter *adapter; + struct ibmvnic_ind_xmit_queue ind_buf; atomic_t used; char name[32]; u64 handle; -}; +} ____cacheline_aligned; struct ibmvnic_long_term_buff { unsigned char *buff; @@ -887,14 +894,8 @@ struct ibmvnic_long_term_buff { struct ibmvnic_tx_buff { struct sk_buff *skb; - dma_addr_t data_dma[IBMVNIC_MAX_FRAGS_PER_CRQ]; - unsigned int data_len[IBMVNIC_MAX_FRAGS_PER_CRQ]; int index; int pool_index; - bool last_frag; - union sub_crq indir_arr[6]; - u8 hdr_data[140]; - dma_addr_t indir_dma; int num_entries; }; @@ -906,7 +907,7 @@ struct ibmvnic_tx_pool { struct ibmvnic_long_term_buff long_term_buff; int num_buffers; int buf_size; -}; +} ____cacheline_aligned; struct ibmvnic_rx_buff { struct sk_buff *skb; @@ -927,7 +928,7 @@ struct ibmvnic_rx_pool { int next_alloc; int active; struct ibmvnic_long_term_buff long_term_buff; -}; +} ____cacheline_aligned; struct ibmvnic_vpd { unsigned char *buff; @@ -1014,8 +1015,8 @@ struct ibmvnic_adapter { atomic_t running_cap_crqs; bool wait_capability; - struct ibmvnic_sub_crq_queue **tx_scrq; - struct ibmvnic_sub_crq_queue **rx_scrq; + struct ibmvnic_sub_crq_queue **tx_scrq ____cacheline_aligned; + struct ibmvnic_sub_crq_queue **rx_scrq ____cacheline_aligned; /* rx structs */ struct napi_struct *napi; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile index ffc681b67f1c..7100d1dd856e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile @@ -9,4 +9,5 @@ obj-$(CONFIG_OCTEONTX2_AF) += octeontx2_af.o octeontx2_mbox-y := mbox.o rvu_trace.o octeontx2_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \ - rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o + rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o \ + rvu_cpt.o diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index cb4e3d86b58b..221fe5b7c093 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -158,6 +158,11 @@ M(NPA_HWCTX_DISABLE, 0x403, npa_hwctx_disable, hwctx_disable_req, msg_rsp)\ /* SSO/SSOW mbox IDs (range 0x600 - 0x7FF) */ \ /* TIM mbox IDs (range 0x800 - 0x9FF) */ \ /* CPT mbox IDs (range 0xA00 - 0xBFF) */ \ +M(CPT_LF_ALLOC, 0xA00, cpt_lf_alloc, cpt_lf_alloc_req_msg, \ + msg_rsp) \ +M(CPT_LF_FREE, 0xA01, cpt_lf_free, msg_req, msg_rsp) \ +M(CPT_RD_WR_REGISTER, 0xA02, cpt_rd_wr_register, cpt_rd_wr_reg_msg, \ + cpt_rd_wr_reg_msg) \ /* NPC mbox IDs (range 0x6000 - 0x7FFF) */ \ M(NPC_MCAM_ALLOC_ENTRY, 0x6000, npc_mcam_alloc_entry, npc_mcam_alloc_entry_req,\ npc_mcam_alloc_entry_rsp) \ @@ -1046,4 +1051,32 @@ struct ptp_rsp { u64 clk; }; +/* CPT mailbox error codes + * Range 901 - 1000. + */ +enum cpt_af_status { + CPT_AF_ERR_PARAM = -901, + CPT_AF_ERR_GRP_INVALID = -902, + CPT_AF_ERR_LF_INVALID = -903, + CPT_AF_ERR_ACCESS_DENIED = -904, + CPT_AF_ERR_SSO_PF_FUNC_INVALID = -905, + CPT_AF_ERR_NIX_PF_FUNC_INVALID = -906 +}; + +/* CPT mbox message formats */ +struct cpt_rd_wr_reg_msg { + struct mbox_msghdr hdr; + u64 reg_offset; + u64 *ret_val; + u64 val; + u8 is_write; +}; + +struct cpt_lf_alloc_req_msg { + struct mbox_msghdr hdr; + u16 nix_pf_func; + u16 sso_pf_func; + u16 eng_grpmsk; +}; + #endif /* MBOX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 37774bac32b0..b6c0977499ab 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -52,6 +52,7 @@ struct rvu_debugfs { struct dentry *npa; struct dentry *nix; struct dentry *npc; + struct dentry *cpt; struct dump_ctx npa_aura_ctx; struct dump_ctx npa_pool_ctx; struct dump_ctx nix_cq_ctx; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c new file mode 100644 index 000000000000..35261d52c997 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include <linux/pci.h> +#include "rvu_struct.h" +#include "rvu_reg.h" +#include "mbox.h" +#include "rvu.h" + +/* CPT PF device id */ +#define PCI_DEVID_OTX2_CPT_PF 0xA0FD + +static int get_cpt_pf_num(struct rvu *rvu) +{ + int i, domain_nr, cpt_pf_num = -1; + struct pci_dev *pdev; + + domain_nr = pci_domain_nr(rvu->pdev->bus); + for (i = 0; i < rvu->hw->total_pfs; i++) { + pdev = pci_get_domain_bus_and_slot(domain_nr, i + 1, 0); + if (!pdev) + continue; + + if (pdev->device == PCI_DEVID_OTX2_CPT_PF) { + cpt_pf_num = i; + put_device(&pdev->dev); + break; + } + put_device(&pdev->dev); + } + return cpt_pf_num; +} + +static bool is_cpt_pf(struct rvu *rvu, u16 pcifunc) +{ + int cpt_pf_num = get_cpt_pf_num(rvu); + + if (rvu_get_pf(pcifunc) != cpt_pf_num) + return false; + if (pcifunc & RVU_PFVF_FUNC_MASK) + return false; + + return true; +} + +static bool is_cpt_vf(struct rvu *rvu, u16 pcifunc) +{ + int cpt_pf_num = get_cpt_pf_num(rvu); + + if (rvu_get_pf(pcifunc) != cpt_pf_num) + return false; + if (!(pcifunc & RVU_PFVF_FUNC_MASK)) + return false; + + return true; +} + +int rvu_mbox_handler_cpt_lf_alloc(struct rvu *rvu, + struct cpt_lf_alloc_req_msg *req, + struct msg_rsp *rsp) +{ + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + int cptlf, blkaddr; + int num_lfs, slot; + u64 val; + + if (req->eng_grpmsk == 0x0) + return CPT_AF_ERR_GRP_INVALID; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); + if (blkaddr < 0) + return blkaddr; + + block = &rvu->hw->block[blkaddr]; + num_lfs = rvu_get_rsrc_mapcount(rvu_get_pfvf(rvu, pcifunc), + block->addr); + if (!num_lfs) + return CPT_AF_ERR_LF_INVALID; + + /* Check if requested 'CPTLF <=> NIXLF' mapping is valid */ + if (req->nix_pf_func) { + /* If default, use 'this' CPTLF's PFFUNC */ + if (req->nix_pf_func == RVU_DEFAULT_PF_FUNC) + req->nix_pf_func = pcifunc; + if (!is_pffunc_map_valid(rvu, req->nix_pf_func, BLKTYPE_NIX)) + return CPT_AF_ERR_NIX_PF_FUNC_INVALID; + } + + /* Check if requested 'CPTLF <=> SSOLF' mapping is valid */ + if (req->sso_pf_func) { + /* If default, use 'this' CPTLF's PFFUNC */ + if (req->sso_pf_func == RVU_DEFAULT_PF_FUNC) + req->sso_pf_func = pcifunc; + if (!is_pffunc_map_valid(rvu, req->sso_pf_func, BLKTYPE_SSO)) + return CPT_AF_ERR_SSO_PF_FUNC_INVALID; + } + + for (slot = 0; slot < num_lfs; slot++) { + cptlf = rvu_get_lf(rvu, block, pcifunc, slot); + if (cptlf < 0) + return CPT_AF_ERR_LF_INVALID; + + /* Set CPT LF group and priority */ + val = (u64)req->eng_grpmsk << 48 | 1; + rvu_write64(rvu, blkaddr, CPT_AF_LFX_CTL(cptlf), val); + + /* Set CPT LF NIX_PF_FUNC and SSO_PF_FUNC */ + val = (u64)req->nix_pf_func << 48 | + (u64)req->sso_pf_func << 32; + rvu_write64(rvu, blkaddr, CPT_AF_LFX_CTL2(cptlf), val); + } + + return 0; +} + +int rvu_mbox_handler_cpt_lf_free(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + int cptlf, blkaddr; + int num_lfs, slot; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); + if (blkaddr < 0) + return blkaddr; + + block = &rvu->hw->block[blkaddr]; + num_lfs = rvu_get_rsrc_mapcount(rvu_get_pfvf(rvu, pcifunc), + block->addr); + if (!num_lfs) + return CPT_AF_ERR_LF_INVALID; + + for (slot = 0; slot < num_lfs; slot++) { + cptlf = rvu_get_lf(rvu, block, pcifunc, slot); + if (cptlf < 0) + return CPT_AF_ERR_LF_INVALID; + + /* Reset CPT LF group and priority */ + rvu_write64(rvu, blkaddr, CPT_AF_LFX_CTL(cptlf), 0x0); + /* Reset CPT LF NIX_PF_FUNC and SSO_PF_FUNC */ + rvu_write64(rvu, blkaddr, CPT_AF_LFX_CTL2(cptlf), 0x0); + } + + return 0; +} + +static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) +{ + u64 offset = req->reg_offset; + int blkaddr, num_lfs, lf; + struct rvu_block *block; + struct rvu_pfvf *pfvf; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); + + /* Registers that can be accessed from PF/VF */ + if ((offset & 0xFF000) == CPT_AF_LFX_CTL(0) || + (offset & 0xFF000) == CPT_AF_LFX_CTL2(0)) { + if (offset & 7) + return false; + + lf = (offset & 0xFFF) >> 3; + block = &rvu->hw->block[blkaddr]; + pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); + num_lfs = rvu_get_rsrc_mapcount(pfvf, block->addr); + if (lf >= num_lfs) + /* Slot is not valid for that PF/VF */ + return false; + + /* Translate local LF used by VFs to global CPT LF */ + lf = rvu_get_lf(rvu, &rvu->hw->block[blkaddr], + req->hdr.pcifunc, lf); + if (lf < 0) + return false; + + return true; + } else if (!(req->hdr.pcifunc & RVU_PFVF_FUNC_MASK)) { + /* Registers that can be accessed from PF */ + switch (offset) { + case CPT_AF_CTL: + case CPT_AF_PF_FUNC: + case CPT_AF_BLK_RST: + case CPT_AF_CONSTANTS1: + return true; + } + + switch (offset & 0xFF000) { + case CPT_AF_EXEX_STS(0): + case CPT_AF_EXEX_CTL(0): + case CPT_AF_EXEX_CTL2(0): + case CPT_AF_EXEX_UCODE_BASE(0): + if (offset & 7) + return false; + break; + default: + return false; + } + return true; + } + return false; +} + +int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, + struct cpt_rd_wr_reg_msg *req, + struct cpt_rd_wr_reg_msg *rsp) +{ + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); + if (blkaddr < 0) + return blkaddr; + + /* This message is accepted only if sent from CPT PF/VF */ + if (!is_cpt_pf(rvu, req->hdr.pcifunc) && + !is_cpt_vf(rvu, req->hdr.pcifunc)) + return CPT_AF_ERR_ACCESS_DENIED; + + rsp->reg_offset = req->reg_offset; + rsp->ret_val = req->ret_val; + rsp->is_write = req->is_write; + + if (!is_valid_offset(rvu, req)) + return CPT_AF_ERR_ACCESS_DENIED; + + if (req->is_write) + rvu_write64(rvu, blkaddr, req->reg_offset, req->val); + else + rsp->val = rvu_read64(rvu, blkaddr, req->reg_offset); + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 39e1a614aaf8..c383efc6b90c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -109,6 +109,12 @@ static char *cgx_tx_stats_fields[] = { [CGX_STAT17] = "Control/PAUSE packets sent", }; +enum cpt_eng_type { + CPT_AE_TYPE = 1, + CPT_SE_TYPE = 2, + CPT_IE_TYPE = 3, +}; + #define NDC_MAX_BANK(rvu, blk_addr) (rvu_read64(rvu, \ blk_addr, NDC_AF_CONST) & 0xFF) @@ -1993,6 +1999,271 @@ create_failed: debugfs_remove_recursive(rvu->rvu_dbg.npc); } +/* CPT debugfs APIs */ +static int cpt_eng_sts_display(struct seq_file *filp, u8 eng_type) +{ + struct rvu *rvu = filp->private; + u64 busy_sts = 0, free_sts = 0; + u32 e_min = 0, e_max = 0, e, i; + u16 max_ses, max_ies, max_aes; + int blkaddr; + u64 reg; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); + if (blkaddr < 0) + return -ENODEV; + + reg = rvu_read64(rvu, blkaddr, CPT_AF_CONSTANTS1); + max_ses = reg & 0xffff; + max_ies = (reg >> 16) & 0xffff; + max_aes = (reg >> 32) & 0xffff; + + switch (eng_type) { + case CPT_AE_TYPE: + e_min = max_ses + max_ies; + e_max = max_ses + max_ies + max_aes; + break; + case CPT_SE_TYPE: + e_min = 0; + e_max = max_ses; + break; + case CPT_IE_TYPE: + e_min = max_ses; + e_max = max_ses + max_ies; + break; + default: + return -EINVAL; + } + + for (e = e_min, i = 0; e < e_max; e++, i++) { + reg = rvu_read64(rvu, blkaddr, CPT_AF_EXEX_STS(e)); + if (reg & 0x1) + busy_sts |= 1ULL << i; + + if (reg & 0x2) + free_sts |= 1ULL << i; + } + seq_printf(filp, "FREE STS : 0x%016llx\n", free_sts); + seq_printf(filp, "BUSY STS : 0x%016llx\n", busy_sts); + + return 0; +} + +static int rvu_dbg_cpt_ae_sts_display(struct seq_file *filp, void *unused) +{ + return cpt_eng_sts_display(filp, CPT_AE_TYPE); +} + +RVU_DEBUG_SEQ_FOPS(cpt_ae_sts, cpt_ae_sts_display, NULL); + +static int rvu_dbg_cpt_se_sts_display(struct seq_file *filp, void *unused) +{ + return cpt_eng_sts_display(filp, CPT_SE_TYPE); +} + +RVU_DEBUG_SEQ_FOPS(cpt_se_sts, cpt_se_sts_display, NULL); + +static int rvu_dbg_cpt_ie_sts_display(struct seq_file *filp, void *unused) +{ + return cpt_eng_sts_display(filp, CPT_IE_TYPE); +} + +RVU_DEBUG_SEQ_FOPS(cpt_ie_sts, cpt_ie_sts_display, NULL); + +static int rvu_dbg_cpt_engines_info_display(struct seq_file *filp, void *unused) +{ + struct rvu *rvu = filp->private; + u16 max_ses, max_ies, max_aes; + u32 e_max, e; + int blkaddr; + u64 reg; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); + if (blkaddr < 0) + return -ENODEV; + + reg = rvu_read64(rvu, blkaddr, CPT_AF_CONSTANTS1); + max_ses = reg & 0xffff; + max_ies = (reg >> 16) & 0xffff; + max_aes = (reg >> 32) & 0xffff; + + e_max = max_ses + max_ies + max_aes; + + seq_puts(filp, "===========================================\n"); + for (e = 0; e < e_max; e++) { + reg = rvu_read64(rvu, blkaddr, CPT_AF_EXEX_CTL2(e)); + seq_printf(filp, "CPT Engine[%u] Group Enable 0x%02llx\n", e, + reg & 0xff); + reg = rvu_read64(rvu, blkaddr, CPT_AF_EXEX_ACTIVE(e)); + seq_printf(filp, "CPT Engine[%u] Active Info 0x%llx\n", e, + reg); + reg = rvu_read64(rvu, blkaddr, CPT_AF_EXEX_CTL(e)); + seq_printf(filp, "CPT Engine[%u] Control 0x%llx\n", e, + reg); + seq_puts(filp, "===========================================\n"); + } + return 0; +} + +RVU_DEBUG_SEQ_FOPS(cpt_engines_info, cpt_engines_info_display, NULL); + +static int rvu_dbg_cpt_lfs_info_display(struct seq_file *filp, void *unused) +{ + struct rvu *rvu = filp->private; + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int blkaddr; + u64 reg; + u32 lf; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); + if (blkaddr < 0) + return -ENODEV; + + block = &hw->block[blkaddr]; + if (!block->lf.bmap) + return -ENODEV; + + seq_puts(filp, "===========================================\n"); + for (lf = 0; lf < block->lf.max; lf++) { + reg = rvu_read64(rvu, blkaddr, CPT_AF_LFX_CTL(lf)); + seq_printf(filp, "CPT Lf[%u] CTL 0x%llx\n", lf, reg); + reg = rvu_read64(rvu, blkaddr, CPT_AF_LFX_CTL2(lf)); + seq_printf(filp, "CPT Lf[%u] CTL2 0x%llx\n", lf, reg); + reg = rvu_read64(rvu, blkaddr, CPT_AF_LFX_PTR_CTL(lf)); + seq_printf(filp, "CPT Lf[%u] PTR_CTL 0x%llx\n", lf, reg); + reg = rvu_read64(rvu, blkaddr, block->lfcfg_reg | + (lf << block->lfshift)); + seq_printf(filp, "CPT Lf[%u] CFG 0x%llx\n", lf, reg); + seq_puts(filp, "===========================================\n"); + } + return 0; +} + +RVU_DEBUG_SEQ_FOPS(cpt_lfs_info, cpt_lfs_info_display, NULL); + +static int rvu_dbg_cpt_err_info_display(struct seq_file *filp, void *unused) +{ + struct rvu *rvu = filp->private; + u64 reg0, reg1; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); + if (blkaddr < 0) + return -ENODEV; + + reg0 = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(0)); + reg1 = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(1)); + seq_printf(filp, "CPT_AF_FLTX_INT: 0x%llx 0x%llx\n", reg0, reg1); + reg0 = rvu_read64(rvu, blkaddr, CPT_AF_PSNX_EXE(0)); + reg1 = rvu_read64(rvu, blkaddr, CPT_AF_PSNX_EXE(1)); + seq_printf(filp, "CPT_AF_PSNX_EXE: 0x%llx 0x%llx\n", reg0, reg1); + reg0 = rvu_read64(rvu, blkaddr, CPT_AF_PSNX_LF(0)); + seq_printf(filp, "CPT_AF_PSNX_LF: 0x%llx\n", reg0); + reg0 = rvu_read64(rvu, blkaddr, CPT_AF_RVU_INT); + seq_printf(filp, "CPT_AF_RVU_INT: 0x%llx\n", reg0); + reg0 = rvu_read64(rvu, blkaddr, CPT_AF_RAS_INT); + seq_printf(filp, "CPT_AF_RAS_INT: 0x%llx\n", reg0); + reg0 = rvu_read64(rvu, blkaddr, CPT_AF_EXE_ERR_INFO); + seq_printf(filp, "CPT_AF_EXE_ERR_INFO: 0x%llx\n", reg0); + + return 0; +} + +RVU_DEBUG_SEQ_FOPS(cpt_err_info, cpt_err_info_display, NULL); + +static int rvu_dbg_cpt_pc_display(struct seq_file *filp, void *unused) +{ + struct rvu *rvu; + int blkaddr; + u64 reg; + + rvu = filp->private; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); + if (blkaddr < 0) + return -ENODEV; + + reg = rvu_read64(rvu, blkaddr, CPT_AF_INST_REQ_PC); + seq_printf(filp, "CPT instruction requests %llu\n", reg); + reg = rvu_read64(rvu, blkaddr, CPT_AF_INST_LATENCY_PC); + seq_printf(filp, "CPT instruction latency %llu\n", reg); + reg = rvu_read64(rvu, blkaddr, CPT_AF_RD_REQ_PC); + seq_printf(filp, "CPT NCB read requests %llu\n", reg); + reg = rvu_read64(rvu, blkaddr, CPT_AF_RD_LATENCY_PC); + seq_printf(filp, "CPT NCB read latency %llu\n", reg); + reg = rvu_read64(rvu, blkaddr, CPT_AF_RD_UC_PC); + seq_printf(filp, "CPT read requests caused by UC fills %llu\n", reg); + reg = rvu_read64(rvu, blkaddr, CPT_AF_ACTIVE_CYCLES_PC); + seq_printf(filp, "CPT active cycles pc %llu\n", reg); + reg = rvu_read64(rvu, blkaddr, CPT_AF_CPTCLK_CNT); + seq_printf(filp, "CPT clock count pc %llu\n", reg); + + return 0; +} + +RVU_DEBUG_SEQ_FOPS(cpt_pc, cpt_pc_display, NULL); + +static void rvu_dbg_cpt_init(struct rvu *rvu) +{ + const struct device *dev = &rvu->pdev->dev; + struct dentry *pfile; + + if (!is_block_implemented(rvu->hw, BLKADDR_CPT0)) + return; + + rvu->rvu_dbg.cpt = debugfs_create_dir("cpt", rvu->rvu_dbg.root); + if (!rvu->rvu_dbg.cpt) + return; + + pfile = debugfs_create_file("cpt_pc", 0600, + rvu->rvu_dbg.cpt, rvu, + &rvu_dbg_cpt_pc_fops); + if (!pfile) + goto create_failed; + + pfile = debugfs_create_file("cpt_ae_sts", 0600, + rvu->rvu_dbg.cpt, rvu, + &rvu_dbg_cpt_ae_sts_fops); + if (!pfile) + goto create_failed; + + pfile = debugfs_create_file("cpt_se_sts", 0600, + rvu->rvu_dbg.cpt, rvu, + &rvu_dbg_cpt_se_sts_fops); + if (!pfile) + goto create_failed; + + pfile = debugfs_create_file("cpt_ie_sts", 0600, + rvu->rvu_dbg.cpt, rvu, + &rvu_dbg_cpt_ie_sts_fops); + if (!pfile) + goto create_failed; + + pfile = debugfs_create_file("cpt_engines_info", 0600, + rvu->rvu_dbg.cpt, rvu, + &rvu_dbg_cpt_engines_info_fops); + if (!pfile) + goto create_failed; + + pfile = debugfs_create_file("cpt_lfs_info", 0600, + rvu->rvu_dbg.cpt, rvu, + &rvu_dbg_cpt_lfs_info_fops); + if (!pfile) + goto create_failed; + + pfile = debugfs_create_file("cpt_err_info", 0600, + rvu->rvu_dbg.cpt, rvu, + &rvu_dbg_cpt_err_info_fops); + if (!pfile) + goto create_failed; + + return; + +create_failed: + dev_err(dev, "Failed to create debugfs dir/file for CPT\n"); + debugfs_remove_recursive(rvu->rvu_dbg.cpt); +} + void rvu_dbg_init(struct rvu *rvu) { struct device *dev = &rvu->pdev->dev; @@ -2019,6 +2290,7 @@ void rvu_dbg_init(struct rvu *rvu) rvu_dbg_nix_init(rvu, BLKADDR_NIX1); rvu_dbg_cgx_init(rvu); rvu_dbg_npc_init(rvu); + rvu_dbg_cpt_init(rvu); return; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index e8d039503097..739b37034bdf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -2085,7 +2085,7 @@ static int nix_tx_vtag_decfg(struct rvu *rvu, int blkaddr, u16 pcifunc = req->hdr.pcifunc; int idx0 = req->tx.vtag0_idx; int idx1 = req->tx.vtag1_idx; - int err; + int err = 0; if (req->tx.free_vtag0 && req->tx.free_vtag1) if (vlan->entry2pfvf_map[idx0] != pcifunc || diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 4ddfdff33a61..14832b66d1fe 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -1218,11 +1218,13 @@ int rvu_mbox_handler_npc_delete_flow(struct rvu *rvu, mutex_unlock(&mcam->lock); list_for_each_entry_safe(iter, tmp, &del_list, list) { + u16 entry = iter->entry; + /* clear the mcam entry target pcifunc */ - mcam->entry2target_pffunc[iter->entry] = 0x0; + mcam->entry2target_pffunc[entry] = 0x0; if (npc_delete_flow(rvu, iter, pcifunc)) - dev_err(rvu->dev, "rule deletion failed for entry:%d", - iter->entry); + dev_err(rvu->dev, "rule deletion failed for entry:%u", + entry); } return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index 1f3379f12b81..0fb2aa909a23 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -429,12 +429,63 @@ #define TIM_AF_LF_RST (0x20) /* CPT */ -#define CPT_AF_CONSTANTS0 (0x0000) -#define CPT_PRIV_LFX_CFG (0x41000) -#define CPT_PRIV_LFX_INT_CFG (0x43000) -#define CPT_AF_RVU_LF_CFG_DEBUG (0x45000) -#define CPT_AF_LF_RST (0x44000) -#define CPT_AF_BLK_RST (0x46000) +#define CPT_AF_CONSTANTS0 (0x0000) +#define CPT_AF_CONSTANTS1 (0x1000) +#define CPT_AF_DIAG (0x3000) +#define CPT_AF_ECO (0x4000) +#define CPT_AF_FLTX_INT(a) (0xa000ull | (u64)(a) << 3) +#define CPT_AF_FLTX_INT_W1S(a) (0xb000ull | (u64)(a) << 3) +#define CPT_AF_FLTX_INT_ENA_W1C(a) (0xc000ull | (u64)(a) << 3) +#define CPT_AF_FLTX_INT_ENA_W1S(a) (0xd000ull | (u64)(a) << 3) +#define CPT_AF_PSNX_EXE(a) (0xe000ull | (u64)(a) << 3) +#define CPT_AF_PSNX_EXE_W1S(a) (0xf000ull | (u64)(a) << 3) +#define CPT_AF_PSNX_LF(a) (0x10000ull | (u64)(a) << 3) +#define CPT_AF_PSNX_LF_W1S(a) (0x11000ull | (u64)(a) << 3) +#define CPT_AF_EXEX_CTL2(a) (0x12000ull | (u64)(a) << 3) +#define CPT_AF_EXEX_STS(a) (0x13000ull | (u64)(a) << 3) +#define CPT_AF_EXE_ERR_INFO (0x14000) +#define CPT_AF_EXEX_ACTIVE(a) (0x16000ull | (u64)(a) << 3) +#define CPT_AF_INST_REQ_PC (0x17000) +#define CPT_AF_INST_LATENCY_PC (0x18000) +#define CPT_AF_RD_REQ_PC (0x19000) +#define CPT_AF_RD_LATENCY_PC (0x1a000) +#define CPT_AF_RD_UC_PC (0x1b000) +#define CPT_AF_ACTIVE_CYCLES_PC (0x1c000) +#define CPT_AF_EXE_DBG_CTL (0x1d000) +#define CPT_AF_EXE_DBG_DATA (0x1e000) +#define CPT_AF_EXE_REQ_TIMER (0x1f000) +#define CPT_AF_EXEX_CTL(a) (0x20000ull | (u64)(a) << 3) +#define CPT_AF_EXE_PERF_CTL (0x21000) +#define CPT_AF_EXE_DBG_CNTX(a) (0x22000ull | (u64)(a) << 3) +#define CPT_AF_EXE_PERF_EVENT_CNT (0x23000) +#define CPT_AF_EXE_EPCI_INBX_CNT(a) (0x24000ull | (u64)(a) << 3) +#define CPT_AF_EXE_EPCI_OUTBX_CNT(a) (0x25000ull | (u64)(a) << 3) +#define CPT_AF_EXEX_UCODE_BASE(a) (0x26000ull | (u64)(a) << 3) +#define CPT_AF_LFX_CTL(a) (0x27000ull | (u64)(a) << 3) +#define CPT_AF_LFX_CTL2(a) (0x29000ull | (u64)(a) << 3) +#define CPT_AF_CPTCLK_CNT (0x2a000) +#define CPT_AF_PF_FUNC (0x2b000) +#define CPT_AF_LFX_PTR_CTL(a) (0x2c000ull | (u64)(a) << 3) +#define CPT_AF_GRPX_THR(a) (0x2d000ull | (u64)(a) << 3) +#define CPT_AF_CTL (0x2e000ull) +#define CPT_AF_XEX_THR(a) (0x2f000ull | (u64)(a) << 3) +#define CPT_PRIV_LFX_CFG (0x41000) +#define CPT_PRIV_AF_INT_CFG (0x42000) +#define CPT_PRIV_LFX_INT_CFG (0x43000) +#define CPT_AF_LF_RST (0x44000) +#define CPT_AF_RVU_LF_CFG_DEBUG (0x45000) +#define CPT_AF_BLK_RST (0x46000) +#define CPT_AF_RVU_INT (0x47000) +#define CPT_AF_RVU_INT_W1S (0x47008) +#define CPT_AF_RVU_INT_ENA_W1S (0x47010) +#define CPT_AF_RVU_INT_ENA_W1C (0x47018) +#define CPT_AF_RAS_INT (0x47020) +#define CPT_AF_RAS_INT_W1S (0x47028) +#define CPT_AF_RAS_INT_ENA_W1S (0x47030) +#define CPT_AF_RAS_INT_ENA_W1C (0x47038) + +#define CPT_AF_LF_CTL2_SHIFT 3 +#define CPT_AF_LF_SSO_PF_FUNC_SHIFT 32 #define NPC_AF_BLK_RST (0x00040) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index b18b45d02165..ceec649bdd13 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -16,6 +16,7 @@ #include <linux/net_tstamp.h> #include <linux/ptp_clock_kernel.h> #include <linux/timecounter.h> +#include <linux/soc/marvell/octeontx2/asm.h> #include <mbox.h> #include <npc.h> @@ -462,21 +463,9 @@ static inline u64 otx2_atomic64_add(u64 incr, u64 *ptr) return result; } -static inline u64 otx2_lmt_flush(uint64_t addr) -{ - u64 result = 0; - - __asm__ volatile(".cpu generic+lse\n" - "ldeor xzr,%x[rf],[%[rs]]" - : [rf]"=r"(result) - : [rs]"r"(addr)); - return result; -} - #else #define otx2_write128(lo, hi, addr) #define otx2_atomic64_add(incr, ptr) ({ *ptr += incr; }) -#define otx2_lmt_flush(addr) ({ 0; }) #endif /* Alloc pointer from pool/aura */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 4c82f60f3cf3..634d60655a74 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -2084,7 +2084,7 @@ static int otx2_do_set_vf_vlan(struct otx2_nic *pf, int vf, u16 vlan, u8 qos, vtag_req->vtag_size = VTAGSIZE_T4; vtag_req->cfg_type = 0; /* tx vlan cfg */ vtag_req->tx.cfg_vtag0 = 1; - vtag_req->tx.vtag0 = (ntohs(proto) << 16) | vlan; + vtag_req->tx.vtag0 = ((u64)ntohs(proto) << 16) | vlan; err = otx2_sync_mbox_msg(&pf->mbox); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 106513f772c3..157f7eef92f1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2027,7 +2027,6 @@ static void mlx4_en_clear_stats(struct net_device *dev) if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) en_dbg(HW, priv, "Failed dumping statistics\n"); - memset(&priv->pstats, 0, sizeof(priv->pstats)); memset(&priv->pkstats, 0, sizeof(priv->pkstats)); memset(&priv->port_stats, 0, sizeof(priv->port_stats)); memset(&priv->rx_flowstats, 0, sizeof(priv->rx_flowstats)); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index b0f79a5151cf..55fc33de4ce7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -914,7 +914,6 @@ next: wmb(); /* ensure HW sees CQ consumer before we post new buffers */ ring->cons = cq->mcq.cons_index; } - AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); mlx4_en_refill_rx_buffers(priv, ring); @@ -966,8 +965,6 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) /* in case we got here because of !clean_complete */ done = budget; - INC_PERF_COUNTER(priv->pstats.napi_quota); - cpu_curr = smp_processor_id(); idata = irq_desc_get_irq_data(cq->irq_desc); aff = irq_data_get_affinity_mask(idata); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 3ddb7268e415..b15ec32758a3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -864,9 +864,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(!priv->port_up)) goto tx_drop; - /* fetch ring->cons far ahead before needing it to avoid stall */ - ring_cons = READ_ONCE(ring->cons); - real_size = get_real_size(skb, shinfo, dev, &lso_header_size, &inline_ok, &fragptr); if (unlikely(!real_size)) @@ -898,10 +895,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) netdev_txq_bql_enqueue_prefetchw(ring->tx_queue); - /* Track current inflight packets for performance analysis */ - AVG_PERF_COUNTER(priv->pstats.inflight_avg, - (u32)(ring->prod - ring_cons - 1)); - /* Packet is good - grab an index and transmit it */ index = ring->prod & ring->size_mask; bf_index = ring->prod; @@ -1012,7 +1005,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) ring->packets++; } ring->bytes += tx_info->nr_bytes; - AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len); if (tx_info->inl) build_inline_wqe(tx_desc, skb, shinfo, fragptr); @@ -1141,10 +1133,6 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, index = ring->prod & ring->size_mask; tx_info = &ring->tx_info[index]; - /* Track current inflight packets for performance analysis */ - AVG_PERF_COUNTER(priv->pstats.inflight_avg, - (u32)(ring->prod - READ_ONCE(ring->cons) - 1)); - tx_desc = ring->buf + (index << LOG_TXBB_SIZE); data = &tx_desc->data; @@ -1169,7 +1157,6 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); rx_ring->xdp_tx++; - AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length); ring->prod += MLX4_EN_XDP_TX_NRTXBB; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index a46efe37cfa9..014ce8d3d97b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -170,27 +170,6 @@ #define MLX4_EN_LOOPBACK_RETRIES 5 #define MLX4_EN_LOOPBACK_TIMEOUT 100 -#ifdef MLX4_EN_PERF_STAT -/* Number of samples to 'average' */ -#define AVG_SIZE 128 -#define AVG_FACTOR 1024 - -#define INC_PERF_COUNTER(cnt) (++(cnt)) -#define ADD_PERF_COUNTER(cnt, add) ((cnt) += (add)) -#define AVG_PERF_COUNTER(cnt, sample) \ - ((cnt) = ((cnt) * (AVG_SIZE - 1) + (sample) * AVG_FACTOR) / AVG_SIZE) -#define GET_PERF_COUNTER(cnt) (cnt) -#define GET_AVG_PERF_COUNTER(cnt) ((cnt) / AVG_FACTOR) - -#else - -#define INC_PERF_COUNTER(cnt) do {} while (0) -#define ADD_PERF_COUNTER(cnt, add) do {} while (0) -#define AVG_PERF_COUNTER(cnt, sample) do {} while (0) -#define GET_PERF_COUNTER(cnt) (0) -#define GET_AVG_PERF_COUNTER(cnt) (0) -#endif /* MLX4_EN_PERF_STAT */ - /* Constants for TX flow */ enum { MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */ @@ -599,7 +578,6 @@ struct mlx4_en_priv { struct work_struct linkstate_task; struct delayed_work stats_task; struct delayed_work service_task; - struct mlx4_en_perf_stats pstats; struct mlx4_en_pkt_stats pkstats; struct mlx4_en_counter_stats pf_stats; struct mlx4_en_flow_stats_rx rx_priority_flowstats[MLX4_NUM_PRIORITIES]; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h index 51d4eaab6a2f..7b51ae8cf759 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h @@ -2,12 +2,6 @@ #ifndef _MLX4_STATS_ #define _MLX4_STATS_ -#ifdef MLX4_EN_PERF_STAT -#define NUM_PERF_STATS NUM_PERF_COUNTERS -#else -#define NUM_PERF_STATS 0 -#endif - #define NUM_PRIORITIES 9 #define NUM_PRIORITY_STATS 2 @@ -46,16 +40,6 @@ struct mlx4_en_port_stats { #define NUM_PORT_STATS 10 }; -struct mlx4_en_perf_stats { - u32 tx_poll; - u64 tx_pktsz_avg; - u32 inflight_avg; - u16 tx_coal_avg; - u16 rx_coal_avg; - u32 napi_quota; -#define NUM_PERF_COUNTERS 6 -}; - struct mlx4_en_xdp_stats { unsigned long rx_xdp_drop; unsigned long rx_xdp_tx; @@ -135,7 +119,7 @@ enum { }; #define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \ - NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS + \ + NUM_FLOW_STATS + NUM_PF_STATS + \ NUM_XDP_STATS + NUM_PHY_STATS) #define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 147dd8fab2af..42a7bec3fd88 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2873,6 +2873,7 @@ struct mlxsw_sp_nexthop { enum mlxsw_sp_nexthop_group_type { MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4, MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6, + MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ, }; struct mlxsw_sp_nexthop_group_info { @@ -2894,6 +2895,9 @@ struct mlxsw_sp_nexthop_group { struct { struct fib_info *fi; } ipv4; + struct { + u32 id; + } obj; }; struct mlxsw_sp_nexthop_group_info *nhgi; enum mlxsw_sp_nexthop_group_type type; @@ -3012,6 +3016,7 @@ struct mlxsw_sp_nexthop_group_cmp_arg { union { struct fib_info *fi; struct mlxsw_sp_fib6_entry *fib6_entry; + u32 id; }; }; @@ -3074,6 +3079,8 @@ mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr) case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6: return !mlxsw_sp_nexthop6_group_cmp(nh_grp, cmp_arg->fib6_entry); + case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ: + return cmp_arg->id != nh_grp->obj.id; default: WARN_ON(1); return 1; @@ -3100,6 +3107,8 @@ static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed) val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed); } return jhash(&val, sizeof(val), seed); + case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ: + return jhash(&nh_grp->obj.id, sizeof(nh_grp->obj.id), seed); default: WARN_ON(1); return 0; @@ -3134,6 +3143,8 @@ mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed) return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed); case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6: return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed); + case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ: + return jhash(&cmp_arg->id, sizeof(cmp_arg->id), seed); default: WARN_ON(1); return 0; @@ -3539,6 +3550,25 @@ mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, } static void +mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + /* Do not update the flags if the nexthop group is being destroyed + * since: + * 1. The nexthop objects is being deleted, in which case the flags are + * irrelevant. + * 2. The nexthop group was replaced by a newer group, in which case + * the flags of the nexthop object were already updated based on the + * new group. + */ + if (nh_grp->can_destroy) + return; + + nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id, + nh_grp->nhgi->adj_index_valid, false); +} + +static void mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { @@ -3549,6 +3579,9 @@ mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6: mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp); break; + case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ: + mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, nh_grp); + break; } } @@ -4088,6 +4121,413 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, } } +static int +mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_single_info *nh, + struct netlink_ext_ack *extack) +{ + int err = -EINVAL; + + if (nh->is_reject) + NL_SET_ERR_MSG_MOD(extack, "Blackhole nexthops are not supported"); + else if (nh->is_fdb) + NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported"); + else if (nh->has_encap) + NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported"); + else + err = 0; + + return err; +} + +static int +mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_grp_info *nh_grp, + struct netlink_ext_ack *extack) +{ + int i; + + if (nh_grp->is_fdb) { + NL_SET_ERR_MSG_MOD(extack, "FDB nexthop groups are not supported"); + return -EINVAL; + } + + for (i = 0; i < nh_grp->num_nh; i++) { + const struct nh_notifier_single_info *nh; + int err; + + nh = &nh_grp->nh_entries[i].nh; + err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh, + extack); + if (err) + return err; + + /* Device only nexthops with an IPIP device are programmed as + * encapsulating adjacency entries. + */ + if (!nh->gw_family && + !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) { + NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway"); + return -EINVAL; + } + } + + return 0; +} + +static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp, + unsigned long event, + struct nh_notifier_info *info) +{ + if (event != NEXTHOP_EVENT_REPLACE) + return 0; + + if (!info->is_grp) + return mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, info->nh, + info->extack); + return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp, info->nh_grp, + info->extack); +} + +static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_info *info) +{ + const struct net_device *dev; + + if (info->is_grp) + /* Already validated earlier. */ + return true; + + dev = info->nh->dev; + return info->nh->gw_family || + mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); +} + +static int +mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + struct nh_notifier_single_info *nh_obj, int weight) +{ + struct net_device *dev = nh_obj->dev; + int err; + + nh->nhgi = nh_grp->nhgi; + nh->nh_weight = weight; + + switch (nh_obj->gw_family) { + case AF_INET: + memcpy(&nh->gw_addr, &nh_obj->ipv4, sizeof(nh_obj->ipv4)); + nh->neigh_tbl = &arp_tbl; + break; + case AF_INET6: + memcpy(&nh->gw_addr, &nh_obj->ipv6, sizeof(nh_obj->ipv6)); +#if IS_ENABLED(CONFIG_IPV6) + nh->neigh_tbl = &nd_tbl; +#endif + break; + } + + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); + list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); + nh->ifindex = dev->ifindex; + + err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev); + if (err) + goto err_type_init; + + return 0; + +err_type_init: + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + return err; +} + +static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); +} + +static int +mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct nh_notifier_info *info) +{ + unsigned int nhs = info->is_grp ? info->nh_grp->num_nh : 1; + struct mlxsw_sp_nexthop_group_info *nhgi; + struct mlxsw_sp_nexthop *nh; + int err, i; + + nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL); + if (!nhgi) + return -ENOMEM; + nh_grp->nhgi = nhgi; + nhgi->nh_grp = nh_grp; + nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info); + nhgi->count = nhs; + for (i = 0; i < nhgi->count; i++) { + struct nh_notifier_single_info *nh_obj; + int weight; + + nh = &nhgi->nexthops[i]; + if (info->is_grp) { + nh_obj = &info->nh_grp->nh_entries[i].nh; + weight = info->nh_grp->nh_entries[i].weight; + } else { + nh_obj = info->nh; + weight = 1; + } + err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, + weight); + if (err) + goto err_nexthop_obj_init; + } + err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + if (err) { + NL_SET_ERR_MSG_MOD(info->extack, "Failed to write adjacency entries to the device"); + goto err_group_refresh; + } + + return 0; + +err_group_refresh: + i = nhgi->count; +err_nexthop_obj_init: + for (i--; i >= 0; i--) { + nh = &nhgi->nexthops[i]; + mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh); + } + kfree(nhgi); + return err; +} + +static void +mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; + int i; + + for (i = nhgi->count - 1; i >= 0; i--) { + struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i]; + + mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh); + } + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + WARN_ON_ONCE(nhgi->adj_index_valid); + kfree(nhgi); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop_obj_group_create(struct mlxsw_sp *mlxsw_sp, + struct nh_notifier_info *info) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + int err; + + nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL); + if (!nh_grp) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nh_grp->fib_list); + nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ; + nh_grp->obj.id = info->id; + + err = mlxsw_sp_nexthop_obj_group_info_init(mlxsw_sp, nh_grp, info); + if (err) + goto err_nexthop_group_info_init; + + nh_grp->can_destroy = false; + + return nh_grp; + +err_nexthop_group_info_init: + kfree(nh_grp); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nexthop_obj_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + if (!nh_grp->can_destroy) + return; + mlxsw_sp_nexthop_obj_group_info_fini(mlxsw_sp, nh_grp); + WARN_ON_ONCE(!list_empty(&nh_grp->fib_list)); + kfree(nh_grp); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop_obj_group_lookup(struct mlxsw_sp *mlxsw_sp, u32 id) +{ + struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg; + + cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ; + cmp_arg.id = id; + return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, + &cmp_arg, + mlxsw_sp_nexthop_group_ht_params); +} + +static int mlxsw_sp_nexthop_obj_group_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + return mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); +} + +static int +mlxsw_sp_nexthop_obj_group_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop_group *old_nh_grp, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_nexthop_group_info *old_nhgi = old_nh_grp->nhgi; + struct mlxsw_sp_nexthop_group_info *new_nhgi = nh_grp->nhgi; + int err; + + old_nh_grp->nhgi = new_nhgi; + new_nhgi->nh_grp = old_nh_grp; + nh_grp->nhgi = old_nhgi; + old_nhgi->nh_grp = nh_grp; + + if (old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) { + /* Both the old adjacency index and the new one are valid. + * Routes are currently using the old one. Tell the device to + * replace the old adjacency index with the new one. + */ + err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, old_nh_grp, + old_nhgi->adj_index, + old_nhgi->ecmp_size); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to replace old adjacency index with new one"); + goto err_out; + } + } else if (old_nhgi->adj_index_valid && !new_nhgi->adj_index_valid) { + /* The old adjacency index is valid, while the new one is not. + * Iterate over all the routes using the group and change them + * to trap packets to the CPU. + */ + err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to trap packets"); + goto err_out; + } + } else if (!old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) { + /* The old adjacency index is invalid, while the new one is. + * Iterate over all the routes using the group and change them + * to forward packets using the new valid index. + */ + err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to forward packets"); + goto err_out; + } + } + + /* Make sure the flags are set / cleared based on the new nexthop group + * information. + */ + mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, old_nh_grp); + + /* At this point 'nh_grp' is just a shell that is not used by anyone + * and its nexthop group info is the old info that was just replaced + * with the new one. Remove it. + */ + nh_grp->can_destroy = true; + mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); + + return 0; + +err_out: + old_nhgi->nh_grp = old_nh_grp; + nh_grp->nhgi = new_nhgi; + new_nhgi->nh_grp = nh_grp; + old_nh_grp->nhgi = old_nhgi; + return err; +} + +static int mlxsw_sp_nexthop_obj_new(struct mlxsw_sp *mlxsw_sp, + struct nh_notifier_info *info) +{ + struct mlxsw_sp_nexthop_group *nh_grp, *old_nh_grp; + struct netlink_ext_ack *extack = info->extack; + int err; + + nh_grp = mlxsw_sp_nexthop_obj_group_create(mlxsw_sp, info); + if (IS_ERR(nh_grp)) + return PTR_ERR(nh_grp); + + old_nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id); + if (!old_nh_grp) + err = mlxsw_sp_nexthop_obj_group_add(mlxsw_sp, nh_grp); + else + err = mlxsw_sp_nexthop_obj_group_replace(mlxsw_sp, nh_grp, + old_nh_grp, extack); + + if (err) { + nh_grp->can_destroy = true; + mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); + } + + return err; +} + +static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp, + struct nh_notifier_info *info) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + + nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id); + if (!nh_grp) + return; + + nh_grp->can_destroy = true; + mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); + + /* If the group still has routes using it, then defer the delete + * operation until the last route using it is deleted. + */ + if (!list_empty(&nh_grp->fib_list)) + return; + mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); +} + +static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct nh_notifier_info *info = ptr; + struct mlxsw_sp_router *router; + int err = 0; + + router = container_of(nb, struct mlxsw_sp_router, nexthop_nb); + err = mlxsw_sp_nexthop_obj_validate(router->mlxsw_sp, event, info); + if (err) + goto out; + + mutex_lock(&router->lock); + + ASSERT_RTNL(); + + switch (event) { + case NEXTHOP_EVENT_REPLACE: + err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info); + break; + case NEXTHOP_EVENT_DEL: + mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info); + break; + default: + break; + } + + mutex_unlock(&router->lock); + +out: + return notifier_from_errno(err); +} + static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) { @@ -4208,12 +4648,21 @@ static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_nexthop_group *nh_grp; + if (fi->nh) { + nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, + fi->nh->id); + if (WARN_ON_ONCE(!nh_grp)) + return -EINVAL; + goto out; + } + nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi); if (!nh_grp) { nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi); if (IS_ERR(nh_grp)) return PTR_ERR(nh_grp); } +out: list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list); fib_entry->nh_group = nh_grp; return 0; @@ -4227,6 +4676,12 @@ static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp, list_del(&fib_entry->nexthop_group_node); if (!list_empty(&nh_grp->fib_list)) return; + + if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) { + mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); + return; + } + mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp); } @@ -5517,8 +5972,17 @@ mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry) { + struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); struct mlxsw_sp_nexthop_group *nh_grp; + if (rt->nh) { + nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, + rt->nh->id); + if (WARN_ON_ONCE(!nh_grp)) + return -EINVAL; + goto out; + } + nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry); if (!nh_grp) { nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry); @@ -5531,6 +5995,7 @@ static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp, */ __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry); +out: list_add_tail(&fib6_entry->common.nexthop_group_node, &nh_grp->fib_list); fib6_entry->common.nh_group = nh_grp; @@ -5546,6 +6011,12 @@ static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp, list_del(&fib_entry->nexthop_group_node); if (!list_empty(&nh_grp->fib_list)) return; + + if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) { + mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); + return; + } + mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp); } @@ -6597,20 +7068,6 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported"); return notifier_from_errno(-EINVAL); } - if (fen_info->fi->nh) { - NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); - return notifier_from_errno(-EINVAL); - } - } else if (info->family == AF_INET6) { - struct fib6_entry_notifier_info *fen6_info; - - fen6_info = container_of(info, - struct fib6_entry_notifier_info, - info); - if (fen6_info->rt->nh) { - NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported"); - return notifier_from_errno(-EINVAL); - } } break; } @@ -8549,6 +9006,14 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_register_netevent_notifier; + mlxsw_sp->router->nexthop_nb.notifier_call = + mlxsw_sp_nexthop_obj_event; + err = register_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->nexthop_nb, + extack); + if (err) + goto err_register_nexthop_notifier; + mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event; err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp), &mlxsw_sp->router->fib_nb, @@ -8559,6 +9024,9 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, return 0; err_register_fib_notifier: + unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->nexthop_nb); +err_register_nexthop_notifier: unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); err_register_netevent_notifier: unregister_inet6addr_notifier(&router->inet6addr_nb); @@ -8598,6 +9066,8 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), &mlxsw_sp->router->fib_nb); + unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->nexthop_nb); unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 8230f6ff02ed..023f70827db0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -58,6 +58,7 @@ struct mlxsw_sp_router { struct list_head nexthop_neighs_list; struct list_head ipip_list; bool aborted; + struct notifier_block nexthop_nb; struct notifier_block fib_nb; struct notifier_block netevent_nb; struct notifier_block inetaddr_nb; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 1acf7128c146..32a4c8c0b3e6 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4356,18 +4356,19 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp, int budget) { unsigned int dirty_tx, bytes_compl = 0, pkts_compl = 0; + struct sk_buff *skb; dirty_tx = tp->dirty_tx; while (READ_ONCE(tp->cur_tx) != dirty_tx) { unsigned int entry = dirty_tx % NUM_TX_DESC; - struct sk_buff *skb = tp->tx_skb[entry].skb; u32 status; status = le32_to_cpu(tp->TxDescArray[entry].opts1); if (status & DescOwn) break; + skb = tp->tx_skb[entry].skb; rtl8169_unmap_tx_skb(tp, entry); if (skb) { @@ -4397,8 +4398,10 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp, * too close. Let's kick an extra TxPoll request when a burst * of start_xmit activity is detected (if it is not detected, * it is slow enough). -- FR + * If skb is NULL then we come here again once a tx irq is + * triggered after the last fragment is marked transmitted. */ - if (tp->cur_tx != dirty_tx) + if (tp->cur_tx != dirty_tx && skb) rtl8169_doorbell(tp); } } @@ -5162,8 +5165,8 @@ static int rtl_get_ether_clk(struct rtl8169_private *tp) if (rc == -ENOENT) /* clk-core allows NULL (for suspend / resume) */ rc = 0; - else if (rc != -EPROBE_DEFER) - dev_err(d, "failed to get clk: %d\n", rc); + else + dev_err_probe(d, rc, "failed to get clk\n"); } else { tp->clk = clk; rc = clk_prepare_enable(clk); diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 1503cc9ec6e2..536aa8961dc6 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -631,6 +631,7 @@ static void netsec_set_rx_de(struct netsec_priv *priv, static bool netsec_clean_tx_dring(struct netsec_priv *priv) { struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX]; + struct xdp_frame_bulk bq; struct netsec_de *entry; int tail = dring->tail; unsigned int bytes; @@ -639,8 +640,11 @@ static bool netsec_clean_tx_dring(struct netsec_priv *priv) spin_lock(&dring->lock); bytes = 0; + xdp_frame_bulk_init(&bq); entry = dring->vaddr + DESC_SZ * tail; + rcu_read_lock(); /* need for xdp_return_frame_bulk */ + while (!(entry->attr & (1U << NETSEC_TX_SHIFT_OWN_FIELD)) && cnt < DESC_NUM) { struct netsec_desc *desc; @@ -665,7 +669,10 @@ static bool netsec_clean_tx_dring(struct netsec_priv *priv) dev_kfree_skb(desc->skb); } else { bytes += desc->xdpf->len; - xdp_return_frame(desc->xdpf); + if (desc->buf_type == TYPE_NETSEC_XDP_TX) + xdp_return_frame_rx_napi(desc->xdpf); + else + xdp_return_frame_bulk(desc->xdpf, &bq); } next: /* clean up so netsec_uninit_pkt_dring() won't free the skb @@ -684,6 +691,9 @@ next: entry = dring->vaddr + DESC_SZ * tail; cnt++; } + xdp_flush_frame_bulk(&bq); + + rcu_read_unlock(); spin_unlock(&dring->lock); diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 55151960a698..eb4c5d408a83 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -92,6 +92,7 @@ #define GSI_CMD_TIMEOUT 5 /* seconds */ #define GSI_CHANNEL_STOP_RX_RETRIES 10 +#define GSI_CHANNEL_MODEM_HALT_RETRIES 10 #define GSI_MHI_EVENT_ID_START 10 /* 1st reserved event id */ #define GSI_MHI_EVENT_ID_END 16 /* Last reserved event id */ @@ -365,15 +366,15 @@ static int gsi_evt_ring_alloc_command(struct gsi *gsi, u32 evt_ring_id) /* Get initial event ring state */ evt_ring->state = gsi_evt_ring_state(gsi, evt_ring_id); if (evt_ring->state != GSI_EVT_RING_STATE_NOT_ALLOCATED) { - dev_err(gsi->dev, "bad event ring state %u before alloc\n", - evt_ring->state); + dev_err(gsi->dev, "event ring %u bad state %u before alloc\n", + evt_ring_id, evt_ring->state); return -EINVAL; } ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE); if (!ret && evt_ring->state != GSI_EVT_RING_STATE_ALLOCATED) { - dev_err(gsi->dev, "bad event ring state %u after alloc\n", - evt_ring->state); + dev_err(gsi->dev, "event ring %u bad state %u after alloc\n", + evt_ring_id, evt_ring->state); ret = -EIO; } @@ -389,15 +390,15 @@ static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id) if (state != GSI_EVT_RING_STATE_ALLOCATED && state != GSI_EVT_RING_STATE_ERROR) { - dev_err(gsi->dev, "bad event ring state %u before reset\n", - evt_ring->state); + dev_err(gsi->dev, "event ring %u bad state %u before reset\n", + evt_ring_id, evt_ring->state); return; } ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET); if (!ret && evt_ring->state != GSI_EVT_RING_STATE_ALLOCATED) - dev_err(gsi->dev, "bad event ring state %u after reset\n", - evt_ring->state); + dev_err(gsi->dev, "event ring %u bad state %u after reset\n", + evt_ring_id, evt_ring->state); } /* Issue a hardware de-allocation request for an allocated event ring */ @@ -407,15 +408,15 @@ static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id) int ret; if (evt_ring->state != GSI_EVT_RING_STATE_ALLOCATED) { - dev_err(gsi->dev, "bad event ring state %u before dealloc\n", - evt_ring->state); + dev_err(gsi->dev, "event ring %u state %u before dealloc\n", + evt_ring_id, evt_ring->state); return; } ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC); if (!ret && evt_ring->state != GSI_EVT_RING_STATE_NOT_ALLOCATED) - dev_err(gsi->dev, "bad event ring state %u after dealloc\n", - evt_ring->state); + dev_err(gsi->dev, "event ring %u bad state %u after dealloc\n", + evt_ring_id, evt_ring->state); } /* Fetch the current state of a channel from hardware */ @@ -479,7 +480,8 @@ static int gsi_channel_alloc_command(struct gsi *gsi, u32 channel_id) /* Get initial channel state */ state = gsi_channel_state(channel); if (state != GSI_CHANNEL_STATE_NOT_ALLOCATED) { - dev_err(dev, "bad channel state %u before alloc\n", state); + dev_err(dev, "channel %u bad state %u before alloc\n", + channel_id, state); return -EINVAL; } @@ -488,7 +490,8 @@ static int gsi_channel_alloc_command(struct gsi *gsi, u32 channel_id) /* Channel state will normally have been updated */ state = gsi_channel_state(channel); if (!ret && state != GSI_CHANNEL_STATE_ALLOCATED) { - dev_err(dev, "bad channel state %u after alloc\n", state); + dev_err(dev, "channel %u bad state %u after alloc\n", + channel_id, state); ret = -EIO; } @@ -505,7 +508,8 @@ static int gsi_channel_start_command(struct gsi_channel *channel) state = gsi_channel_state(channel); if (state != GSI_CHANNEL_STATE_ALLOCATED && state != GSI_CHANNEL_STATE_STOPPED) { - dev_err(dev, "bad channel state %u before start\n", state); + dev_err(dev, "channel %u bad state %u before start\n", + gsi_channel_id(channel), state); return -EINVAL; } @@ -514,7 +518,8 @@ static int gsi_channel_start_command(struct gsi_channel *channel) /* Channel state will normally have been updated */ state = gsi_channel_state(channel); if (!ret && state != GSI_CHANNEL_STATE_STARTED) { - dev_err(dev, "bad channel state %u after start\n", state); + dev_err(dev, "channel %u bad state %u after start\n", + gsi_channel_id(channel), state); ret = -EIO; } @@ -538,7 +543,8 @@ static int gsi_channel_stop_command(struct gsi_channel *channel) if (state != GSI_CHANNEL_STATE_STARTED && state != GSI_CHANNEL_STATE_STOP_IN_PROC) { - dev_err(dev, "bad channel state %u before stop\n", state); + dev_err(dev, "channel %u bad state %u before stop\n", + gsi_channel_id(channel), state); return -EINVAL; } @@ -553,7 +559,8 @@ static int gsi_channel_stop_command(struct gsi_channel *channel) if (state == GSI_CHANNEL_STATE_STOP_IN_PROC) return -EAGAIN; - dev_err(dev, "bad channel state %u after stop\n", state); + dev_err(dev, "channel %u bad state %u after stop\n", + gsi_channel_id(channel), state); return -EIO; } @@ -570,7 +577,10 @@ static void gsi_channel_reset_command(struct gsi_channel *channel) state = gsi_channel_state(channel); if (state != GSI_CHANNEL_STATE_STOPPED && state != GSI_CHANNEL_STATE_ERROR) { - dev_err(dev, "bad channel state %u before reset\n", state); + /* No need to reset a channel already in ALLOCATED state */ + if (state != GSI_CHANNEL_STATE_ALLOCATED) + dev_err(dev, "channel %u bad state %u before reset\n", + gsi_channel_id(channel), state); return; } @@ -579,7 +589,8 @@ static void gsi_channel_reset_command(struct gsi_channel *channel) /* Channel state will normally have been updated */ state = gsi_channel_state(channel); if (!ret && state != GSI_CHANNEL_STATE_ALLOCATED) - dev_err(dev, "bad channel state %u after reset\n", state); + dev_err(dev, "channel %u bad state %u after reset\n", + gsi_channel_id(channel), state); } /* Deallocate an ALLOCATED GSI channel */ @@ -592,7 +603,8 @@ static void gsi_channel_de_alloc_command(struct gsi *gsi, u32 channel_id) state = gsi_channel_state(channel); if (state != GSI_CHANNEL_STATE_ALLOCATED) { - dev_err(dev, "bad channel state %u before dealloc\n", state); + dev_err(dev, "channel %u bad state %u before dealloc\n", + channel_id, state); return; } @@ -601,7 +613,8 @@ static void gsi_channel_de_alloc_command(struct gsi *gsi, u32 channel_id) /* Channel state will normally have been updated */ state = gsi_channel_state(channel); if (!ret && state != GSI_CHANNEL_STATE_NOT_ALLOCATED) - dev_err(dev, "bad channel state %u after dealloc\n", state); + dev_err(dev, "channel %u bad state %u after dealloc\n", + channel_id, state); } /* Ring an event ring doorbell, reporting the last entry processed by the AP. @@ -1075,10 +1088,38 @@ static void gsi_isr_gp_int1(struct gsi *gsi) u32 result; u32 val; + /* This interrupt is used to handle completions of the two GENERIC + * GSI commands. We use these to allocate and halt channels on + * the modem's behalf due to a hardware quirk on IPA v4.2. Once + * allocated, the modem "owns" these channels, and as a result we + * have no way of knowing the channel's state at any given time. + * + * It is recommended that we halt the modem channels we allocated + * when shutting down, but it's possible the channel isn't running + * at the time we issue the HALT command. We'll get an error in + * that case, but it's harmless (the channel is already halted). + * + * For this reason, we silently ignore a CHANNEL_NOT_RUNNING error + * if we receive it. + */ val = ioread32(gsi->virt + GSI_CNTXT_SCRATCH_0_OFFSET); result = u32_get_bits(val, GENERIC_EE_RESULT_FMASK); - if (result != GENERIC_EE_SUCCESS) + + switch (result) { + case GENERIC_EE_SUCCESS: + case GENERIC_EE_CHANNEL_NOT_RUNNING: + gsi->result = 0; + break; + + case GENERIC_EE_RETRY: + gsi->result = -EAGAIN; + break; + + default: dev_err(gsi->dev, "global INT1 generic result %u\n", result); + gsi->result = -EIO; + break; + } complete(&gsi->completion); } @@ -1590,7 +1631,7 @@ static int gsi_generic_command(struct gsi *gsi, u32 channel_id, iowrite32(BIT(ERROR_INT), gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET); if (success) - return 0; + return gsi->result; dev_err(gsi->dev, "GSI generic command %u to channel %u timed out\n", opcode, channel_id); @@ -1606,7 +1647,17 @@ static int gsi_modem_channel_alloc(struct gsi *gsi, u32 channel_id) static void gsi_modem_channel_halt(struct gsi *gsi, u32 channel_id) { - (void)gsi_generic_command(gsi, channel_id, GSI_GENERIC_HALT_CHANNEL); + u32 retries = GSI_CHANNEL_MODEM_HALT_RETRIES; + int ret; + + do + ret = gsi_generic_command(gsi, channel_id, + GSI_GENERIC_HALT_CHANNEL); + while (ret == -EAGAIN && retries--); + + if (ret) + dev_err(gsi->dev, "error %d halting modem channel %u\n", + ret, channel_id); } /* Setup function for channels */ diff --git a/drivers/net/ipa/gsi.h b/drivers/net/ipa/gsi.h index ecc784e3a812..96c9aed397aa 100644 --- a/drivers/net/ipa/gsi.h +++ b/drivers/net/ipa/gsi.h @@ -161,6 +161,7 @@ struct gsi { u32 type_enabled_bitmap; /* GSI IRQ types enabled */ u32 ieob_enabled_bitmap; /* IEOB IRQ enabled (event rings) */ struct completion completion; /* for global EE commands */ + int result; /* Negative errno (generic commands) */ struct mutex mutex; /* protects commands, programming */ }; diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c index a2c0fde05819..9dcf16f399b7 100644 --- a/drivers/net/ipa/ipa_clock.c +++ b/drivers/net/ipa/ipa_clock.c @@ -13,6 +13,7 @@ #include "ipa.h" #include "ipa_clock.h" #include "ipa_modem.h" +#include "ipa_data.h" /** * DOC: IPA Clocking @@ -29,18 +30,6 @@ * An IPA clock reference must be held for any access to IPA hardware. */ -#define IPA_CORE_CLOCK_RATE (75UL * 1000 * 1000) /* Hz */ - -/* Interconnect path bandwidths (each times 1000 bytes per second) */ -#define IPA_MEMORY_AVG (80 * 1000) /* 80 MBps */ -#define IPA_MEMORY_PEAK (600 * 1000) - -#define IPA_IMEM_AVG (80 * 1000) -#define IPA_IMEM_PEAK (350 * 1000) - -#define IPA_CONFIG_AVG (40 * 1000) -#define IPA_CONFIG_PEAK (40 * 1000) - /** * struct ipa_clock - IPA clocking information * @count: Clocking reference count @@ -49,6 +38,7 @@ * @memory_path: Memory interconnect * @imem_path: Internal memory interconnect * @config_path: Configuration space interconnect + * @interconnect_data: Interconnect configuration data */ struct ipa_clock { refcount_t count; @@ -57,6 +47,7 @@ struct ipa_clock { struct icc_path *memory_path; struct icc_path *imem_path; struct icc_path *config_path; + const struct ipa_interconnect_data *interconnect_data; }; static struct icc_path * @@ -113,18 +104,25 @@ static void ipa_interconnect_exit(struct ipa_clock *clock) /* Currently we only use one bandwidth level, so just "enable" interconnects */ static int ipa_interconnect_enable(struct ipa *ipa) { + const struct ipa_interconnect_data *data; struct ipa_clock *clock = ipa->clock; int ret; - ret = icc_set_bw(clock->memory_path, IPA_MEMORY_AVG, IPA_MEMORY_PEAK); + data = &clock->interconnect_data[IPA_INTERCONNECT_MEMORY]; + ret = icc_set_bw(clock->memory_path, data->average_rate, + data->peak_rate); if (ret) return ret; - ret = icc_set_bw(clock->imem_path, IPA_IMEM_AVG, IPA_IMEM_PEAK); + data = &clock->interconnect_data[IPA_INTERCONNECT_IMEM]; + ret = icc_set_bw(clock->memory_path, data->average_rate, + data->peak_rate); if (ret) goto err_memory_path_disable; - ret = icc_set_bw(clock->config_path, IPA_CONFIG_AVG, IPA_CONFIG_PEAK); + data = &clock->interconnect_data[IPA_INTERCONNECT_CONFIG]; + ret = icc_set_bw(clock->memory_path, data->average_rate, + data->peak_rate); if (ret) goto err_imem_path_disable; @@ -141,6 +139,7 @@ err_memory_path_disable: /* To disable an interconnect, we just its bandwidth to 0 */ static int ipa_interconnect_disable(struct ipa *ipa) { + const struct ipa_interconnect_data *data; struct ipa_clock *clock = ipa->clock; int ret; @@ -159,9 +158,13 @@ static int ipa_interconnect_disable(struct ipa *ipa) return 0; err_imem_path_reenable: - (void)icc_set_bw(clock->imem_path, IPA_IMEM_AVG, IPA_IMEM_PEAK); + data = &clock->interconnect_data[IPA_INTERCONNECT_IMEM]; + (void)icc_set_bw(clock->imem_path, data->average_rate, + data->peak_rate); err_memory_path_reenable: - (void)icc_set_bw(clock->memory_path, IPA_MEMORY_AVG, IPA_MEMORY_PEAK); + data = &clock->interconnect_data[IPA_INTERCONNECT_MEMORY]; + (void)icc_set_bw(clock->memory_path, data->average_rate, + data->peak_rate); return ret; } @@ -257,7 +260,8 @@ u32 ipa_clock_rate(struct ipa *ipa) } /* Initialize IPA clocking */ -struct ipa_clock *ipa_clock_init(struct device *dev) +struct ipa_clock * +ipa_clock_init(struct device *dev, const struct ipa_clock_data *data) { struct ipa_clock *clock; struct clk *clk; @@ -269,10 +273,10 @@ struct ipa_clock *ipa_clock_init(struct device *dev) return ERR_CAST(clk); } - ret = clk_set_rate(clk, IPA_CORE_CLOCK_RATE); + ret = clk_set_rate(clk, data->core_clock_rate); if (ret) { - dev_err(dev, "error %d setting core clock rate to %lu\n", - ret, IPA_CORE_CLOCK_RATE); + dev_err(dev, "error %d setting core clock rate to %u\n", + ret, data->core_clock_rate); goto err_clk_put; } @@ -282,6 +286,7 @@ struct ipa_clock *ipa_clock_init(struct device *dev) goto err_clk_put; } clock->core = clk; + clock->interconnect_data = data->interconnect; ret = ipa_interconnect_init(clock, dev); if (ret) diff --git a/drivers/net/ipa/ipa_clock.h b/drivers/net/ipa/ipa_clock.h index 1d70f1de3875..1fe634760e59 100644 --- a/drivers/net/ipa/ipa_clock.h +++ b/drivers/net/ipa/ipa_clock.h @@ -9,6 +9,7 @@ struct device; struct ipa; +struct ipa_clock_data; /** * ipa_clock_rate() - Return the current IPA core clock rate @@ -21,10 +22,12 @@ u32 ipa_clock_rate(struct ipa *ipa); /** * ipa_clock_init() - Initialize IPA clocking * @dev: IPA device + * @data: Clock configuration data * * Return: A pointer to an ipa_clock structure, or a pointer-coded error */ -struct ipa_clock *ipa_clock_init(struct device *dev); +struct ipa_clock *ipa_clock_init(struct device *dev, + const struct ipa_clock_data *data); /** * ipa_clock_exit() - Inverse of ipa_clock_init() diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c index 37dada4da680..5cc0ed77edb9 100644 --- a/drivers/net/ipa/ipa_data-sc7180.c +++ b/drivers/net/ipa/ipa_data-sc7180.c @@ -309,6 +309,26 @@ static struct ipa_mem_data ipa_mem_data = { .smem_size = 0x00002000, }; +static struct ipa_clock_data ipa_clock_data = { + .core_clock_rate = 100 * 1000 * 1000, /* Hz */ + /* Interconnect rates are in 1000 byte/second units */ + .interconnect = { + [IPA_INTERCONNECT_MEMORY] = { + .peak_rate = 465000, /* 465 MBps */ + .average_rate = 80000, /* 80 MBps */ + }, + /* Average rate is unused for the next two interconnects */ + [IPA_INTERCONNECT_IMEM] = { + .peak_rate = 68570, /* 68.570 MBps */ + .average_rate = 0, /* unused */ + }, + [IPA_INTERCONNECT_CONFIG] = { + .peak_rate = 30000, /* 30 MBps */ + .average_rate = 0, /* unused */ + }, + }, +}; + /* Configuration data for the SC7180 SoC. */ const struct ipa_data ipa_data_sc7180 = { .version = IPA_VERSION_4_2, @@ -316,4 +336,5 @@ const struct ipa_data ipa_data_sc7180 = { .endpoint_data = ipa_gsi_endpoint_data, .resource_data = &ipa_resource_data, .mem_data = &ipa_mem_data, + .clock_data = &ipa_clock_data, }; diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c index bd92b619e7fe..f8fee8d3ca42 100644 --- a/drivers/net/ipa/ipa_data-sdm845.c +++ b/drivers/net/ipa/ipa_data-sdm845.c @@ -329,6 +329,26 @@ static struct ipa_mem_data ipa_mem_data = { .smem_size = 0x00002000, }; +static struct ipa_clock_data ipa_clock_data = { + .core_clock_rate = 75 * 1000 * 1000, /* Hz */ + /* Interconnect rates are in 1000 byte/second units */ + .interconnect = { + [IPA_INTERCONNECT_MEMORY] = { + .peak_rate = 600000, /* 600 MBps */ + .average_rate = 80000, /* 80 MBps */ + }, + /* Average rate is unused for the next two interconnects */ + [IPA_INTERCONNECT_IMEM] = { + .peak_rate = 350000, /* 350 MBps */ + .average_rate = 0, /* unused */ + }, + [IPA_INTERCONNECT_CONFIG] = { + .peak_rate = 40000, /* 40 MBps */ + .average_rate = 0, /* unused */ + }, + }, +}; + /* Configuration data for the SDM845 SoC. */ const struct ipa_data ipa_data_sdm845 = { .version = IPA_VERSION_3_5_1, @@ -336,4 +356,5 @@ const struct ipa_data ipa_data_sdm845 = { .endpoint_data = ipa_gsi_endpoint_data, .resource_data = &ipa_resource_data, .mem_data = &ipa_mem_data, + .clock_data = &ipa_clock_data, }; diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h index 83c4b78373ef..0ed5ffe2b8da 100644 --- a/drivers/net/ipa/ipa_data.h +++ b/drivers/net/ipa/ipa_data.h @@ -241,7 +241,7 @@ struct ipa_resource_data { }; /** - * struct ipa_mem - description of IPA memory regions + * struct ipa_mem_data - description of IPA memory regions * @local_count: number of regions defined in the local[] array * @local: array of IPA-local memory region descriptors * @imem_addr: physical address of IPA region within IMEM @@ -258,6 +258,34 @@ struct ipa_mem_data { u32 smem_size; }; +/** enum ipa_interconnect_id - IPA interconnect identifier */ +enum ipa_interconnect_id { + IPA_INTERCONNECT_MEMORY, + IPA_INTERCONNECT_IMEM, + IPA_INTERCONNECT_CONFIG, + IPA_INTERCONNECT_COUNT, /* Last; not an interconnect */ +}; + +/** + * struct ipa_interconnect_data - description of IPA interconnect rates + * @peak_rate: Peak interconnect bandwidth (in 1000 byte/sec units) + * @average_rate: Average interconnect bandwidth (in 1000 byte/sec units) + */ +struct ipa_interconnect_data { + u32 peak_rate; + u32 average_rate; +}; + +/** + * struct ipa_clock_data - description of IPA clock and interconnect rates + * @core_clock_rate: Core clock rate (Hz) + * @interconnect: Array of interconnect bandwidth parameters + */ +struct ipa_clock_data { + u32 core_clock_rate; + struct ipa_interconnect_data interconnect[IPA_INTERCONNECT_COUNT]; +}; + /** * struct ipa_data - combined IPA/GSI configuration data * @version: IPA hardware version @@ -273,6 +301,7 @@ struct ipa_data { const struct ipa_gsi_endpoint_data *endpoint_data; const struct ipa_resource_data *resource_data; const struct ipa_mem_data *mem_data; + const struct ipa_clock_data *clock_data; }; extern const struct ipa_data ipa_data_sdm845; diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index 3fb9c5d90b70..e9bd0d72f2db 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -728,6 +728,14 @@ static int ipa_probe(struct platform_device *pdev) ipa_validate_build(); + /* Get configuration data early; needed for clock initialization */ + data = of_device_get_match_data(dev); + if (!data) { + /* This is really IPA_VALIDATE (should never happen) */ + dev_err(dev, "matched hardware not supported\n"); + return -ENODEV; + } + /* If we need Trust Zone, make sure it's available */ modem_init = of_property_read_bool(dev->of_node, "modem-init"); if (!modem_init) @@ -748,22 +756,13 @@ static int ipa_probe(struct platform_device *pdev) /* The clock and interconnects might not be ready when we're * probed, so might return -EPROBE_DEFER. */ - clock = ipa_clock_init(dev); + clock = ipa_clock_init(dev, data->clock_data); if (IS_ERR(clock)) { ret = PTR_ERR(clock); goto err_rproc_put; } - /* No more EPROBE_DEFER. Get our configuration data */ - data = of_device_get_match_data(dev); - if (!data) { - /* This is really IPA_VALIDATE (should never happen) */ - dev_err(dev, "matched hardware not supported\n"); - ret = -ENOTSUPP; - goto err_clock_exit; - } - - /* Allocate and initialize the IPA structure */ + /* No more EPROBE_DEFER. Allocate and initialize the IPA structure */ ipa = kzalloc(sizeof(*ipa), GFP_KERNEL); if (!ipa) { ret = -ENOMEM; @@ -864,6 +863,11 @@ static int ipa_remove(struct platform_device *pdev) if (ipa->setup_complete) { ret = ipa_modem_stop(ipa); + /* If starting or stopping is in progress, try once more */ + if (ret == -EBUSY) { + usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC); + ret = ipa_modem_stop(ipa); + } if (ret) return ret; @@ -884,6 +888,15 @@ static int ipa_remove(struct platform_device *pdev) return 0; } +static void ipa_shutdown(struct platform_device *pdev) +{ + int ret; + + ret = ipa_remove(pdev); + if (ret) + dev_err(&pdev->dev, "shutdown: remove returned %d\n", ret); +} + /** * ipa_suspend() - Power management system suspend callback * @dev: IPA device structure @@ -941,8 +954,9 @@ static const struct dev_pm_ops ipa_pm_ops = { }; static struct platform_driver ipa_driver = { - .probe = ipa_probe, - .remove = ipa_remove, + .probe = ipa_probe, + .remove = ipa_remove, + .shutdown = ipa_shutdown, .driver = { .name = "ipa", .pm = &ipa_pm_ops, diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index f1884d90a876..166f0d6cbcf7 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -13,9 +13,9 @@ nsim_get_pause_stats(struct net_device *dev, { struct netdevsim *ns = netdev_priv(dev); - if (ns->ethtool.report_stats_rx) + if (ns->ethtool.pauseparam.report_stats_rx) pause_stats->rx_pause_frames = 1; - if (ns->ethtool.report_stats_tx) + if (ns->ethtool.pauseparam.report_stats_tx) pause_stats->tx_pause_frames = 2; } @@ -25,8 +25,8 @@ nsim_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) struct netdevsim *ns = netdev_priv(dev); pause->autoneg = 0; /* We don't support ksettings, so can't pretend */ - pause->rx_pause = ns->ethtool.rx; - pause->tx_pause = ns->ethtool.tx; + pause->rx_pause = ns->ethtool.pauseparam.rx; + pause->tx_pause = ns->ethtool.pauseparam.tx; } static int @@ -37,28 +37,88 @@ nsim_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) if (pause->autoneg) return -EINVAL; - ns->ethtool.rx = pause->rx_pause; - ns->ethtool.tx = pause->tx_pause; + ns->ethtool.pauseparam.rx = pause->rx_pause; + ns->ethtool.pauseparam.tx = pause->tx_pause; + return 0; +} + +static int nsim_get_coalesce(struct net_device *dev, + struct ethtool_coalesce *coal) +{ + struct netdevsim *ns = netdev_priv(dev); + + memcpy(coal, &ns->ethtool.coalesce, sizeof(ns->ethtool.coalesce)); + return 0; +} + +static int nsim_set_coalesce(struct net_device *dev, + struct ethtool_coalesce *coal) +{ + struct netdevsim *ns = netdev_priv(dev); + + memcpy(&ns->ethtool.coalesce, coal, sizeof(ns->ethtool.coalesce)); + return 0; +} + +static void nsim_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *ring) +{ + struct netdevsim *ns = netdev_priv(dev); + + memcpy(ring, &ns->ethtool.ring, sizeof(ns->ethtool.ring)); +} + +static int nsim_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *ring) +{ + struct netdevsim *ns = netdev_priv(dev); + + memcpy(&ns->ethtool.ring, ring, sizeof(ns->ethtool.ring)); return 0; } static const struct ethtool_ops nsim_ethtool_ops = { - .get_pause_stats = nsim_get_pause_stats, - .get_pauseparam = nsim_get_pauseparam, - .set_pauseparam = nsim_set_pauseparam, + .supported_coalesce_params = ETHTOOL_COALESCE_ALL_PARAMS, + .get_pause_stats = nsim_get_pause_stats, + .get_pauseparam = nsim_get_pauseparam, + .set_pauseparam = nsim_set_pauseparam, + .set_coalesce = nsim_set_coalesce, + .get_coalesce = nsim_get_coalesce, + .get_ringparam = nsim_get_ringparam, + .set_ringparam = nsim_set_ringparam, }; +static void nsim_ethtool_ring_init(struct netdevsim *ns) +{ + ns->ethtool.ring.rx_max_pending = 4096; + ns->ethtool.ring.rx_jumbo_max_pending = 4096; + ns->ethtool.ring.rx_mini_max_pending = 4096; + ns->ethtool.ring.tx_max_pending = 4096; +} + void nsim_ethtool_init(struct netdevsim *ns) { struct dentry *ethtool, *dir; ns->netdev->ethtool_ops = &nsim_ethtool_ops; + nsim_ethtool_ring_init(ns); + ethtool = debugfs_create_dir("ethtool", ns->nsim_dev_port->ddir); dir = debugfs_create_dir("pause", ethtool); debugfs_create_bool("report_stats_rx", 0600, dir, - &ns->ethtool.report_stats_rx); + &ns->ethtool.pauseparam.report_stats_rx); debugfs_create_bool("report_stats_tx", 0600, dir, - &ns->ethtool.report_stats_tx); + &ns->ethtool.pauseparam.report_stats_tx); + + dir = debugfs_create_dir("ring", ethtool); + debugfs_create_u32("rx_max_pending", 0600, dir, + &ns->ethtool.ring.rx_max_pending); + debugfs_create_u32("rx_jumbo_max_pending", 0600, dir, + &ns->ethtool.ring.rx_jumbo_max_pending); + debugfs_create_u32("rx_mini_max_pending", 0600, dir, + &ns->ethtool.ring.rx_mini_max_pending); + debugfs_create_u32("tx_max_pending", 0600, dir, + &ns->ethtool.ring.tx_max_pending); } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 698be048041b..19b1e6ef5573 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -15,6 +15,7 @@ #include <linux/debugfs.h> #include <linux/device.h> +#include <linux/ethtool.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/netdevice.h> @@ -51,13 +52,19 @@ struct nsim_ipsec { u32 ok; }; -struct nsim_ethtool { +struct nsim_ethtool_pauseparam { bool rx; bool tx; bool report_stats_rx; bool report_stats_tx; }; +struct nsim_ethtool { + struct nsim_ethtool_pauseparam pauseparam; + struct ethtool_coalesce coalesce; + struct ethtool_ringparam ring; +}; + struct netdevsim { struct net_device *netdev; struct nsim_dev *nsim_dev; diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index e59067c64e97..2b42e46066b4 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -546,10 +546,11 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) /* de-assert bus level PHY GPIO reset */ gpiod = devm_gpiod_get_optional(&bus->dev, "reset", GPIOD_OUT_LOW); if (IS_ERR(gpiod)) { - dev_err(&bus->dev, "mii_bus %s couldn't get reset GPIO\n", - bus->id); + err = dev_err_probe(&bus->dev, PTR_ERR(gpiod), + "mii_bus %s couldn't get reset GPIO\n", + bus->id); device_del(&bus->dev); - return PTR_ERR(gpiod); + return err; } else if (gpiod) { bus->reset_gpiod = gpiod; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 6408b446051f..e3da25b51ae4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -215,6 +215,7 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, #define ETHTOOL_COALESCE_TX_USECS_HIGH BIT(19) #define ETHTOOL_COALESCE_TX_MAX_FRAMES_HIGH BIT(20) #define ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL BIT(21) +#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(21, 0) #define ETHTOOL_COALESCE_USECS \ (ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_TX_USECS) diff --git a/include/linux/soc/marvell/octeontx2/asm.h b/include/linux/soc/marvell/octeontx2/asm.h new file mode 100644 index 000000000000..ae2279fe830a --- /dev/null +++ b/include/linux/soc/marvell/octeontx2/asm.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __SOC_OTX2_ASM_H +#define __SOC_OTX2_ASM_H + +#if defined(CONFIG_ARM64) +/* + * otx2_lmt_flush is used for LMT store operation. + * On octeontx2 platform CPT instruction enqueue and + * NIX packet send are only possible via LMTST + * operations and it uses LDEOR instruction targeting + * the coprocessor address. + */ +#define otx2_lmt_flush(ioaddr) \ +({ \ + u64 result = 0; \ + __asm__ volatile(".cpu generic+lse\n" \ + "ldeor xzr, %x[rf], [%[rs]]" \ + : [rf]"=r" (result) \ + : [rs]"r" (ioaddr)); \ + (result); \ +}) +#else +#define otx2_lmt_flush(ioaddr) ({ 0; }) +#endif + +#endif /* __SOC_OTX2_ASM_H */ diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 6e706d838e4e..b6cf07143a8a 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -88,7 +88,8 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, struct mptcp_out_options *opts); void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb); -void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts); +void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, + struct mptcp_out_options *opts); /* move the skb extension owership, with the assumption that 'to' is * newly allocated diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 88186b95b3c2..9be7320b994f 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -203,6 +203,20 @@ static inline struct nf_icmp_net *nf_icmpv6_pernet(struct net *net) { return &net->ct.nf_ct_proto.icmpv6; } + +/* Caller must check nf_ct_protonum(ct) is IPPROTO_TCP before calling. */ +static inline void nf_ct_set_tcp_be_liberal(struct nf_conn *ct) +{ + ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; + ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; +} + +/* Caller must check nf_ct_protonum(ct) is IPPROTO_TCP before calling. */ +static inline bool nf_conntrack_tcp_established(const struct nf_conn *ct) +{ + return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED && + test_bit(IPS_ASSURED_BIT, &ct->status); +} #endif #ifdef CONFIG_NF_CT_PROTO_DCCP diff --git a/include/net/sock.h b/include/net/sock.h index 1d29aeae74fd..80469c2c448d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1271,10 +1271,15 @@ static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf)) return false; +#ifdef CONFIG_INET return sk->sk_prot->stream_memory_free ? INDIRECT_CALL_1(sk->sk_prot->stream_memory_free, tcp_stream_memory_free, sk, wake) : true; +#else + return sk->sk_prot->stream_memory_free ? + sk->sk_prot->stream_memory_free(sk, wake) : true; +#endif } static inline bool sk_stream_memory_free(const struct sock *sk) @@ -1595,7 +1600,8 @@ void release_sock(struct sock *sk); SINGLE_DEPTH_NESTING) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) -bool lock_sock_fast(struct sock *sk); +bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock); + /** * unlock_sock_fast - complement of lock_sock_fast * @sk: socket @@ -1605,11 +1611,14 @@ bool lock_sock_fast(struct sock *sk); * If slow mode is on, we call regular release_sock() */ static inline void unlock_sock_fast(struct sock *sk, bool slow) + __releases(&sk->sk_lock.slock) { - if (slow) + if (slow) { release_sock(sk); - else + __release(&sk->sk_lock.slock); + } else { spin_unlock_bh(&sk->sk_lock.slock); + } } /* Used by processes to "lock" a socket state, so that diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 77bcc84875af..adb674a860d3 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -30,7 +30,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct net_bridge *br = netdev_priv(dev); struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; - struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); const struct nf_br_ops *nf_ops; u8 state = BR_STATE_FORWARDING; const unsigned char *dest; @@ -45,10 +44,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } - u64_stats_update_begin(&brstats->syncp); - brstats->tx_packets++; - brstats->tx_bytes += skb->len; - u64_stats_update_end(&brstats->syncp); + dev_sw_netstats_tx_add(dev, 1, skb->len); br_switchdev_frame_unmark(skb); BR_INPUT_SKB_CB(skb)->brdev = dev; @@ -119,26 +115,26 @@ static int br_dev_init(struct net_device *dev) struct net_bridge *br = netdev_priv(dev); int err; - br->stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!br->stats) + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) return -ENOMEM; err = br_fdb_hash_init(br); if (err) { - free_percpu(br->stats); + free_percpu(dev->tstats); return err; } err = br_mdb_hash_init(br); if (err) { - free_percpu(br->stats); + free_percpu(dev->tstats); br_fdb_hash_fini(br); return err; } err = br_vlan_init(br); if (err) { - free_percpu(br->stats); + free_percpu(dev->tstats); br_mdb_hash_fini(br); br_fdb_hash_fini(br); return err; @@ -146,7 +142,7 @@ static int br_dev_init(struct net_device *dev) err = br_multicast_init_stats(br); if (err) { - free_percpu(br->stats); + free_percpu(dev->tstats); br_vlan_flush(br); br_mdb_hash_fini(br); br_fdb_hash_fini(br); @@ -165,7 +161,7 @@ static void br_dev_uninit(struct net_device *dev) br_vlan_flush(br); br_mdb_hash_fini(br); br_fdb_hash_fini(br); - free_percpu(br->stats); + free_percpu(dev->tstats); } static int br_dev_open(struct net_device *dev) @@ -202,15 +198,6 @@ static int br_dev_stop(struct net_device *dev) return 0; } -static void br_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) -{ - struct net_bridge *br = netdev_priv(dev); - - netdev_stats_to_stats64(stats, &dev->stats); - dev_fetch_sw_netstats(stats, br->stats); -} - static int br_change_mtu(struct net_device *dev, int new_mtu) { struct net_bridge *br = netdev_priv(dev); @@ -404,7 +391,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_init = br_dev_init, .ndo_uninit = br_dev_uninit, .ndo_start_xmit = br_dev_xmit, - .ndo_get_stats64 = br_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = br_set_mac_address, .ndo_set_rx_mode = br_dev_set_multicast_list, .ndo_change_rx_flags = br_dev_change_rx_flags, diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 21808985f268..8ca1f1bc6d12 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -35,12 +35,8 @@ static int br_pass_frame_up(struct sk_buff *skb) struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(brdev); struct net_bridge_vlan_group *vg; - struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); - u64_stats_update_begin(&brstats->syncp); - brstats->rx_packets++; - brstats->rx_bytes += skb->len; - u64_stats_update_end(&brstats->syncp); + dev_sw_netstats_rx_add(brdev, skb->len); vg = br_vlan_group_rcu(br); /* Bridge is just like any other port. Make sure the diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f44f46a305aa..d538ccec0acd 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -377,7 +377,6 @@ struct net_bridge { spinlock_t hash_lock; struct hlist_head frame_type_list; struct net_device *dev; - struct pcpu_sw_netstats __percpu *stats; unsigned long options; /* These fields are accessed on each packet */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING diff --git a/net/core/sock.c b/net/core/sock.c index 727ea1cc633c..9badbe7bb4e4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3078,7 +3078,7 @@ EXPORT_SYMBOL(release_sock); * * sk_lock.slock unlocked, owned = 1, BH enabled */ -bool lock_sock_fast(struct sock *sk) +bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) { might_sleep(); spin_lock_bh(&sk->sk_lock.slock); @@ -3096,6 +3096,7 @@ bool lock_sock_fast(struct sock *sk) * The sk_lock has mutex_lock() semantics here: */ mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + __acquire(&sk->sk_lock.slock); local_bh_enable(); return true; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index ff2266d2b998..7efc753e4d9d 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -522,10 +522,10 @@ static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p, if (!clone) return; - DSA_SKB_CB(skb)->clone = clone; - - if (ds->ops->port_txtstamp(ds, p->dp->index, clone, type)) + if (ds->ops->port_txtstamp(ds, p->dp->index, clone, type)) { + DSA_SKB_CB(skb)->clone = clone; return; + } kfree_skb(clone); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 99905bc01d40..41880d3521ed 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -445,11 +445,12 @@ struct tcp_out_options { struct mptcp_out_options mptcp; }; -static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts) +static void mptcp_options_write(__be32 *ptr, const struct tcp_sock *tp, + struct tcp_out_options *opts) { #if IS_ENABLED(CONFIG_MPTCP) if (unlikely(OPTION_MPTCP & opts->options)) - mptcp_write_options(ptr, &opts->mptcp); + mptcp_write_options(ptr, tp, &opts->mptcp); #endif } @@ -701,7 +702,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, smc_options_write(ptr, &options); - mptcp_options_write(ptr, opts); + mptcp_options_write(ptr, tp, opts); } static void smc_set_option(const struct tcp_sock *tp, @@ -1346,7 +1347,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, } } - tcp_options_write((__be32 *)(th + 1), tp, &opts); skb_shinfo(skb)->gso_type = sk->sk_gso_type; if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) { th->window = htons(tcp_select_window(sk)); @@ -1357,6 +1357,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, */ th->window = htons(min(tp->rcv_wnd, 65535U)); } + + tcp_options_write((__be32 *)(th + 1), tp, &opts); + #ifdef CONFIG_TCP_MD5SIG /* Calculate the MD5 hash, as we have all we need now */ if (md5) { diff --git a/net/mptcp/options.c b/net/mptcp/options.c index f2d1e27a2bc1..8a59b3e44599 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -242,7 +242,9 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->add_addr = 1; mp_opt->addr_id = *ptr++; - pr_debug("ADD_ADDR: id=%d, echo=%d", mp_opt->addr_id, mp_opt->echo); + pr_debug("ADD_ADDR%s: id=%d, echo=%d", + (mp_opt->family == MPTCP_ADDR_IPVERSION_6) ? "6" : "", + mp_opt->addr_id, mp_opt->echo); if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) { memcpy((u8 *)&mp_opt->addr.s_addr, (u8 *)ptr, 4); ptr += 4; @@ -528,6 +530,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, opts->ext_copy.ack64 = 0; } opts->ext_copy.use_ack = 1; + WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk)); /* Add kind/length/subtype/flag overhead if mapping is not populated */ if (dss_size == 0) @@ -573,17 +576,27 @@ static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id, } #endif -static bool mptcp_established_options_add_addr(struct sock *sk, +static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); + bool drop_other_suboptions = false; + unsigned int opt_size = *size; struct mptcp_addr_info saddr; bool echo; int len; + if (mptcp_pm_should_add_signal_ipv6(msk) && + skb && skb_is_tcp_pure_ack(skb)) { + pr_debug("drop other suboptions"); + opts->suboptions = 0; + remaining += opt_size; + drop_other_suboptions = true; + } + if (!mptcp_pm_should_add_signal(msk) || !(mptcp_pm_add_addr_signal(msk, remaining, &saddr, &echo))) return false; @@ -593,6 +606,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, return false; *size = len; + if (drop_other_suboptions) + *size -= opt_size; opts->addr_id = saddr.id; if (saddr.family == AF_INET) { opts->suboptions |= OPTION_MPTCP_ADD_ADDR; @@ -678,7 +693,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, *size += opt_size; remaining -= opt_size; - if (mptcp_established_options_add_addr(sk, &opt_size, remaining, opts)) { + if (mptcp_established_options_add_addr(sk, skb, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; ret = true; @@ -759,6 +774,11 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, goto fully_established; } + if (mp_opt->add_addr) { + WRITE_ONCE(msk->fully_established, true); + return true; + } + /* If the first established packet does not contain MP_CAPABLE + data * then fallback to TCP. Fallback scenarios requires a reset for * MP_JOIN subflows. @@ -991,7 +1011,24 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) } } -void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) +static void mptcp_set_rwin(const struct tcp_sock *tp) +{ + const struct sock *ssk = (const struct sock *)tp; + const struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk; + u64 ack_seq; + + subflow = mptcp_subflow_ctx(ssk); + msk = mptcp_sk(subflow->conn); + + ack_seq = READ_ONCE(msk->ack_seq) + tp->rcv_wnd; + + if (after64(ack_seq, READ_ONCE(msk->rcv_wnd_sent))) + WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); +} + +void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, + struct mptcp_out_options *opts) { if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & opts->suboptions) { @@ -1148,4 +1185,7 @@ mp_capable_done: TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); } } + + if (tp) + mptcp_set_rwin(tp); } diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index f9c88e2abb8e..75c5040e8d5d 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -16,11 +16,17 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo) { + u8 add_addr = READ_ONCE(msk->pm.add_addr_signal); + pr_debug("msk=%p, local_id=%d", msk, addr->id); msk->pm.local = *addr; - WRITE_ONCE(msk->pm.add_addr_echo, echo); - WRITE_ONCE(msk->pm.add_addr_signal, true); + add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL); + if (echo) + add_addr |= BIT(MPTCP_ADD_ADDR_ECHO); + if (addr->family == AF_INET6) + add_addr |= BIT(MPTCP_ADD_ADDR_IPV6); + WRITE_ONCE(msk->pm.add_addr_signal, add_addr); return 0; } @@ -149,14 +155,24 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, spin_lock_bh(&pm->lock); - if (!READ_ONCE(pm->accept_addr)) + if (!READ_ONCE(pm->accept_addr)) { mptcp_pm_announce_addr(msk, addr, true); - else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) + mptcp_pm_add_addr_send_ack(msk); + } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { pm->remote = *addr; + } spin_unlock_bh(&pm->lock); } +void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk) +{ + if (!mptcp_pm_should_add_signal_ipv6(msk)) + return; + + mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK); +} + void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id) { struct mptcp_pm_data *pm = &msk->pm; @@ -182,13 +198,13 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, if (!mptcp_pm_should_add_signal(msk)) goto out_unlock; - *echo = READ_ONCE(msk->pm.add_addr_echo); + *echo = mptcp_pm_should_add_signal_echo(msk); if (remaining < mptcp_add_addr_len(msk->pm.local.family, *echo)) goto out_unlock; *saddr = msk->pm.local; - WRITE_ONCE(msk->pm.add_addr_signal, false); + WRITE_ONCE(msk->pm.add_addr_signal, 0); ret = true; out_unlock: @@ -232,11 +248,10 @@ void mptcp_pm_data_init(struct mptcp_sock *msk) msk->pm.subflows = 0; msk->pm.rm_id = 0; WRITE_ONCE(msk->pm.work_pending, false); - WRITE_ONCE(msk->pm.add_addr_signal, false); + WRITE_ONCE(msk->pm.add_addr_signal, 0); WRITE_ONCE(msk->pm.rm_addr_signal, false); WRITE_ONCE(msk->pm.accept_addr, false); WRITE_ONCE(msk->pm.accept_subflow, false); - WRITE_ONCE(msk->pm.add_addr_echo, false); msk->pm.status = 0; spin_lock_init(&msk->pm.lock); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index f8a9d82a0ea8..03f2c28f11f5 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -228,6 +228,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) if (!mptcp_pm_should_add_signal(msk)) { pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id); mptcp_pm_announce_addr(msk, &entry->addr, false); + mptcp_pm_add_addr_send_ack(msk); entry->retrans_times++; } @@ -328,6 +329,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (mptcp_pm_alloc_anno_list(msk, local)) { msk->pm.add_addr_signaled++; mptcp_pm_announce_addr(msk, &local->addr, false); + mptcp_pm_nl_add_addr_send_ack(msk); } } else { /* pick failed, avoid fourther attempts later */ @@ -398,6 +400,33 @@ void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) spin_lock_bh(&msk->pm.lock); mptcp_pm_announce_addr(msk, &remote, true); + mptcp_pm_nl_add_addr_send_ack(msk); +} + +void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow; + + if (!mptcp_pm_should_add_signal_ipv6(msk)) + return; + + __mptcp_flush_join_list(msk); + subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node); + if (subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + u8 add_addr; + + spin_unlock_bh(&msk->pm.lock); + pr_debug("send ack for add_addr6"); + lock_sock(ssk); + tcp_send_ack(ssk); + release_sock(ssk); + spin_lock_bh(&msk->pm.lock); + + add_addr = READ_ONCE(msk->pm.add_addr_signal); + add_addr &= ~BIT(MPTCP_ADD_ADDR_IPV6); + WRITE_ONCE(msk->pm.add_addr_signal, add_addr); + } } void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index aeda4357de9a..4b7794835fea 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -168,19 +168,19 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb) struct rb_node **p, *parent; u64 seq, end_seq, max_seq; struct sk_buff *skb1; - int space; seq = MPTCP_SKB_CB(skb)->map_seq; end_seq = MPTCP_SKB_CB(skb)->end_seq; - space = tcp_space(sk); - max_seq = space > 0 ? space + msk->ack_seq : msk->ack_seq; + max_seq = READ_ONCE(msk->rcv_wnd_sent); pr_debug("msk=%p seq=%llx limit=%llx empty=%d", msk, seq, max_seq, RB_EMPTY_ROOT(&msk->out_of_order_queue)); - if (after64(seq, max_seq)) { + if (after64(end_seq, max_seq)) { /* out of window */ mptcp_drop(sk, skb); - pr_debug("oow by %ld", (unsigned long)seq - (unsigned long)max_seq); + pr_debug("oow by %lld, rcv_wnd_sent %llu\n", + (unsigned long long)end_seq - (unsigned long)max_seq, + (unsigned long long)msk->rcv_wnd_sent); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_NODSSWINDOW); return; } @@ -407,16 +407,42 @@ static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk) mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN; } -static void mptcp_send_ack(struct mptcp_sock *msk) +static bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) +{ + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + /* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */ + if (subflow->request_join && !subflow->fully_established) + return false; + + /* only send if our side has not closed yet */ + return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)); +} + +static void mptcp_send_ack(struct mptcp_sock *msk, bool force) { struct mptcp_subflow_context *subflow; + struct sock *pick = NULL; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - lock_sock(ssk); - tcp_send_ack(ssk); - release_sock(ssk); + if (force) { + lock_sock(ssk); + tcp_send_ack(ssk); + release_sock(ssk); + continue; + } + + /* if the hintes ssk is still active, use it */ + pick = ssk; + if (ssk == msk->ack_hint) + break; + } + if (!force && pick) { + lock_sock(pick); + tcp_cleanup_rbuf(pick, 1); + release_sock(pick); } } @@ -468,7 +494,7 @@ static bool mptcp_check_data_fin(struct sock *sk) ret = true; mptcp_set_timeout(sk, NULL); - mptcp_send_ack(msk); + mptcp_send_ack(msk, true); mptcp_close_wake_up(sk); } return ret; @@ -483,7 +509,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, unsigned int moved = 0; bool more_data_avail; struct tcp_sock *tp; - u32 old_copied_seq; bool done = false; int sk_rbuf; @@ -500,7 +525,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, pr_debug("msk=%p ssk=%p", msk, ssk); tp = tcp_sk(ssk); - old_copied_seq = tp->copied_seq; do { u32 map_remaining, offset; u32 seq = tp->copied_seq; @@ -564,11 +588,9 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, break; } } while (more_data_avail); + msk->ack_hint = ssk; *bytes += moved; - if (tp->copied_seq != old_copied_seq) - tcp_cleanup_rbuf(ssk, 1); - return done; } @@ -672,25 +694,14 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) goto wake; - if (move_skbs_to_msk(msk, ssk)) - goto wake; - - /* mptcp socket is owned, release_cb should retry */ - if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, - &sk->sk_tsq_flags)) { - sock_hold(sk); + move_skbs_to_msk(msk, ssk); - /* need to try again, its possible release_cb() has already - * been called after the test_and_set_bit() above. - */ - move_skbs_to_msk(msk, ssk); - } wake: if (wake) sk->sk_data_ready(sk); } -static void __mptcp_flush_join_list(struct mptcp_sock *msk) +void __mptcp_flush_join_list(struct mptcp_sock *msk) { if (likely(list_empty(&msk->join_list))) return; @@ -777,7 +788,9 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk) inet_sk_state_store(sk, TCP_CLOSE_WAIT); break; case TCP_FIN_WAIT1: - /* fallback sockets skip TCP_CLOSING - TCP will take care */ + inet_sk_state_store(sk, TCP_CLOSING); + break; + case TCP_FIN_WAIT2: inet_sk_state_store(sk, TCP_CLOSE); break; default: @@ -1093,18 +1106,6 @@ static void mptcp_nospace(struct mptcp_sock *msk) mptcp_clean_una((struct sock *)msk); } -static bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) -{ - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - /* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */ - if (subflow->request_join && !subflow->fully_established) - return false; - - /* only send if our side has not closed yet */ - return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)); -} - #define MPTCP_SEND_BURST_SIZE ((1 << 16) - \ sizeof(struct tcphdr) - \ MAX_TCP_OPTION_SPACE - \ @@ -1532,7 +1533,7 @@ new_measure: msk->rcvq_space.time = mstamp; } -static bool __mptcp_move_skbs(struct mptcp_sock *msk) +static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv) { unsigned int moved = 0; bool done; @@ -1551,12 +1552,16 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk) slowpath = lock_sock_fast(ssk); done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); + if (moved && rcv) { + WRITE_ONCE(msk->rmem_pending, min(rcv, moved)); + tcp_cleanup_rbuf(ssk, 1); + WRITE_ONCE(msk->rmem_pending, 0); + } unlock_sock_fast(ssk, slowpath); } while (!done); if (mptcp_ofo_queue(msk) || moved > 0) { - if (!mptcp_check_data_fin((struct sock *)msk)) - mptcp_send_ack(msk); + mptcp_check_data_fin((struct sock *)msk); return true; } return false; @@ -1580,8 +1585,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); __mptcp_flush_join_list(msk); - while (len > (size_t)copied) { - int bytes_read; + for (;;) { + int bytes_read, old_space; bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied); if (unlikely(bytes_read < 0)) { @@ -1593,9 +1598,14 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, copied += bytes_read; if (skb_queue_empty(&sk->sk_receive_queue) && - __mptcp_move_skbs(msk)) + __mptcp_move_skbs(msk, len - copied)) continue; + /* be sure to advertise window change */ + old_space = READ_ONCE(msk->old_wspace); + if ((tcp_space(sk) - old_space) >= old_space) + mptcp_send_ack(msk, false); + /* only the master socket status is relevant here. The exit * conditions mirror closely tcp_recvmsg() */ @@ -1648,7 +1658,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, /* .. race-breaker: ssk might have gotten new data * after last __mptcp_move_skbs() returned false. */ - if (unlikely(__mptcp_move_skbs(msk))) + if (unlikely(__mptcp_move_skbs(msk, 0))) set_bit(MPTCP_DATA_READY, &msk->flags); } else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) { /* data to read but mptcp_wait_data() cleared DATA_READY */ @@ -1724,8 +1734,11 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) continue; /* still data outstanding at TCP level? Don't retransmit. */ - if (!tcp_write_queue_empty(ssk)) + if (!tcp_write_queue_empty(ssk)) { + if (inet_csk(ssk)->icsk_ca_state >= TCP_CA_Loss) + continue; return NULL; + } if (subflow->backup) { if (!backup) @@ -1803,6 +1816,10 @@ static void pm_work(struct mptcp_sock *msk) pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); mptcp_pm_nl_add_addr_received(msk); } + if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { + pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); + mptcp_pm_nl_add_addr_send_ack(msk); + } if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); mptcp_pm_nl_rm_addr_received(msk); @@ -1862,7 +1879,6 @@ static void mptcp_worker(struct work_struct *work) int state, ret; lock_sock(sk); - set_bit(MPTCP_WORKER_RUNNING, &msk->flags); state = sk->sk_state; if (unlikely(state == TCP_CLOSE)) goto unlock; @@ -1873,7 +1889,6 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) __mptcp_close_subflow(msk); - __mptcp_move_skbs(msk); if (mptcp_send_head(sk)) mptcp_push_pending(sk, 0); @@ -1940,7 +1955,6 @@ reset_unlock: mptcp_reset_timer(sk); unlock: - clear_bit(MPTCP_WORKER_RUNNING, &msk->flags); release_sock(sk); sock_put(sk); } @@ -1958,6 +1972,7 @@ static int __mptcp_init_sock(struct sock *sk) msk->out_of_order_queue = RB_ROOT; msk->first_pending = NULL; + msk->ack_hint = NULL; msk->first = NULL; inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; @@ -2011,11 +2026,7 @@ static void mptcp_cancel_work(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - /* if called by the work itself, do not try to cancel the work, or - * we will hang. - */ - if (!test_bit(MPTCP_WORKER_RUNNING, &msk->flags) && - cancel_work_sync(&msk->work)) + if (cancel_work_sync(&msk->work)) __sock_put(sk); } @@ -2092,10 +2103,16 @@ static void __mptcp_check_send_data_fin(struct sock *sk) WRITE_ONCE(msk->snd_nxt, msk->write_seq); - /* fallback socket will not get data_fin/ack, can move to close now */ - if (__mptcp_check_fallback(msk) && sk->sk_state == TCP_LAST_ACK) { - inet_sk_state_store(sk, TCP_CLOSE); - mptcp_close_wake_up(sk); + /* fallback socket will not get data_fin/ack, can move to the next + * state now + */ + if (__mptcp_check_fallback(msk)) { + if ((1 << sk->sk_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) { + inet_sk_state_store(sk, TCP_CLOSE); + mptcp_close_wake_up(sk); + } else if (sk->sk_state == TCP_FIN_WAIT1) { + inet_sk_state_store(sk, TCP_FIN_WAIT2); + } } __mptcp_flush_join_list(msk); @@ -2286,6 +2303,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); ack_seq++; WRITE_ONCE(msk->ack_seq, ack_seq); + WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); } sock_reset_flag(nsk, SOCK_RCU_FREE); @@ -2338,7 +2356,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, if (sk_is_mptcp(newsk)) { struct mptcp_subflow_context *subflow; struct sock *new_mptcp_sock; - struct sock *ssk = newsk; subflow = mptcp_subflow_ctx(newsk); new_mptcp_sock = subflow->conn; @@ -2353,22 +2370,8 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, /* acquire the 2nd reference for the owning socket */ sock_hold(new_mptcp_sock); - - local_bh_disable(); - bh_lock_sock(new_mptcp_sock); - msk = mptcp_sk(new_mptcp_sock); - msk->first = newsk; - newsk = new_mptcp_sock; - mptcp_copy_inaddrs(newsk, ssk); - list_add(&subflow->node, &msk->conn_list); - sock_hold(ssk); - - mptcp_rcv_space_init(msk, ssk); - bh_unlock_sock(new_mptcp_sock); - - __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK); - local_bh_enable(); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK); } else { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); @@ -2505,8 +2508,7 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname, return -EOPNOTSUPP; } -#define MPTCP_DEFERRED_ALL (TCPF_DELACK_TIMER_DEFERRED | \ - TCPF_WRITE_TIMER_DEFERRED) +#define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED) /* this is very alike tcp_release_cb() but we must handle differently a * different set of events @@ -2524,16 +2526,6 @@ static void mptcp_release_cb(struct sock *sk) sock_release_ownership(sk); - if (flags & TCPF_DELACK_TIMER_DEFERRED) { - struct mptcp_sock *msk = mptcp_sk(sk); - struct sock *ssk; - - ssk = mptcp_subflow_recv_lookup(msk); - if (!ssk || sk->sk_state == TCP_CLOSE || - !schedule_work(&msk->work)) - __sock_put(sk); - } - if (flags & TCPF_WRITE_TIMER_DEFERRED) { mptcp_retransmit_handler(sk); __sock_put(sk); @@ -2593,6 +2585,7 @@ void mptcp_finish_connect(struct sock *ssk) WRITE_ONCE(msk->write_seq, subflow->idsn + 1); WRITE_ONCE(msk->snd_nxt, msk->write_seq); WRITE_ONCE(msk->ack_seq, ack_seq); + WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); WRITE_ONCE(msk->can_ack, 1); atomic64_set(&msk->snd_una, msk->write_seq); @@ -2819,6 +2812,12 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) { struct mptcp_sock *msk = mptcp_sk(newsock->sk); struct mptcp_subflow_context *subflow; + struct sock *newsk = newsock->sk; + bool slowpath; + + slowpath = lock_sock_fast(newsk); + mptcp_copy_inaddrs(newsk, msk->first); + mptcp_rcv_space_init(msk, msk->first); /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. @@ -2830,6 +2829,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssk->sk_socket) mptcp_sock_graft(ssk, newsock); } + unlock_sock_fast(newsk, slowpath); } if (inet_csk_listen_poll(ssock->sk)) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index b4c8dbe9236b..82d5626323b1 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -91,7 +91,6 @@ #define MPTCP_WORK_EOF 3 #define MPTCP_FALLBACK_DONE 4 #define MPTCP_WORK_CLOSE_SUBFLOW 5 -#define MPTCP_WORKER_RUNNING 6 static inline bool before64(__u64 seq1, __u64 seq2) { @@ -161,11 +160,18 @@ struct mptcp_addr_info { enum mptcp_pm_status { MPTCP_PM_ADD_ADDR_RECEIVED, + MPTCP_PM_ADD_ADDR_SEND_ACK, MPTCP_PM_RM_ADDR_RECEIVED, MPTCP_PM_ESTABLISHED, MPTCP_PM_SUBFLOW_ESTABLISHED, }; +enum mptcp_add_addr_status { + MPTCP_ADD_ADDR_SIGNAL, + MPTCP_ADD_ADDR_ECHO, + MPTCP_ADD_ADDR_IPV6, +}; + struct mptcp_pm_data { struct mptcp_addr_info local; struct mptcp_addr_info remote; @@ -173,13 +179,12 @@ struct mptcp_pm_data { spinlock_t lock; /*protects the whole PM data */ - bool add_addr_signal; + u8 add_addr_signal; bool rm_addr_signal; bool server_side; bool work_pending; bool accept_addr; bool accept_subflow; - bool add_addr_echo; u8 add_addr_signaled; u8 add_addr_accepted; u8 local_addr_used; @@ -211,13 +216,16 @@ struct mptcp_sock { u64 write_seq; u64 snd_nxt; u64 ack_seq; + u64 rcv_wnd_sent; u64 rcv_data_fin_seq; struct sock *last_snd; int snd_burst; + int old_wspace; atomic64_t snd_una; atomic64_t wnd_end; unsigned long timer_ival; u32 token; + int rmem_pending; unsigned long flags; bool can_ack; bool fully_established; @@ -225,6 +233,7 @@ struct mptcp_sock { bool snd_data_fin_enable; bool use_64bit_ack; /* Set when we received a 64-bit DSN */ spinlock_t join_list_lock; + struct sock *ack_hint; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; @@ -252,6 +261,11 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) return (struct mptcp_sock *)sk; } +static inline int __mptcp_space(const struct sock *sk) +{ + return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_pending); +} + static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) { const struct mptcp_sock *msk = mptcp_sk(sk); @@ -404,6 +418,15 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow) return subflow->map_seq + mptcp_subflow_get_map_offset(subflow); } +static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow) +{ + sock_hold(mptcp_subflow_tcp_sock(subflow)); + spin_lock_bh(&msk->join_list_lock); + list_add_tail(&subflow->node, &msk->join_list); + spin_unlock_bh(&msk->join_list_lock); +} + int mptcp_is_enabled(struct net *net); unsigned int mptcp_get_add_addr_timeout(struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, @@ -454,6 +477,7 @@ bool mptcp_schedule_work(struct sock *sk); void mptcp_data_acked(struct sock *sk); void mptcp_subflow_eof(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); +void __mptcp_flush_join_list(struct mptcp_sock *msk); static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) { return READ_ONCE(msk->snd_data_fin_enable) && @@ -494,6 +518,7 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk, void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id); void mptcp_pm_add_addr_received(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); +void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); struct mptcp_pm_add_entry * @@ -508,7 +533,17 @@ int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { - return READ_ONCE(msk->pm.add_addr_signal); + return READ_ONCE(msk->pm.add_addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL); +} + +static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk) +{ + return READ_ONCE(msk->pm.add_addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO); +} + +static inline bool mptcp_pm_should_add_signal_ipv6(struct mptcp_sock *msk) +{ + return READ_ONCE(msk->pm.add_addr_signal) & BIT(MPTCP_ADD_ADDR_IPV6); } static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk) @@ -535,6 +570,7 @@ void mptcp_pm_nl_data_init(struct mptcp_sock *msk); void mptcp_pm_nl_fully_established(struct mptcp_sock *msk); void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk); void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk); +void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 794259789194..4d8abff1be18 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -578,6 +578,10 @@ create_child: */ inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED); + /* link the newly created socket to the msk */ + mptcp_add_pending_subflow(mptcp_sk(new_msk), ctx); + WRITE_ONCE(mptcp_sk(new_msk)->first, child); + /* new mpc subflow takes ownership of the newly * created mptcp socket */ @@ -846,8 +850,6 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, sk_eat_skb(ssk, skb); if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) subflow->map_valid = 0; - if (incr) - tcp_cleanup_rbuf(ssk, incr); } static bool subflow_check_data_avail(struct sock *ssk) @@ -969,7 +971,7 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space) const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); const struct sock *sk = subflow->conn; - *space = tcp_space(sk); + *space = __mptcp_space(sk); *full_space = tcp_full_space(sk); } @@ -1124,11 +1126,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, if (err && err != -EINPROGRESS) goto failed; - sock_hold(ssk); - spin_lock_bh(&msk->join_list_lock); - list_add_tail(&subflow->node, &msk->join_list); - spin_unlock_bh(&msk->join_list_lock); - + mptcp_add_pending_subflow(msk, subflow); return err; failed: diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index c8fb2187ad4b..811c6c9b59e1 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -834,12 +834,6 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static bool nf_conntrack_tcp_established(const struct nf_conn *ct) -{ - return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED && - test_bit(IPS_ASSURED_BIT, &ct->status); -} - /* Returns verdict for packet, or -1 for invalid. */ int nf_conntrack_tcp_packet(struct nf_conn *ct, struct sk_buff *skb, diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 4beb96139d77..6a88daab0190 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1037,6 +1037,14 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, ovs_ct_helper(skb, info->family) != NF_ACCEPT) { return -EINVAL; } + + if (nf_ct_protonum(ct) == IPPROTO_TCP && + nf_ct_is_confirmed(ct) && nf_conntrack_tcp_established(ct)) { + /* Be liberal for tcp packets so that out-of-window + * packets are not marked invalid. + */ + nf_ct_set_tcp_be_liberal(ct); + } } return 0; diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index f4031002d5e9..5de47d72f8c9 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -29,6 +29,10 @@ ALL_TESTS=" bridge_extern_learn_test neigh_offload_test nexthop_offload_test + nexthop_obj_invalid_test + nexthop_obj_offload_test + nexthop_obj_group_offload_test + nexthop_obj_route_offload_test devlink_reload_test " NUM_NETIFS=2 @@ -674,6 +678,191 @@ nexthop_offload_test() sysctl_restore net.ipv6.conf.$swp2.keep_addr_on_down } +nexthop_obj_invalid_test() +{ + # Test that invalid nexthop object configurations are rejected + RET=0 + + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 192.0.2.2/24 2001:db8:1::2/64 + setup_wait + + ip nexthop add id 1 via 192.0.2.3 fdb + check_fail $? "managed to configure an FDB nexthop when should not" + + ip nexthop add id 1 encap mpls 200/300 via 192.0.2.3 dev $swp1 + check_fail $? "managed to configure a nexthop with MPLS encap when should not" + + ip nexthop add id 1 blackhole + check_fail $? "managed to configure a blackhole nexthop when should not" + + ip nexthop add id 1 dev $swp1 + ip nexthop add id 2 dev $swp1 + ip nexthop add id 10 group 1/2 + check_fail $? "managed to configure a nexthop group with device-only nexthops when should not" + + log_test "nexthop objects - invalid configurations" + + ip nexthop del id 2 + ip nexthop del id 1 + + simple_if_fini $swp2 192.0.2.2/24 2001:db8:1::2/64 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 +} + +nexthop_obj_offload_test() +{ + # Test offload indication of nexthop objects + RET=0 + + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 + setup_wait + + ip nexthop add id 1 via 192.0.2.2 dev $swp1 + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 1 + check_err $? "nexthop not marked as offloaded when should" + + ip neigh replace 192.0.2.2 nud failed dev $swp1 + busywait "$TIMEOUT" not wait_for_offload \ + ip nexthop show id 1 + check_err $? "nexthop marked as offloaded after setting neigh to failed state" + + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 1 + check_err $? "nexthop not marked as offloaded after neigh replace" + + ip nexthop replace id 1 via 192.0.2.3 dev $swp1 + busywait "$TIMEOUT" not wait_for_offload \ + ip nexthop show id 1 + check_err $? "nexthop marked as offloaded after replacing to use an invalid address" + + ip nexthop replace id 1 via 192.0.2.2 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 1 + check_err $? "nexthop not marked as offloaded after replacing to use a valid address" + + log_test "nexthop objects offload indication" + + ip neigh del 192.0.2.2 dev $swp1 + ip nexthop del id 1 + + simple_if_fini $swp2 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 +} + +nexthop_obj_group_offload_test() +{ + # Test offload indication of nexthop group objects + RET=0 + + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 + setup_wait + + ip nexthop add id 1 via 192.0.2.2 dev $swp1 + ip nexthop add id 2 via 2001:db8:1::2 dev $swp1 + ip nexthop add id 10 group 1/2 + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 1 + check_err $? "IPv4 nexthop not marked as offloaded when should" + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 2 + check_err $? "IPv6 nexthop not marked as offloaded when should" + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 10 + check_err $? "nexthop group not marked as offloaded when should" + + # Invalidate nexthop id 1 + ip neigh replace 192.0.2.2 nud failed dev $swp1 + busywait "$TIMEOUT" not wait_for_offload \ + ip nexthop show id 10 + check_fail $? "nexthop group not marked as offloaded with one valid nexthop" + + # Invalidate nexthop id 2 + ip neigh replace 2001:db8:1::2 nud failed dev $swp1 + busywait "$TIMEOUT" not wait_for_offload \ + ip nexthop show id 10 + check_err $? "nexthop group marked as offloaded when should not" + + # Revalidate nexthop id 1 + ip nexthop replace id 1 via 192.0.2.3 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 10 + check_err $? "nexthop group not marked as offloaded after revalidating nexthop" + + log_test "nexthop group objects offload indication" + + ip neigh del 2001:db8:1::2 dev $swp1 + ip neigh del 192.0.2.3 dev $swp1 + ip neigh del 192.0.2.2 dev $swp1 + ip nexthop del id 10 + ip nexthop del id 2 + ip nexthop del id 1 + + simple_if_fini $swp2 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 +} + +nexthop_obj_route_offload_test() +{ + # Test offload indication of routes using nexthop objects + RET=0 + + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 + setup_wait + + ip nexthop add id 1 via 192.0.2.2 dev $swp1 + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + + ip route replace 198.51.100.0/24 nhid 1 + busywait "$TIMEOUT" wait_for_offload \ + ip route show 198.51.100.0/24 + check_err $? "route not marked as offloaded when using valid nexthop" + + ip nexthop replace id 1 via 192.0.2.3 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip route show 198.51.100.0/24 + check_err $? "route not marked as offloaded after replacing valid nexthop with a valid one" + + ip nexthop replace id 1 via 192.0.2.4 dev $swp1 + busywait "$TIMEOUT" not wait_for_offload \ + ip route show 198.51.100.0/24 + check_err $? "route marked as offloaded after replacing valid nexthop with an invalid one" + + ip nexthop replace id 1 via 192.0.2.2 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip route show 198.51.100.0/24 + check_err $? "route not marked as offloaded after replacing invalid nexthop with a valid one" + + log_test "routes using nexthop objects offload indication" + + ip route del 198.51.100.0/24 + ip neigh del 192.0.2.3 dev $swp1 + ip neigh del 192.0.2.2 dev $swp1 + ip nexthop del id 1 + + simple_if_fini $swp2 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 +} + devlink_reload_test() { # Test that after executing all the above configuration tests, a diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh new file mode 100755 index 000000000000..9adfba8f87e6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh @@ -0,0 +1,132 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +function get_value { + local query="${SETTINGS_MAP[$1]}" + + echo $(ethtool -c $NSIM_NETDEV | \ + awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[ \t]/, "", $2); print $2}') +} + +function update_current_settings { + for key in ${!SETTINGS_MAP[@]}; do + CURRENT_SETTINGS[$key]=$(get_value $key) + done + echo ${CURRENT_SETTINGS[@]} +} + +if ! ethtool -h | grep -q coalesce; then + echo "SKIP: No --coalesce support in ethtool" + exit 4 +fi + +NSIM_NETDEV=$(make_netdev) + +set -o pipefail + +declare -A SETTINGS_MAP=( + ["rx-frames-low"]="rx-frame-low" + ["tx-frames-low"]="tx-frame-low" + ["rx-frames-high"]="rx-frame-high" + ["tx-frames-high"]="tx-frame-high" + ["rx-usecs"]="rx-usecs" + ["rx-frames"]="rx-frames" + ["rx-usecs-irq"]="rx-usecs-irq" + ["rx-frames-irq"]="rx-frames-irq" + ["tx-usecs"]="tx-usecs" + ["tx-frames"]="tx-frames" + ["tx-usecs-irq"]="tx-usecs-irq" + ["tx-frames-irq"]="tx-frames-irq" + ["stats-block-usecs"]="stats-block-usecs" + ["pkt-rate-low"]="pkt-rate-low" + ["rx-usecs-low"]="rx-usecs-low" + ["tx-usecs-low"]="tx-usecs-low" + ["pkt-rate-high"]="pkt-rate-high" + ["rx-usecs-high"]="rx-usecs-high" + ["tx-usecs-high"]="tx-usecs-high" + ["sample-interval"]="sample-interval" +) + +declare -A CURRENT_SETTINGS=( + ["rx-frames-low"]="" + ["tx-frames-low"]="" + ["rx-frames-high"]="" + ["tx-frames-high"]="" + ["rx-usecs"]="" + ["rx-frames"]="" + ["rx-usecs-irq"]="" + ["rx-frames-irq"]="" + ["tx-usecs"]="" + ["tx-frames"]="" + ["tx-usecs-irq"]="" + ["tx-frames-irq"]="" + ["stats-block-usecs"]="" + ["pkt-rate-low"]="" + ["rx-usecs-low"]="" + ["tx-usecs-low"]="" + ["pkt-rate-high"]="" + ["rx-usecs-high"]="" + ["tx-usecs-high"]="" + ["sample-interval"]="" +) + +declare -A EXPECTED_SETTINGS=( + ["rx-frames-low"]="" + ["tx-frames-low"]="" + ["rx-frames-high"]="" + ["tx-frames-high"]="" + ["rx-usecs"]="" + ["rx-frames"]="" + ["rx-usecs-irq"]="" + ["rx-frames-irq"]="" + ["tx-usecs"]="" + ["tx-frames"]="" + ["tx-usecs-irq"]="" + ["tx-frames-irq"]="" + ["stats-block-usecs"]="" + ["pkt-rate-low"]="" + ["rx-usecs-low"]="" + ["tx-usecs-low"]="" + ["pkt-rate-high"]="" + ["rx-usecs-high"]="" + ["tx-usecs-high"]="" + ["sample-interval"]="" +) + +# populate the expected settings map +for key in ${!SETTINGS_MAP[@]}; do + EXPECTED_SETTINGS[$key]=$(get_value $key) +done + +# test +for key in ${!SETTINGS_MAP[@]}; do + value=$((RANDOM % $((2**32-1)))) + + ethtool -C $NSIM_NETDEV "$key" "$value" + + EXPECTED_SETTINGS[$key]="$value" + expected=${EXPECTED_SETTINGS[@]} + current=$(update_current_settings) + + check $? "$current" "$expected" + set +x +done + +# bool settings which ethtool displays on the same line +ethtool -C $NSIM_NETDEV adaptive-rx on +s=$(ethtool -c $NSIM_NETDEV | grep -q "Adaptive RX: on TX: off") +check $? "$s" "" + +ethtool -C $NSIM_NETDEV adaptive-tx on +s=$(ethtool -c $NSIM_NETDEV | grep -q "Adaptive RX: on TX: on") +check $? "$s" "" + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh new file mode 100644 index 000000000000..9f64d5c7107b --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +NSIM_ID=$((RANDOM % 1024)) +NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID +NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0 +NSIM_NETDEV= +num_passes=0 +num_errors=0 + +function cleanup_nsim { + if [ -e $NSIM_DEV_SYS ]; then + echo $NSIM_ID > /sys/bus/netdevsim/del_device + fi +} + +function cleanup { + cleanup_nsim +} + +trap cleanup EXIT + +function check { + local code=$1 + local str=$2 + local exp_str=$3 + + if [ $code -ne 0 ]; then + ((num_errors++)) + return + fi + + if [ "$str" != "$exp_str" ]; then + echo -e "Expected: '$exp_str', got '$str'" + ((num_errors++)) + return + fi + + ((num_passes++)) +} + +function make_netdev { + # Make a netdevsim + old_netdevs=$(ls /sys/class/net) + + if ! $(lsmod | grep -q netdevsim); then + modprobe netdevsim + fi + + echo $NSIM_ID > /sys/bus/netdevsim/new_device + # get new device name + ls /sys/bus/netdevsim/devices/netdevsim${NSIM_ID}/net/ +} diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh index 25c896b9e2eb..b4a7abfe5454 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh @@ -1,60 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0-only -NSIM_ID=$((RANDOM % 1024)) -NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID -NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0 -NSIM_NETDEV= -num_passes=0 -num_errors=0 - -function cleanup_nsim { - if [ -e $NSIM_DEV_SYS ]; then - echo $NSIM_ID > /sys/bus/netdevsim/del_device - fi -} - -function cleanup { - cleanup_nsim -} - -trap cleanup EXIT - -function get_netdev_name { - local -n old=$1 - - new=$(ls /sys/class/net) - - for netdev in $new; do - for check in $old; do - [ $netdev == $check ] && break - done - - if [ $netdev != $check ]; then - echo $netdev - break - fi - done -} - -function check { - local code=$1 - local str=$2 - local exp_str=$3 - - if [ $code -ne 0 ]; then - ((num_errors++)) - return - fi - - if [ "$str" != "$exp_str" ]; then - echo -e "Expected: '$exp_str', got '$str'" - ((num_errors++)) - return - fi - - ((num_passes++)) -} +source ethtool-common.sh # Bail if ethtool is too old if ! ethtool -h | grep include-stat 2>&1 >/dev/null; then @@ -62,13 +9,7 @@ if ! ethtool -h | grep include-stat 2>&1 >/dev/null; then exit 4 fi -# Make a netdevsim -old_netdevs=$(ls /sys/class/net) - -modprobe netdevsim -echo $NSIM_ID > /sys/bus/netdevsim/new_device - -NSIM_NETDEV=`get_netdev_name old_netdevs` +NSIM_NETDEV=$(make_netdev) set -o pipefail diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh new file mode 100755 index 000000000000..c969559ffa7a --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh @@ -0,0 +1,85 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +function get_value { + local query="${SETTINGS_MAP[$1]}" + + echo $(ethtool -g $NSIM_NETDEV | \ + tail -n +$CURR_SETT_LINE | \ + awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[\t ]/, "", $2); print $2}') +} + +function update_current_settings { + for key in ${!SETTINGS_MAP[@]}; do + CURRENT_SETTINGS[$key]=$(get_value $key) + done + echo ${CURRENT_SETTINGS[@]} +} + +if ! ethtool -h | grep -q set-ring >/dev/null; then + echo "SKIP: No --set-ring support in ethtool" + exit 4 +fi + +NSIM_NETDEV=$(make_netdev) + +set -o pipefail + +declare -A SETTINGS_MAP=( + ["rx"]="RX" + ["rx-mini"]="RX Mini" + ["rx-jumbo"]="RX Jumbo" + ["tx"]="TX" +) + +declare -A EXPECTED_SETTINGS=( + ["rx"]="" + ["rx-mini"]="" + ["rx-jumbo"]="" + ["tx"]="" +) + +declare -A CURRENT_SETTINGS=( + ["rx"]="" + ["rx-mini"]="" + ["rx-jumbo"]="" + ["tx"]="" +) + +MAX_VALUE=$((RANDOM % $((2**32-1)))) +RING_MAX_LIST=$(ls $NSIM_DEV_DFS/ethtool/ring/) + +for ring_max_entry in $RING_MAX_LIST; do + echo $MAX_VALUE > $NSIM_DEV_DFS/ethtool/ring/$ring_max_entry +done + +CURR_SETT_LINE=$(ethtool -g $NSIM_NETDEV | grep -i -m1 -n 'Current hardware settings' | cut -f1 -d:) + +# populate the expected settings map +for key in ${!SETTINGS_MAP[@]}; do + EXPECTED_SETTINGS[$key]=$(get_value $key) +done + +# test +for key in ${!SETTINGS_MAP[@]}; do + value=$((RANDOM % $MAX_VALUE)) + + ethtool -G $NSIM_NETDEV "$key" "$value" + + EXPECTED_SETTINGS[$key]="$value" + expected=${EXPECTED_SETTINGS[@]} + current=$(update_current_settings) + + check $? "$current" "$expected" + set +x +done + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh new file mode 100755 index 000000000000..d03aa2cab9fd --- /dev/null +++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh @@ -0,0 +1,356 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test traffic distribution when a wECMP route forwards traffic to two GRE +# tunnels. +# +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.1/28 | | +# | 2001:db8:1::1/64 | | +# +-------------------|-----+ +# | +# +-------------------|------------------------+ +# | SW1 | | +# | $ol1 + | +# | 192.0.2.2/28 | +# | 2001:db8:1::2/64 | +# | | +# | + g1a (gre) + g1b (gre) | +# | loc=192.0.2.65 loc=192.0.2.81 | +# | rem=192.0.2.66 --. rem=192.0.2.82 --. | +# | tos=inherit | tos=inherit | | +# | .------------------' | | +# | | .------------------' | +# | v v | +# | + $ul1.111 (vlan) + $ul1.222 (vlan) | +# | | 192.0.2.129/28 | 192.0.2.145/28 | +# | \ / | +# | \________________/ | +# | | | +# | + $ul1 | +# +------------|-------------------------------+ +# | +# +------------|-------------------------------+ +# | SW2 + $ul2 | +# | _______|________ | +# | / \ | +# | / \ | +# | + $ul2.111 (vlan) + $ul2.222 (vlan) | +# | ^ 192.0.2.130/28 ^ 192.0.2.146/28 | +# | | | | +# | | '------------------. | +# | '------------------. | | +# | + g2a (gre) | + g2b (gre) | | +# | loc=192.0.2.66 | loc=192.0.2.82 | | +# | rem=192.0.2.65 --' rem=192.0.2.81 --' | +# | tos=inherit tos=inherit | +# | | +# | $ol2 + | +# | 192.0.2.17/28 | | +# | 2001:db8:2::1/64 | | +# +-------------------|------------------------+ +# | +# +-------------------|-----+ +# | H2 | | +# | $h2 + | +# | 192.0.2.18/28 | +# | 2001:db8:2::2/64 | +# +-------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + multipath_ipv4 + multipath_ipv6 + multipath_ipv6_l4 +" + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2 + ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 +} + +h1_destroy() +{ + ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 + ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2 + simple_if_fini $h1 192.0.2.1/28 +} + +sw1_create() +{ + simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64 + __simple_if_init $ul1 v$ol1 + vlan_create $ul1 111 v$ol1 192.0.2.129/28 + vlan_create $ul1 222 v$ol1 192.0.2.145/28 + + tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1 + __simple_if_init g1a v$ol1 192.0.2.65/32 + ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130 + + tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1 + __simple_if_init g1b v$ol1 192.0.2.81/32 + ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146 + + ip -6 nexthop add id 101 dev g1a + ip -6 nexthop add id 102 dev g1b + ip nexthop add id 103 group 101/102 + + ip route add vrf v$ol1 192.0.2.16/28 nhid 103 + ip route add vrf v$ol1 2001:db8:2::/64 nhid 103 +} + +sw1_destroy() +{ + ip route del vrf v$ol1 2001:db8:2::/64 + ip route del vrf v$ol1 192.0.2.16/28 + + ip nexthop del id 103 + ip -6 nexthop del id 102 + ip -6 nexthop del id 101 + + ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146 + __simple_if_fini g1b 192.0.2.81/32 + tunnel_destroy g1b + + ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130 + __simple_if_fini g1a 192.0.2.65/32 + tunnel_destroy g1a + + vlan_destroy $ul1 222 + vlan_destroy $ul1 111 + __simple_if_fini $ul1 + simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64 +} + +sw2_create() +{ + simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64 + __simple_if_init $ul2 v$ol2 + vlan_create $ul2 111 v$ol2 192.0.2.130/28 + vlan_create $ul2 222 v$ol2 192.0.2.146/28 + + tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2 + __simple_if_init g2a v$ol2 192.0.2.66/32 + ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129 + + tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2 + __simple_if_init g2b v$ol2 192.0.2.82/32 + ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145 + + ip -6 nexthop add id 201 dev g2a + ip -6 nexthop add id 202 dev g2b + ip nexthop add id 203 group 201/202 + + ip route add vrf v$ol2 192.0.2.0/28 nhid 203 + ip route add vrf v$ol2 2001:db8:1::/64 nhid 203 + + tc qdisc add dev $ul2 clsact + tc filter add dev $ul2 ingress pref 111 prot 802.1Q \ + flower vlan_id 111 action pass + tc filter add dev $ul2 ingress pref 222 prot 802.1Q \ + flower vlan_id 222 action pass +} + +sw2_destroy() +{ + tc qdisc del dev $ul2 clsact + + ip route del vrf v$ol2 2001:db8:1::/64 + ip route del vrf v$ol2 192.0.2.0/28 + + ip nexthop del id 203 + ip -6 nexthop del id 202 + ip -6 nexthop del id 201 + + ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145 + __simple_if_fini g2b 192.0.2.82/32 + tunnel_destroy g2b + + ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129 + __simple_if_fini g2a 192.0.2.66/32 + tunnel_destroy g2a + + vlan_destroy $ul2 222 + vlan_destroy $ul2 111 + __simple_if_fini $ul2 + simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64 + ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17 + ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1 + ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17 + simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + vrf_prepare + h1_create + sw1_create + sw2_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + sw2_destroy + sw1_destroy + h1_destroy + vrf_cleanup +} + +multipath4_test() +{ + local what=$1; shift + local weight1=$1; shift + local weight2=$1; shift + + sysctl_set net.ipv4.fib_multipath_hash_policy 1 + ip nexthop replace id 103 group 101,$weight1/102,$weight2 + + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) + + ip vrf exec v$h1 \ + $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + multipath_eval "$what" $weight1 $weight2 $d111 $d222 + + ip nexthop replace id 103 group 101/102 + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +multipath6_test() +{ + local what=$1; shift + local weight1=$1; shift + local weight2=$1; shift + + sysctl_set net.ipv6.fib_multipath_hash_policy 0 + ip nexthop replace id 103 group 101,$weight1/102,$weight2 + + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) + + # Generate 16384 echo requests, each with a random flow label. + for ((i=0; i < 16384; ++i)); do + ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null + done + + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + multipath_eval "$what" $weight1 $weight2 $d111 $d222 + + ip nexthop replace id 103 group 101/102 + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +multipath6_l4_test() +{ + local what=$1; shift + local weight1=$1; shift + local weight2=$1; shift + + sysctl_set net.ipv6.fib_multipath_hash_policy 1 + ip nexthop replace id 103 group 101,$weight1/102,$weight2 + + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) + + ip vrf exec v$h1 \ + $MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + multipath_eval "$what" $weight1 $weight2 $d111 $d222 + + ip nexthop replace id 103 group 101/102 + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.18 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +multipath_ipv4() +{ + log_info "Running IPv4 multipath tests" + multipath4_test "ECMP" 1 1 + multipath4_test "Weighted MP 2:1" 2 1 + multipath4_test "Weighted MP 11:45" 11 45 +} + +multipath_ipv6() +{ + log_info "Running IPv6 multipath tests" + multipath6_test "ECMP" 1 1 + multipath6_test "Weighted MP 2:1" 2 1 + multipath6_test "Weighted MP 11:45" 11 45 +} + +multipath_ipv6_l4() +{ + log_info "Running IPv6 L4 hash multipath tests" + multipath6_l4_test "ECMP" 1 1 + multipath6_l4_test "Weighted MP 2:1" 2 1 + multipath6_l4_test "Weighted MP 11:45" 11 45 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index cf3d26c233e8..e8c2573d5232 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -280,6 +280,17 @@ multipath_test() multipath4_test "Weighted MP 2:1" 2 1 multipath4_test "Weighted MP 11:45" 11 45 + log_info "Running IPv4 multipath tests with IPv6 link-local nexthops" + ip nexthop replace id 101 via fe80:2::22 dev $rp12 + ip nexthop replace id 102 via fe80:3::23 dev $rp13 + + multipath4_test "ECMP" 1 1 + multipath4_test "Weighted MP 2:1" 2 1 + multipath4_test "Weighted MP 11:45" 11 45 + + ip nexthop replace id 102 via 169.254.3.23 dev $rp13 + ip nexthop replace id 101 via 169.254.2.22 dev $rp12 + log_info "Running IPv6 multipath tests" multipath6_test "ECMP" 1 1 multipath6_test "Weighted MP 2:1" 2 1 @@ -312,7 +323,6 @@ setup_prepare() router1_create router2_create - routing_nh_obj forwarding_enable } diff --git a/tools/testing/selftests/net/forwarding/router_nh.sh b/tools/testing/selftests/net/forwarding/router_nh.sh new file mode 100755 index 000000000000..f3a53738bdcc --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_nh.sh @@ -0,0 +1,160 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +h1_create() +{ + vrf_create "vrf-h1" + ip link set dev $h1 master vrf-h1 + + ip link set dev vrf-h1 up + ip link set dev $h1 up + + ip address add 192.0.2.2/24 dev $h1 + ip address add 2001:db8:1::2/64 dev $h1 + + ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1 + ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-h1 + ip route del 198.51.100.0/24 vrf vrf-h1 + + ip address del 2001:db8:1::2/64 dev $h1 + ip address del 192.0.2.2/24 dev $h1 + + ip link set dev $h1 down + vrf_destroy "vrf-h1" +} + +h2_create() +{ + vrf_create "vrf-h2" + ip link set dev $h2 master vrf-h2 + + ip link set dev vrf-h2 up + ip link set dev $h2 up + + ip address add 198.51.100.2/24 dev $h2 + ip address add 2001:db8:2::2/64 dev $h2 + + ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1 + ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-h2 + ip route del 192.0.2.0/24 vrf vrf-h2 + + ip address del 2001:db8:2::2/64 dev $h2 + ip address del 198.51.100.2/24 dev $h2 + + ip link set dev $h2 down + vrf_destroy "vrf-h2" +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + tc qdisc add dev $rp2 clsact + + ip address add 192.0.2.1/24 dev $rp1 + ip address add 2001:db8:1::1/64 dev $rp1 + + ip address add 198.51.100.1/24 dev $rp2 + ip address add 2001:db8:2::1/64 dev $rp2 +} + +router_destroy() +{ + ip address del 2001:db8:2::1/64 dev $rp2 + ip address del 198.51.100.1/24 dev $rp2 + + ip address del 2001:db8:1::1/64 dev $rp1 + ip address del 192.0.2.1/24 dev $rp1 + + tc qdisc del dev $rp2 clsact + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +routing_nh_obj() +{ + # Create the nexthops as AF_INET6, so that IPv4 and IPv6 routes could + # use them. + ip -6 nexthop add id 101 dev $rp1 + ip -6 nexthop add id 102 dev $rp2 + + ip route replace 192.0.2.0/24 nhid 101 + ip route replace 2001:db8:1::/64 nhid 101 + ip route replace 198.51.100.0/24 nhid 102 + ip route replace 2001:db8:2::/64 nhid 102 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1mac=$(mac_get $rp1) + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 198.51.100.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +routing_nh_obj + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 0d93b243695f..0eae628d1ffd 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -5,6 +5,7 @@ ret=0 sin="" sout="" cin="" +cinsent="" cout="" ksft_skip=4 timeout=30 @@ -81,7 +82,7 @@ cleanup_partial() cleanup() { rm -f "$cin" "$cout" - rm -f "$sin" "$sout" + rm -f "$sin" "$sout" "$cinsent" cleanup_partial } @@ -144,6 +145,13 @@ if [ $? -ne 0 ];then exit $ksft_skip fi +print_file_err() +{ + ls -l "$1" 1>&2 + echo "Trailing bytes are: " + tail -c 27 "$1" +} + check_transfer() { in=$1 @@ -155,6 +163,7 @@ check_transfer() echo "[ FAIL ] $what does not match (in, out):" print_file_err "$in" print_file_err "$out" + ret=1 return 1 fi @@ -175,6 +184,23 @@ do_ping() fi } +link_failure() +{ + ns="$1" + + l=$((RANDOM%4)) + l=$((l+1)) + + veth="ns1eth$l" + ip -net "$ns" link set "$veth" down +} + +# $1: IP address +is_v6() +{ + [ -z "${1##*:*}" ] +} + do_transfer() { listener_ns="$1" @@ -182,9 +208,10 @@ do_transfer() cl_proto="$3" srv_proto="$4" connect_addr="$5" - rm_nr_ns1="$6" - rm_nr_ns2="$7" - speed="$8" + test_link_fail="$6" + rm_nr_ns1="$7" + rm_nr_ns2="$8" + speed="$9" port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) @@ -215,12 +242,25 @@ do_transfer() mptcp_connect="./mptcp_connect -r" fi - ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" & + local local_addr + if is_v6 "${connect_addr}"; then + local_addr="::" + else + local_addr="0.0.0.0" + fi + + ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port \ + -s ${srv_proto} ${local_addr} < "$sin" > "$sout" & spid=$! sleep 1 - ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & + if [ "$test_link_fail" -eq 0 ];then + ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & + else + ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | tee "$cinsent" | \ + ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr > "$cout" & + fi cpid=$! if [ $rm_nr_ns1 -gt 0 ]; then @@ -265,12 +305,17 @@ do_transfer() ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" cat "$capout" + ret=1 return 1 fi check_transfer $sin $cout "file received by client" retc=$? - check_transfer $cin $sout "file received by server" + if [ "$test_link_fail" -eq 0 ];then + check_transfer $cin $sout "file received by server" + else + check_transfer $cinsent $sout "file received by server" + fi rets=$? if [ $retc -eq 0 ] && [ $rets -eq 0 ];then @@ -286,13 +331,12 @@ make_file() { name=$1 who=$2 + size=$3 - SIZE=1 - - dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null + dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" - echo "Created $name (size $SIZE KB) containing data sent by $who" + echo "Created $name (size $size KB) containing data sent by $who" } run_tests() @@ -300,14 +344,32 @@ run_tests() listener_ns="$1" connector_ns="$2" connect_addr="$3" - rm_nr_ns1="${4:-0}" - rm_nr_ns2="${5:-0}" - speed="${6:-fast}" + test_linkfail="${4:-0}" + rm_nr_ns1="${5:-0}" + rm_nr_ns2="${6:-0}" + speed="${7:-fast}" lret=0 + oldin="" + + if [ "$test_linkfail" -eq 1 ];then + size=$((RANDOM%1024)) + size=$((size+1)) + size=$((size*128)) + + oldin=$(mktemp) + cp "$cin" "$oldin" + make_file "$cin" "client" $size + fi do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \ - ${rm_nr_ns1} ${rm_nr_ns2} ${speed} + ${test_linkfail} ${rm_nr_ns1} ${rm_nr_ns2} ${speed} lret=$? + + if [ "$test_linkfail" -eq 1 ];then + cp "$oldin" "$cin" + rm -f "$oldin" + fi + if [ $lret -ne 0 ]; then ret=$lret return @@ -440,10 +502,11 @@ chk_rm_nr() sin=$(mktemp) sout=$(mktemp) cin=$(mktemp) +cinsent=$(mktemp) cout=$(mktemp) init -make_file "$cin" "client" -make_file "$sin" "server" +make_file "$cin" "client" 1 +make_file "$sin" "server" 1 trap cleanup EXIT run_tests $ns1 $ns2 10.0.1.1 @@ -528,12 +591,23 @@ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows and signal" 3 3 3 chk_add_nr 1 1 +# accept and use add_addr with additional subflows and link loss +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 3 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 1 +chk_join_nr "multiple flows, signal, link failure" 3 3 3 +chk_add_nr 1 1 + # add_addr timeout reset_with_add_addr_timeout ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl limits 1 1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -run_tests $ns1 $ns2 10.0.1.1 0 0 slow +run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1 chk_add_nr 4 0 @@ -542,7 +616,7 @@ reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 1 slow +run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow chk_join_nr "remove single subflow" 1 1 1 chk_rm_nr 1 1 @@ -552,7 +626,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2 ip netns exec $ns2 ./pm_nl_ctl limits 0 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 2 slow +run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow chk_join_nr "remove multiple subflows" 2 2 2 chk_rm_nr 2 2 @@ -561,7 +635,7 @@ reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_tests $ns1 $ns2 10.0.1.1 1 0 slow +run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow chk_join_nr "remove single address" 1 1 1 chk_add_nr 1 1 chk_rm_nr 0 0 @@ -572,7 +646,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 1 1 slow +run_tests $ns1 $ns2 10.0.1.1 0 1 1 slow chk_join_nr "remove subflow and signal" 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 @@ -584,11 +658,65 @@ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 3 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 1 2 slow +run_tests $ns1 $ns2 10.0.1.1 0 1 2 slow chk_join_nr "remove subflows and signal" 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 +# subflow IPv6 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow +run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow +chk_join_nr "single subflow IPv6" 1 1 1 + +# add_address, unused IPv6 +reset +ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal +run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow +chk_join_nr "unused signal address IPv6" 0 0 0 +chk_add_nr 1 1 + +# signal address IPv6 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow +chk_join_nr "single address IPv6" 1 1 1 +chk_add_nr 1 1 + +# add_addr timeout IPv6 +reset_with_add_addr_timeout 6 +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal +run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow +chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1 +chk_add_nr 4 0 + +# single address IPv6, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +run_tests $ns1 $ns2 dead:beef:1::1 0 1 0 slow +chk_join_nr "remove single address IPv6" 1 1 1 +chk_add_nr 1 1 +chk_rm_nr 0 0 + +# subflow and signal IPv6, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 2 +ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow +run_tests $ns1 $ns2 dead:beef:1::1 0 1 1 slow +chk_join_nr "remove subflow and signal IPv6" 2 2 2 +chk_add_nr 1 1 +chk_rm_nr 1 1 + # single subflow, syncookies reset_with_cookies ip netns exec $ns1 ./pm_nl_ctl limits 0 1 |