Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r-- | drivers/infiniband/hw/mlx5/devx.c    | 231
-rw-r--r-- | drivers/infiniband/hw/mlx5/mad.c     |  14
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c    | 147
-rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h |  60
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c      | 137
-rw-r--r-- | drivers/infiniband/hw/mlx5/odp.c     | 327
-rw-r--r-- | drivers/infiniband/hw/mlx5/qp.c      | 160
-rw-r--r-- | drivers/infiniband/hw/mlx5/wr.c      |   2
8 files changed, 687 insertions, 391 deletions
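The theme running through the hunks below is the replacement of the per-device odp_srcu (SRCU read sections around every page-fault and prefetch lookup, synchronize_srcu() on teardown) with a per-mkey refcount and wait queue, which the new dmabuf MR registration path then reuses for its own teardown. The sketch that follows is condensed from the mlx5_ib.h and odp.c hunks in this diff; it is kernel-style C and is not meant to build outside the kernel tree. The lookup_odp_mkey() wrapper is an illustrative name added here to show how the fault and prefetch paths pair a conditional get with a put; it is not a function introduced by the patch, which open-codes this in pagefault_single_data_segment() and get_prefetchable_mr().

/* Creation: publish the mkey with one reference owned by the xarray entry. */
static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev,
				       struct mlx5_core_mkey *mmkey)
{
	refcount_set(&mmkey->usecount, 1);
	return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mmkey->key),
			       mmkey, GFP_KERNEL));
}

/*
 * Lookup from a page-fault or prefetch path: take a reference under the
 * xarray lock so the mkey cannot be released while the handler runs.
 * (Illustrative wrapper only, see the note above.)
 */
static inline struct mlx5_core_mkey *lookup_odp_mkey(struct mlx5_ib_dev *dev,
						     u32 key)
{
	struct mlx5_core_mkey *mmkey;

	xa_lock(&dev->odp_mkeys);
	mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key));
	if (mmkey)
		refcount_inc(&mmkey->usecount);
	xa_unlock(&dev->odp_mkeys);
	return mmkey;
}

/* Put a reference; the final put wakes a waiter blocked in teardown. */
static inline void mlx5r_deref_odp_mkey(struct mlx5_core_mkey *mmkey)
{
	if (refcount_dec_and_test(&mmkey->usecount))
		wake_up(&mmkey->wait);
}

/*
 * Teardown: the caller first does xa_erase() so no new references can be
 * taken, then drops its own reference and waits for the count to drain.
 * This gives the same guarantee synchronize_srcu() used to provide: no
 * page-fault or prefetch handler is still touching the mkey when it is
 * destroyed.
 */
static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_core_mkey *mmkey)
{
	mlx5r_deref_odp_mkey(mmkey);
	wait_event(mmkey->wait, refcount_read(&mmkey->usecount) == 0);
}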
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 819c142857d6..de3c2fc6f361 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -94,13 +94,13 @@ struct devx_umem { struct mlx5_core_dev *mdev; struct ib_umem *umem; u32 dinlen; - u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; + u32 dinbox[MLX5_ST_SZ_DW(destroy_umem_in)]; }; struct devx_umem_reg_cmd { void *in; u32 inlen; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u32 out[MLX5_ST_SZ_DW(create_umem_out)]; }; static struct mlx5_ib_ucontext * @@ -111,8 +111,8 @@ devx_ufile2uctx(const struct uverbs_attr_bundle *attrs) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) { - u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {}; + u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {}; void *uctx; int err; u16 uid; @@ -138,14 +138,14 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) if (err) return err; - uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + uid = MLX5_GET(create_uctx_out, out, uid); return uid; } void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) { - u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_uctx_out)] = {}; MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX); MLX5_SET(destroy_uctx_in, in, uid, uid); @@ -288,6 +288,80 @@ static u64 get_enc_obj_id(u32 opcode, u32 obj_id) return ((u64)opcode << 32) | obj_id; } +static u32 devx_get_created_obj_id(const void *in, const void *out, u16 opcode) +{ + switch (opcode) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + return MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + case MLX5_CMD_OP_CREATE_UMEM: + return MLX5_GET(create_umem_out, out, umem_id); + case MLX5_CMD_OP_CREATE_MKEY: + return MLX5_GET(create_mkey_out, out, mkey_index); + case MLX5_CMD_OP_CREATE_CQ: + return MLX5_GET(create_cq_out, out, cqn); + case MLX5_CMD_OP_ALLOC_PD: + return MLX5_GET(alloc_pd_out, out, pd); + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + return MLX5_GET(alloc_transport_domain_out, out, + transport_domain); + case MLX5_CMD_OP_CREATE_RMP: + return MLX5_GET(create_rmp_out, out, rmpn); + case MLX5_CMD_OP_CREATE_SQ: + return MLX5_GET(create_sq_out, out, sqn); + case MLX5_CMD_OP_CREATE_RQ: + return MLX5_GET(create_rq_out, out, rqn); + case MLX5_CMD_OP_CREATE_RQT: + return MLX5_GET(create_rqt_out, out, rqtn); + case MLX5_CMD_OP_CREATE_TIR: + return MLX5_GET(create_tir_out, out, tirn); + case MLX5_CMD_OP_CREATE_TIS: + return MLX5_GET(create_tis_out, out, tisn); + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + return MLX5_GET(alloc_q_counter_out, out, counter_set_id); + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + return MLX5_GET(create_flow_table_out, out, table_id); + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + return MLX5_GET(create_flow_group_out, out, group_id); + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + return MLX5_GET(set_fte_in, in, flow_index); + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + return MLX5_GET(alloc_flow_counter_out, out, flow_counter_id); + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: + return MLX5_GET(alloc_packet_reformat_context_out, out, + packet_reformat_id); + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + return MLX5_GET(alloc_modify_header_context_out, out, + modify_header_id); + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + return 
MLX5_GET(create_scheduling_element_out, out, + scheduling_element_id); + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + return MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + return MLX5_GET(set_l2_table_entry_in, in, table_index); + case MLX5_CMD_OP_CREATE_QP: + return MLX5_GET(create_qp_out, out, qpn); + case MLX5_CMD_OP_CREATE_SRQ: + return MLX5_GET(create_srq_out, out, srqn); + case MLX5_CMD_OP_CREATE_XRC_SRQ: + return MLX5_GET(create_xrc_srq_out, out, xrc_srqn); + case MLX5_CMD_OP_CREATE_DCT: + return MLX5_GET(create_dct_out, out, dctn); + case MLX5_CMD_OP_CREATE_XRQ: + return MLX5_GET(create_xrq_out, out, xrqn); + case MLX5_CMD_OP_ATTACH_TO_MCG: + return MLX5_GET(attach_to_mcg_in, in, qpn); + case MLX5_CMD_OP_ALLOC_XRCD: + return MLX5_GET(alloc_xrcd_out, out, xrcd); + case MLX5_CMD_OP_CREATE_PSV: + return MLX5_GET(create_psv_out, out, psv0_index); + default: + /* The entry must match to one of the devx_is_obj_create_cmd */ + WARN_ON(true); + return 0; + } +} + static u64 devx_get_obj_id(const void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); @@ -399,8 +473,8 @@ static u64 devx_get_obj_id(const void *in) break; case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT, - MLX5_GET(general_obj_in_cmd_hdr, in, - obj_id)); + MLX5_GET(query_modify_header_context_in, + in, modify_header_id)); break; case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT, @@ -1019,63 +1093,76 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, u32 *dinlen, u32 *obj_id) { - u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type); + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid); - *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + *obj_id = devx_get_created_obj_id(in, out, opcode); *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr); - - MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id); MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid); - switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) { + switch (opcode) { case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type); + MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, + MLX5_GET(general_obj_in_cmd_hdr, in, obj_type)); break; case MLX5_CMD_OP_CREATE_UMEM: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_umem_in, din, opcode, MLX5_CMD_OP_DESTROY_UMEM); + MLX5_SET(destroy_umem_in, din, umem_id, *obj_id); break; case MLX5_CMD_OP_CREATE_MKEY: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, din, opcode, + MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, in, mkey_index, *obj_id); break; case MLX5_CMD_OP_CREATE_CQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, din, cqn, *obj_id); break; case MLX5_CMD_OP_ALLOC_PD: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, din, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, din, pd, *obj_id); break; case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_transport_domain_in, din, 
opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + MLX5_SET(dealloc_transport_domain_in, din, transport_domain, + *obj_id); break; case MLX5_CMD_OP_CREATE_RMP: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, din, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, din, rmpn, *obj_id); break; case MLX5_CMD_OP_CREATE_SQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, din, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, din, sqn, *obj_id); break; case MLX5_CMD_OP_CREATE_RQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, din, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, din, rqn, *obj_id); break; case MLX5_CMD_OP_CREATE_RQT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, din, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, din, rqtn, *obj_id); break; case MLX5_CMD_OP_CREATE_TIR: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, din, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, din, tirn, *obj_id); break; case MLX5_CMD_OP_CREATE_TIS: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, din, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, din, tisn, *obj_id); break; case MLX5_CMD_OP_ALLOC_Q_COUNTER: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_q_counter_in, din, opcode, MLX5_CMD_OP_DEALLOC_Q_COUNTER); + MLX5_SET(dealloc_q_counter_in, din, counter_set_id, *obj_id); break; case MLX5_CMD_OP_CREATE_FLOW_TABLE: *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in); - *obj_id = MLX5_GET(create_flow_table_out, out, table_id); MLX5_SET(destroy_flow_table_in, din, other_vport, MLX5_GET(create_flow_table_in, in, other_vport)); MLX5_SET(destroy_flow_table_in, din, vport_number, @@ -1083,12 +1170,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(destroy_flow_table_in, din, table_type, MLX5_GET(create_flow_table_in, in, table_type)); MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_flow_table_in, din, opcode, MLX5_CMD_OP_DESTROY_FLOW_TABLE); break; case MLX5_CMD_OP_CREATE_FLOW_GROUP: *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in); - *obj_id = MLX5_GET(create_flow_group_out, out, group_id); MLX5_SET(destroy_flow_group_in, din, other_vport, MLX5_GET(create_flow_group_in, in, other_vport)); MLX5_SET(destroy_flow_group_in, din, vport_number, @@ -1098,12 +1184,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(destroy_flow_group_in, din, table_id, MLX5_GET(create_flow_group_in, in, table_id)); MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_flow_group_in, din, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); break; case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in); - *obj_id = MLX5_GET(set_fte_in, in, flow_index); MLX5_SET(delete_fte_in, din, other_vport, MLX5_GET(set_fte_in, in, other_vport)); MLX5_SET(delete_fte_in, din, vport_number, @@ -1113,63 +1198,70 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(delete_fte_in, din, table_id, MLX5_GET(set_fte_in, in, table_id)); MLX5_SET(delete_fte_in, din, flow_index, *obj_id); - 
MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(delete_fte_in, din, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); break; case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_flow_counter_in, din, opcode, MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); + MLX5_SET(dealloc_flow_counter_in, din, flow_counter_id, + *obj_id); break; case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_packet_reformat_context_in, din, opcode, MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(dealloc_packet_reformat_context_in, din, + packet_reformat_id, *obj_id); break; case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_modify_header_context_in, din, opcode, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(dealloc_modify_header_context_in, din, + modify_header_id, *obj_id); break; case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in); - *obj_id = MLX5_GET(create_scheduling_element_out, out, - scheduling_element_id); MLX5_SET(destroy_scheduling_element_in, din, scheduling_hierarchy, MLX5_GET(create_scheduling_element_in, in, scheduling_hierarchy)); MLX5_SET(destroy_scheduling_element_in, din, scheduling_element_id, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_scheduling_element_in, din, opcode, MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT); break; case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in); - *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(delete_vxlan_udp_dport_in, din, opcode, MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); break; case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in); - *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index); MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(delete_l2_table_entry_in, din, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); break; case MLX5_CMD_OP_CREATE_QP: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, din, qpn, *obj_id); break; case MLX5_CMD_OP_CREATE_SRQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, din, opcode, MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, din, srqn, *obj_id); break; case MLX5_CMD_OP_CREATE_XRC_SRQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_xrc_srq_in, din, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, din, xrc_srqn, *obj_id); break; case MLX5_CMD_OP_CREATE_DCT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, din, dctn, *obj_id); break; case MLX5_CMD_OP_CREATE_XRQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, din, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, din, xrqn, *obj_id); break; case MLX5_CMD_OP_ATTACH_TO_MCG: *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in); @@ -1178,16 +1270,19 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, 
multicast_gid), MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid), MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid)); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); + MLX5_SET(detach_from_mcg_in, din, opcode, + MLX5_CMD_OP_DETACH_FROM_MCG); + MLX5_SET(detach_from_mcg_in, din, qpn, *obj_id); break; case MLX5_CMD_OP_ALLOC_XRCD: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD); + MLX5_SET(dealloc_xrcd_in, din, opcode, + MLX5_CMD_OP_DEALLOC_XRCD); + MLX5_SET(dealloc_xrcd_in, din, xrcd, *obj_id); break; case MLX5_CMD_OP_CREATE_PSV: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_psv_in, din, opcode, MLX5_CMD_OP_DESTROY_PSV); - MLX5_SET(destroy_psv_in, din, psvn, - MLX5_GET(create_psv_out, out, psv0_index)); + MLX5_SET(destroy_psv_in, din, psvn, *obj_id); break; default: /* The entry must match to one of the devx_is_obj_create_cmd */ @@ -1215,9 +1310,9 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, mkey->size = MLX5_GET64(mkc, mkc, len); mkey->pd = MLX5_GET(mkc, mkc, pd); devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); + init_waitqueue_head(&mkey->wait); - return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mkey->key), mkey, - GFP_KERNEL)); + return mlx5r_store_odp_mkey(dev, mkey); } static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, @@ -1290,16 +1385,15 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, int ret; dev = mlx5_udata_to_mdev(&attrs->driver_udata); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY && + xa_erase(&obj->ib_dev->odp_mkeys, + mlx5_base_mkey(obj->devx_mr.mmkey.key))) /* * The pagefault_single_data_segment() does commands against * the mmkey, we must wait for that to stop before freeing the * mkey, as another allocation could get the same mkey #. 
*/ - xa_erase(&obj->ib_dev->odp_mkeys, - mlx5_base_mkey(obj->devx_mr.mmkey.key)); - synchronize_srcu(&dev->odp_srcu); - } + mlx5r_deref_wait_odp_mkey(&obj->devx_mr.mmkey); if (obj->flags & DEVX_OBJ_FLAGS_DCT) ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct); @@ -1345,6 +1439,16 @@ out: rcu_read_unlock(); } +static bool is_apu_thread_cq(struct mlx5_ib_dev *dev, const void *in) +{ + if (!MLX5_CAP_GEN(dev->mdev, apu) || + !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), + apu_thread_cq)) + return false; + + return true; +} + static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( struct uverbs_attr_bundle *attrs) { @@ -1398,7 +1502,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( obj->flags |= DEVX_OBJ_FLAGS_DCT; err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in, cmd_in_len, cmd_out, cmd_out_len); - } else if (opcode == MLX5_CMD_OP_CREATE_CQ) { + } else if (opcode == MLX5_CMD_OP_CREATE_CQ && + !is_apu_thread_cq(dev, cmd_in)) { obj->flags |= DEVX_OBJ_FLAGS_CQ; obj->core_cq.comp = devx_cq_comp; err = mlx5_core_create_cq(dev->mdev, &obj->core_cq, @@ -1968,8 +2073,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( num_alloc_xa_entries++; event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL); - if (!event_sub) + if (!event_sub) { + err = -ENOMEM; goto err; + } list_add_tail(&event_sub->event_list, &sub_list); uverbs_uobject_get(&ev_file->uobj); diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 9bb9bb058932..652c6ccf1881 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -48,7 +48,7 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num, if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED && in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) return true; - return dev->mdev->port_caps[port_num - 1].has_smi; + return dev->port_caps[port_num - 1].has_smi; } static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, @@ -279,7 +279,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } -int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) +int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; @@ -299,7 +299,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) packet_error = be16_to_cpu(out_mad->status); - dev->mdev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ? + dev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ? 
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0; out: @@ -308,8 +308,8 @@ out: return err; } -int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, - struct ib_smp *out_mad) +static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, + struct ib_smp *out_mad) { struct ib_smp *in_mad = NULL; int err = -ENOMEM; @@ -549,7 +549,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); props->gid_tbl_len = out_mad->data[50]; props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); - props->pkey_tbl_len = mdev->port_caps[port - 1].pkey_table_len; + props->pkey_tbl_len = dev->pkey_table_len; props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; @@ -589,7 +589,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, /* If reported active speed is QDR, check if is FDR-10 */ if (props->active_speed == 4) { - if (mdev->port_caps[port - 1].ext_port_cap & + if (dev->port_caps[port - 1].ext_port_cap & MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { init_query_mad(in_mad); in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index aabdc07e4753..0d69a697d75f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. */ #include <linux/debugfs.h> @@ -461,7 +462,6 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, struct net_device *ndev, *upper; enum ib_mtu ndev_ib_mtu; bool put_mdev = true; - u16 qkey_viol_cntr; u32 eth_prot_oper; u8 mdev_port_num; bool ext; @@ -499,20 +499,22 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, translate_eth_proto_oper(eth_prot_oper, &props->active_speed, &props->active_width, ext); - props->port_cap_flags |= IB_PORT_CM_SUP; - props->ip_gids = true; + if (!dev->is_rep && mlx5_is_roce_enabled(mdev)) { + u16 qkey_viol_cntr; - props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, - roce_address_table_size); + props->port_cap_flags |= IB_PORT_CM_SUP; + props->ip_gids = true; + props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, + roce_address_table_size); + mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr); + props->qkey_viol_cntr = qkey_viol_cntr; + } props->max_mtu = IB_MTU_4096; props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg); props->pkey_tbl_len = 1; props->state = IB_PORT_DOWN; props->phys_state = IB_PORT_PHYS_STATE_DISABLED; - mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr); - props->qkey_viol_cntr = qkey_viol_cntr; - /* If this is a stub query for an unaffiliated port stop here */ if (!put_mdev) goto out; @@ -815,9 +817,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (err) return err; - err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys); - if (err) - return err; + props->max_pkeys = dev->pkey_table_len; err = mlx5_query_vendor_id(ibdev, &props->vendor_id); if (err) @@ -1384,19 +1384,17 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { - int ret; + return mlx5_query_port_roce(ibdev, port, props); +} - /* Only link layer == ethernet 
is valid for representors - * and we always use port 1 +static int mlx5_ib_rep_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + /* Default special Pkey for representor device port as per the + * IB specification 1.3 section 10.9.1.2. */ - ret = mlx5_query_port_roce(ibdev, port, props); - if (ret || !props) - return ret; - - /* We don't support GIDS */ - props->gid_tbl_len = 0; - - return ret; + *pkey = 0xffff; + return 0; } static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, @@ -2935,8 +2933,8 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) int err; int port; - for (port = 1; port <= ARRAY_SIZE(dev->mdev->port_caps); port++) { - dev->mdev->port_caps[port - 1].has_smi = false; + for (port = 1; port <= ARRAY_SIZE(dev->port_caps); port++) { + dev->port_caps[port - 1].has_smi = false; if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) { if (MLX5_CAP_GEN(dev->mdev, ib_virt)) { @@ -2948,10 +2946,10 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) port, err); return err; } - dev->mdev->port_caps[port - 1].has_smi = + dev->port_caps[port - 1].has_smi = vport_ctx.has_smi; } else { - dev->mdev->port_caps[port - 1].has_smi = true; + dev->port_caps[port - 1].has_smi = true; } } } @@ -2960,63 +2958,12 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) static void get_ext_port_caps(struct mlx5_ib_dev *dev) { - int port; + unsigned int port; - for (port = 1; port <= dev->num_ports; port++) + rdma_for_each_port (&dev->ib_dev, port) mlx5_query_ext_port_caps(dev, port); } -static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) -{ - struct ib_device_attr *dprops = NULL; - struct ib_port_attr *pprops = NULL; - int err = -ENOMEM; - - pprops = kzalloc(sizeof(*pprops), GFP_KERNEL); - if (!pprops) - goto out; - - dprops = kmalloc(sizeof(*dprops), GFP_KERNEL); - if (!dprops) - goto out; - - err = mlx5_ib_query_device(&dev->ib_dev, dprops, NULL); - if (err) { - mlx5_ib_warn(dev, "query_device failed %d\n", err); - goto out; - } - - err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); - if (err) { - mlx5_ib_warn(dev, "query_port %d failed %d\n", - port, err); - goto out; - } - - dev->mdev->port_caps[port - 1].pkey_table_len = - dprops->max_pkeys; - dev->mdev->port_caps[port - 1].gid_table_len = - pprops->gid_tbl_len; - mlx5_ib_dbg(dev, "port %d: pkey_table_len %d, gid_table_len %d\n", - port, dprops->max_pkeys, pprops->gid_tbl_len); - -out: - kfree(pprops); - kfree(dprops); - - return err; -} - -static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) -{ - /* For representors use port 1, is this is the only native - * port - */ - if (dev->is_rep) - return __get_port_caps(dev, 1); - return __get_port_caps(dev, port); -} - static u8 mlx5_get_umr_fence(u8 umr_fence_cap) { switch (umr_fence_cap) { @@ -3488,10 +3435,6 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, if (err) goto unbind; - err = get_port_caps(ibdev, mlx5_core_native_port_num(mpi->mdev)); - if (err) - goto unbind; - err = mlx5_add_netdev_notifier(ibdev, port_num); if (err) { mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n", @@ -3569,11 +3512,9 @@ static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev) break; } } - if (!bound) { - get_port_caps(dev, i + 1); + if (!bound) mlx5_ib_dbg(dev, "no free port found for port %d\n", i + 1); - } } list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list); @@ -3926,8 +3867,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); 
WARN_ON(!xa_empty(&dev->odp_mkeys)); - cleanup_srcu_struct(&dev->odp_srcu); - + mutex_destroy(&dev->cap_mask_mutex); WARN_ON(!xa_empty(&dev->sig_mrs)); WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); } @@ -3938,6 +3878,12 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) int err; int i; + dev->ib_dev.node_type = RDMA_NODE_IB_CA; + dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; + dev->ib_dev.phys_port_cnt = dev->num_ports; + dev->ib_dev.dev.parent = mdev->device; + dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; + for (i = 0; i < dev->num_ports; i++) { spin_lock_init(&dev->port[i].mp.mpi_lock); rwlock_init(&dev->port[i].roce.netdev_lock); @@ -3956,27 +3902,14 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) if (err) goto err_mp; - if (!mlx5_core_mp_enabled(mdev)) { - for (i = 1; i <= dev->num_ports; i++) { - err = get_port_caps(dev, i); - if (err) - break; - } - } else { - err = get_port_caps(dev, mlx5_core_native_port_num(mdev)); - } + err = mlx5_query_max_pkeys(&dev->ib_dev, &dev->pkey_table_len); if (err) goto err_mp; if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); - dev->ib_dev.node_type = RDMA_NODE_IB_CA; - dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; - dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); - dev->ib_dev.dev.parent = mdev->device; - dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); @@ -3987,17 +3920,11 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; - - err = init_srcu_struct(&dev->odp_srcu); - if (err) - goto err_mp; - return 0; err_mp: mlx5_ib_cleanup_multiport_master(dev); - - return -ENOMEM; + return err; } static int mlx5_ib_enable_driver(struct ib_device *dev) @@ -4067,6 +3994,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .query_srq = mlx5_ib_query_srq, .query_ucontext = mlx5_ib_query_ucontext, .reg_user_mr = mlx5_ib_reg_user_mr, + .reg_user_mr_dmabuf = mlx5_ib_reg_user_mr_dmabuf, .req_notify_cq = mlx5_ib_arm_cq, .rereg_user_mr = mlx5_ib_rereg_user_mr, .resize_cq = mlx5_ib_resize_cq, @@ -4207,6 +4135,7 @@ static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev) static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = { .get_port_immutable = mlx5_port_rep_immutable, .query_port = mlx5_ib_rep_query_port, + .query_pkey = mlx5_ib_rep_query_pkey, }; static int mlx5_ib_stage_raw_eth_non_default_cb(struct mlx5_ib_dev *dev) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b0fdc1b08e06..88cc26e008fc 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. 
*/ #ifndef MLX5_IB_H @@ -683,11 +684,8 @@ struct mlx5_ib_mr { u64 pi_iova; /* For ODP and implicit */ - atomic_t num_deferred_work; - wait_queue_head_t q_deferred_work; struct xarray implicit_children; union { - struct rcu_head rcu; struct list_head elm; struct work_struct work; } odp_destroy; @@ -703,6 +701,12 @@ static inline bool is_odp_mr(struct mlx5_ib_mr *mr) mr->umem->is_odp; } +static inline bool is_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem && + mr->umem->is_dmabuf; +} + struct mlx5_ib_mw { struct ib_mw ibmw; struct mlx5_core_mkey mmkey; @@ -1029,6 +1033,11 @@ struct mlx5_var_table { u64 num_var_hw_entries; }; +struct mlx5_port_caps { + bool has_smi; + u8 ext_port_cap; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -1056,11 +1065,6 @@ struct mlx5_ib_dev { u64 odp_max_size; struct mlx5_ib_pf_eq odp_pf_eq; - /* - * Sleepable RCU that prevents destruction of MRs while they are still - * being used by a page fault handler. - */ - struct srcu_struct odp_srcu; struct xarray odp_mkeys; u32 null_mkey; @@ -1089,6 +1093,8 @@ struct mlx5_ib_dev { struct mlx5_var_table var_table; struct xarray sig_mrs; + struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; + u16 pkey_table_len; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1243,6 +1249,10 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata); int mlx5_ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, @@ -1253,11 +1263,13 @@ int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags); +int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags); struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct ib_udata *udata, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr); +void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr); struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_pd *pd, struct ib_udata *udata); @@ -1279,9 +1291,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); -int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); -int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, - struct ib_smp *out_mad); +int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port); int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid); int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev, @@ -1345,6 +1355,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge); int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr); +int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void 
mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -1370,6 +1381,10 @@ static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) { return -EOPNOTSUPP; } +static inline int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ extern const struct mmu_interval_notifier_ops mlx5_mn_ops; @@ -1576,6 +1591,29 @@ static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev, return true; } +static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev, + struct mlx5_core_mkey *mmkey) +{ + refcount_set(&mmkey->usecount, 1); + + return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mmkey->key), + mmkey, GFP_KERNEL)); +} + +/* deref an mkey that can participate in ODP flow */ +static inline void mlx5r_deref_odp_mkey(struct mlx5_core_mkey *mmkey) +{ + if (refcount_dec_and_test(&mmkey->usecount)) + wake_up(&mmkey->wait); +} + +/* deref an mkey that can participate in ODP flow and wait for relese */ +static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_core_mkey *mmkey) +{ + mlx5r_deref_odp_mkey(mmkey); + wait_event(mmkey->wait, refcount_read(&mmkey->usecount) == 0); +} + int mlx5_ib_test_wc(struct mlx5_ib_dev *dev); static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 24f8d59a42ea..db05b0e0a8d7 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -36,6 +37,8 @@ #include <linux/debugfs.h> #include <linux/export.h> #include <linux/delay.h> +#include <linux/dma-buf.h> +#include <linux/dma-resv.h> #include <rdma/ib_umem.h> #include <rdma/ib_umem_odp.h> #include <rdma/ib_verbs.h> @@ -155,6 +158,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) mr->mmkey.type = MLX5_MKEY_MR; mr->mmkey.key |= mlx5_idx_to_mkey( MLX5_GET(create_mkey_out, mr->out, mkey_index)); + init_waitqueue_head(&mr->mmkey.wait); WRITE_ONCE(dev->cache.last_add, jiffies); @@ -935,6 +939,17 @@ static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, mr->access_flags = access_flags; } +static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem, + u64 iova) +{ + /* + * The alignment of iova has already been checked upon entering + * UVERBS_METHOD_REG_DMABUF_MR + */ + umem->iova = iova; + return PAGE_SIZE; +} + static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags) @@ -944,7 +959,11 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct mlx5_ib_mr *mr; unsigned int page_size; - page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova); + if (umem->is_dmabuf) + page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova); + else + page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, + 0, iova); if (WARN_ON(!page_size)) return ERR_PTR(-EINVAL); ent = mr_cache_ent_from_order( @@ -980,7 +999,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, mr->mmkey.size = umem->length; mr->mmkey.pd = to_mpd(pd)->pdn; mr->page_shift = order_base_2(page_size); - mr->umem = umem; set_mr_fields(dev, mr, umem->length, access_flags); return mr; @@ -1201,8 +1219,10 @@ int 
mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, /* * Send the DMA list to the HW for a normal MR using UMR. + * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP + * flag may be used. */ -static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) +int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) { struct mlx5_ib_dev *dev = mr_to_mdev(mr); struct device *ddev = &dev->mdev->pdev->dev; @@ -1244,6 +1264,10 @@ static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) cur_mtt->ptag = cpu_to_be64(rdma_block_iter_dma_address(&biter) | MLX5_IB_MTT_PRESENT); + + if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP)) + cur_mtt->ptag = 0; + cur_mtt++; } @@ -1528,10 +1552,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, } odp->private = mr; - init_waitqueue_head(&mr->q_deferred_work); - atomic_set(&mr->num_deferred_work, 0); - err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_KERNEL)); + err = mlx5r_store_odp_mkey(dev, &mr->mmkey); if (err) goto err_dereg_mr; @@ -1567,6 +1588,81 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return create_real_mr(pd, umem, iova, access_flags); } +static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) +{ + struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv; + struct mlx5_ib_mr *mr = umem_dmabuf->private; + + dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); + + if (!umem_dmabuf->sgt) + return; + + mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); + ib_umem_dmabuf_unmap_pages(umem_dmabuf); +} + +static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = { + .allow_peer2peer = 1, + .move_notify = mlx5_ib_dmabuf_invalidate_cb, +}; + +struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_mr *mr = NULL; + struct ib_umem_dmabuf *umem_dmabuf; + int err; + + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || + !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + return ERR_PTR(-EOPNOTSUPP); + + mlx5_ib_dbg(dev, + "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x\n", + offset, virt_addr, length, fd, access_flags); + + /* dmabuf requires xlt update via umr to work. 
*/ + if (!mlx5_ib_can_load_pas_with_umr(dev, length)) + return ERR_PTR(-EINVAL); + + umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd, + access_flags, + &mlx5_ib_dmabuf_attach_ops); + if (IS_ERR(umem_dmabuf)) { + mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n", + PTR_ERR(umem_dmabuf)); + return ERR_CAST(umem_dmabuf); + } + + mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr, + access_flags); + if (IS_ERR(mr)) { + ib_umem_release(&umem_dmabuf->umem); + return ERR_CAST(mr); + } + + mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); + + atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); + umem_dmabuf->private = mr; + err = mlx5r_store_odp_mkey(dev, &mr->mmkey); + if (err) + goto err_dereg_mr; + + err = mlx5_ib_init_dmabuf_mr(mr); + if (err) + goto err_dereg_mr; + return &mr->ibmr; + +err_dereg_mr: + dereg_mr(dev, mr); + return ERR_PTR(err); +} + /** * mlx5_mr_cache_invalidate - Fence all DMA on the MR * @mr: The MR to fence @@ -1740,8 +1836,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, return ERR_PTR(err); return NULL; } - /* DM or ODP MR's don't have a umem so we can't re-use it */ - if (!mr->umem || is_odp_mr(mr)) + /* DM or ODP MR's don't have a normal umem so we can't re-use it */ + if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) goto recreate; /* @@ -1760,10 +1856,10 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, } /* - * DM doesn't have a PAS list so we can't re-use it, odp does but the - * logic around releasing the umem is different + * DM doesn't have a PAS list so we can't re-use it, odp/dmabuf does + * but the logic around releasing the umem is different */ - if (!mr->umem || is_odp_mr(mr)) + if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) goto recreate; if (!(new_access_flags & IB_ACCESS_ON_DEMAND) && @@ -1876,6 +1972,8 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) /* Stop all DMA */ if (is_odp_mr(mr)) mlx5_ib_fence_odp_mr(mr); + else if (is_dmabuf_mr(mr)) + mlx5_ib_fence_dmabuf_mr(mr); else clean_mr(dev, mr); @@ -2227,9 +2325,7 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) } if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - err = xa_err(xa_store(&dev->odp_mkeys, - mlx5_base_mkey(mw->mmkey.key), &mw->mmkey, - GFP_KERNEL)); + err = mlx5r_store_odp_mkey(dev, &mw->mmkey); if (err) goto free_mkey; } @@ -2249,14 +2345,13 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw) struct mlx5_ib_dev *dev = to_mdev(mw->device); struct mlx5_ib_mw *mmw = to_mmw(mw); - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)); + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && + xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key))) /* - * pagefault_single_data_segment() may be accessing mmw under - * SRCU if the user bound an ODP MR to this MW. + * pagefault_single_data_segment() may be accessing mmw + * if the user bound an ODP MR to this MW. 
*/ - synchronize_srcu(&dev->odp_srcu); - } + mlx5r_deref_wait_odp_mkey(&mmw->mmkey); return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index aa2413b50adc..b103555b1f5d 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -33,6 +33,8 @@ #include <rdma/ib_umem.h> #include <rdma/ib_umem_odp.h> #include <linux/kernel.h> +#include <linux/dma-buf.h> +#include <linux/dma-resv.h> #include "mlx5_ib.h" #include "cmd.h" @@ -113,7 +115,6 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * xarray would be protected by the umem_mutex, however that is not * possible. Instead this uses a weaker update-then-lock pattern: * - * srcu_read_lock() * xa_store() * mutex_lock(umem_mutex) * mlx5_ib_update_xlt() @@ -124,12 +125,9 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * before destroying. * * The umem_mutex provides the acquire/release semantic needed to make - * the xa_store() visible to a racing thread. While SRCU is not - * technically required, using it gives consistent use of the SRCU - * locking around the xarray. + * the xa_store() visible to a racing thread. */ lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); - lockdep_assert_held(&mr_to_mdev(imr)->odp_srcu); for (; pklm != end; pklm++, idx++) { struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); @@ -205,8 +203,8 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) } /* - * This must be called after the mr has been removed from implicit_children - * and the SRCU synchronized. NOTE: The MR does not necessarily have to be + * This must be called after the mr has been removed from implicit_children. + * NOTE: The MR does not necessarily have to be * empty here, parallel page faults could have raced with the free process and * added pages to it. 
*/ @@ -216,19 +214,15 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; - int srcu_key; - /* implicit_child_mr's are not allowed to have deferred work */ - WARN_ON(atomic_read(&mr->num_deferred_work)); + mlx5r_deref_wait_odp_mkey(&mr->mmkey); if (need_imr_xlt) { - srcu_key = srcu_read_lock(&mr_to_mdev(mr)->odp_srcu); mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(mr->parent, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); - srcu_read_unlock(&mr_to_mdev(mr)->odp_srcu, srcu_key); } dma_fence_odp_mr(mr); @@ -236,26 +230,16 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) mr->parent = NULL; mlx5_mr_cache_free(mr_to_mdev(mr), mr); ib_umem_odp_release(odp); - if (atomic_dec_and_test(&imr->num_deferred_work)) - wake_up(&imr->q_deferred_work); } static void free_implicit_child_mr_work(struct work_struct *work) { struct mlx5_ib_mr *mr = container_of(work, struct mlx5_ib_mr, odp_destroy.work); + struct mlx5_ib_mr *imr = mr->parent; free_implicit_child_mr(mr, true); -} - -static void free_implicit_child_mr_rcu(struct rcu_head *head) -{ - struct mlx5_ib_mr *mr = - container_of(head, struct mlx5_ib_mr, odp_destroy.rcu); - - /* Freeing a MR is a sleeping operation, so bounce to a work queue */ - INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); - queue_work(system_unbound_wq, &mr->odp_destroy.work); + mlx5r_deref_odp_mkey(&imr->mmkey); } static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) @@ -264,21 +248,14 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; struct mlx5_ib_mr *imr = mr->parent; - xa_lock(&imr->implicit_children); - /* - * This can race with mlx5_ib_free_implicit_mr(), the first one to - * reach the xa lock wins the race and destroys the MR. - */ - if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_ATOMIC) != - mr) - goto out_unlock; + if (!refcount_inc_not_zero(&imr->mmkey.usecount)) + return; - atomic_inc(&imr->num_deferred_work); - call_srcu(&mr_to_mdev(mr)->odp_srcu, &mr->odp_destroy.rcu, - free_implicit_child_mr_rcu); + xa_erase(&imr->implicit_children, idx); -out_unlock: - xa_unlock(&imr->implicit_children); + /* Freeing a MR is a sleeping operation, so bounce to a work queue */ + INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); + queue_work(system_unbound_wq, &mr->odp_destroy.work); } static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, @@ -490,6 +467,12 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, mr->parent = imr; odp->private = mr; + /* + * First refcount is owned by the xarray and second refconut + * is returned to the caller. + */ + refcount_set(&mr->mmkey.usecount, 2); + err = mlx5_ib_update_xlt(mr, 0, MLX5_IMR_MTT_ENTRIES, PAGE_SHIFT, @@ -500,27 +483,28 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_mr; } - /* - * Once the store to either xarray completes any error unwind has to - * use synchronize_srcu(). 
Avoid this with xa_reserve() - */ - ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, - GFP_KERNEL); + xa_lock(&imr->implicit_children); + ret = __xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, + GFP_KERNEL); if (unlikely(ret)) { if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); - goto out_mr; + goto out_lock; } /* * Another thread beat us to creating the child mr, use * theirs. */ - goto out_mr; + refcount_inc(&ret->mmkey.usecount); + goto out_lock; } + xa_unlock(&imr->implicit_children); mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr); return mr; +out_lock: + xa_unlock(&imr->implicit_children); out_mr: mlx5_mr_cache_free(mr_to_mdev(imr), mr); out_umem: @@ -559,8 +543,6 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->ibmr.device = &dev->ib_dev; imr->umem = &umem_odp->umem; imr->is_odp_implicit = true; - atomic_set(&imr->num_deferred_work, 0); - init_waitqueue_head(&imr->q_deferred_work); xa_init(&imr->implicit_children); err = mlx5_ib_update_xlt(imr, 0, @@ -572,8 +554,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, if (err) goto out_mr; - err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key), - &imr->mmkey, GFP_KERNEL)); + err = mlx5r_store_odp_mkey(dev, &imr->mmkey); if (err) goto out_mr; @@ -591,60 +572,35 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); struct mlx5_ib_dev *dev = mr_to_mdev(imr); - struct list_head destroy_list; struct mlx5_ib_mr *mtt; - struct mlx5_ib_mr *tmp; unsigned long idx; - INIT_LIST_HEAD(&destroy_list); - xa_erase(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key)); /* - * This stops the SRCU protected page fault path from touching either - * the imr or any children. The page fault path can only reach the - * children xarray via the imr. - */ - synchronize_srcu(&dev->odp_srcu); - - /* * All work on the prefetch list must be completed, xa_erase() prevented * new work from being created. */ - wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work)); - + mlx5r_deref_wait_odp_mkey(&imr->mmkey); /* * At this point it is forbidden for any other thread to enter * pagefault_mr() on this imr. It is already forbidden to call * pagefault_mr() on an implicit child. Due to this additions to * implicit_children are prevented. + * In addition, any new call to destroy_unused_implicit_child_mr() + * may return immediately. */ /* - * Block destroy_unused_implicit_child_mr() from incrementing - * num_deferred_work. - */ - xa_lock(&imr->implicit_children); - xa_for_each (&imr->implicit_children, idx, mtt) { - __xa_erase(&imr->implicit_children, idx); - list_add(&mtt->odp_destroy.elm, &destroy_list); - } - xa_unlock(&imr->implicit_children); - - /* - * Wait for any concurrent destroy_unused_implicit_child_mr() to - * complete. - */ - wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work)); - - /* * Fence the imr before we destroy the children. This allows us to * skip updating the XLT of the imr during destroy of the child mkey * the imr points to. 
*/ mlx5_mr_cache_invalidate(imr); - list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm) + xa_for_each(&imr->implicit_children, idx, mtt) { + xa_erase(&imr->implicit_children, idx); free_implicit_child_mr(mtt, false); + } mlx5_mr_cache_free(dev, imr); ib_umem_odp_release(odp_imr); @@ -663,13 +619,39 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&mr_to_mdev(mr)->odp_srcu); - - wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); + mlx5r_deref_wait_odp_mkey(&mr->mmkey); dma_fence_odp_mr(mr); } +/** + * mlx5_ib_fence_dmabuf_mr - Stop all access to the dmabuf MR + * @mr: to fence + * + * On return no parallel threads will be touching this MR and no DMA will be + * active. + */ +void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem); + + /* Prevent new page faults and prefetch requests from succeeding */ + xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + + mlx5r_deref_wait_odp_mkey(&mr->mmkey); + + dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); + mlx5_mr_cache_invalidate(mr); + umem_dmabuf->private = NULL; + ib_umem_dmabuf_unmap_pages(umem_dmabuf); + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + + if (!mr->cache_ent) { + mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey); + WARN_ON(mr->descs); + } +} + #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) #define MLX5_PF_FLAGS_SNAPSHOT BIT(2) #define MLX5_PF_FLAGS_ENABLE BIT(3) @@ -747,8 +729,10 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, struct mlx5_ib_mr *mtt; u64 len; + xa_lock(&imr->implicit_children); mtt = xa_load(&imr->implicit_children, idx); if (unlikely(!mtt)) { + xa_unlock(&imr->implicit_children); mtt = implicit_get_child_mr(imr, idx); if (IS_ERR(mtt)) { ret = PTR_ERR(mtt); @@ -756,6 +740,9 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, } upd_start_idx = min(upd_start_idx, idx); upd_len = idx - upd_start_idx + 1; + } else { + refcount_inc(&mtt->mmkey.usecount); + xa_unlock(&imr->implicit_children); } umem_odp = to_ib_umem_odp(mtt->umem); @@ -764,6 +751,9 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, ret = pagefault_real_mr(mtt, umem_odp, user_va, len, bytes_mapped, flags); + + mlx5r_deref_odp_mkey(&mtt->mmkey); + if (ret < 0) goto out; user_va += len; @@ -803,6 +793,44 @@ out: return ret; } +static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt, + u32 *bytes_mapped, u32 flags) +{ + struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem); + u32 xlt_flags = 0; + int err; + unsigned int page_size; + + if (flags & MLX5_PF_FLAGS_ENABLE) + xlt_flags |= MLX5_IB_UPD_XLT_ENABLE; + + dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); + err = ib_umem_dmabuf_map_pages(umem_dmabuf); + if (err) { + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + return err; + } + + page_size = mlx5_umem_find_best_pgsz(&umem_dmabuf->umem, mkc, + log_page_size, 0, + umem_dmabuf->umem.iova); + if (unlikely(page_size < PAGE_SIZE)) { + ib_umem_dmabuf_unmap_pages(umem_dmabuf); + err = -EINVAL; + } else { + err = mlx5_ib_update_mr_pas(mr, xlt_flags); + } + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + + if (err) + return err; + + if (bytes_mapped) + *bytes_mapped += bcnt; + + return ib_umem_num_pages(mr->umem); +} + /* * Returns: * -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are @@ 
-817,10 +845,12 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - lockdep_assert_held(&mr_to_mdev(mr)->odp_srcu); if (unlikely(io_virt < mr->mmkey.iova)) return -EFAULT; + if (mr->umem->is_dmabuf) + return pagefault_dmabuf_mr(mr, bcnt, bytes_mapped, flags); + if (!odp->is_implicit_odp) { u64 user_va; @@ -847,6 +877,16 @@ int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) return ret >= 0 ? 0 : ret; } +int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + int ret; + + ret = pagefault_dmabuf_mr(mr, mr->umem->length, NULL, + MLX5_PF_FLAGS_ENABLE); + + return ret >= 0 ? 0 : ret; +} + struct pf_frame { struct pf_frame *next; u32 key; @@ -896,7 +936,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 *bytes_committed, u32 *bytes_mapped) { - int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0; + int npages = 0, ret, i, outlen, cur_outlen = 0, depth = 0; struct pf_frame *head = NULL, *frame; struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; @@ -905,14 +945,14 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, size_t offset; int ndescs; - srcu_key = srcu_read_lock(&dev->odp_srcu); - io_virt += *bytes_committed; bcnt -= *bytes_committed; next_mr: + xa_lock(&dev->odp_mkeys); mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key)); if (!mmkey) { + xa_unlock(&dev->odp_mkeys); mlx5_ib_dbg( dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", @@ -925,12 +965,15 @@ next_mr: * faulted. */ ret = 0; - goto srcu_unlock; + goto end; } + refcount_inc(&mmkey->usecount); + xa_unlock(&dev->odp_mkeys); + if (!mkey_is_eq(mmkey, key)) { mlx5_ib_dbg(dev, "failed to find mkey %x\n", key); ret = -EFAULT; - goto srcu_unlock; + goto end; } switch (mmkey->type) { @@ -939,7 +982,7 @@ next_mr: ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0); if (ret < 0) - goto srcu_unlock; + goto end; mlx5_update_odp_stats(mr, faults, ret); @@ -954,7 +997,7 @@ next_mr: if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) { mlx5_ib_dbg(dev, "indirection level exceeded\n"); ret = -EFAULT; - goto srcu_unlock; + goto end; } outlen = MLX5_ST_SZ_BYTES(query_mkey_out) + @@ -965,7 +1008,7 @@ next_mr: out = kzalloc(outlen, GFP_KERNEL); if (!out) { ret = -ENOMEM; - goto srcu_unlock; + goto end; } cur_outlen = outlen; } @@ -975,7 +1018,7 @@ next_mr: ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen); if (ret) - goto srcu_unlock; + goto end; offset = io_virt - MLX5_GET64(query_mkey_out, out, memory_key_mkey_entry.start_addr); @@ -989,7 +1032,7 @@ next_mr: frame = kzalloc(sizeof(*frame), GFP_KERNEL); if (!frame) { ret = -ENOMEM; - goto srcu_unlock; + goto end; } frame->key = be32_to_cpu(pklm->key); @@ -1008,7 +1051,7 @@ next_mr: default: mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type); ret = -EFAULT; - goto srcu_unlock; + goto end; } if (head) { @@ -1021,10 +1064,13 @@ next_mr: depth = frame->depth; kfree(frame); + mlx5r_deref_odp_mkey(mmkey); goto next_mr; } -srcu_unlock: +end: + if (mmkey) + mlx5r_deref_odp_mkey(mmkey); while (head) { frame = head; head = frame->next; @@ -1032,24 +1078,25 @@ srcu_unlock: } kfree(out); - srcu_read_unlock(&dev->odp_srcu, srcu_key); *bytes_committed = 0; return ret ? ret : npages; } -/** +/* * Parse a series of data segments for page fault handling. * - * @pfault contains page fault information. - * @wqe points at the first data segment in the WQE. - * @wqe_end points after the end of the WQE. 
- * @bytes_mapped receives the number of bytes that the function was able to - * map. This allows the caller to decide intelligently whether - * enough memory was mapped to resolve the page fault - * successfully (e.g. enough for the next MTU, or the entire - * WQE). - * @total_wqe_bytes receives the total data size of this WQE in bytes (minus - * the committed bytes). + * @dev: Pointer to mlx5 IB device + * @pfault: contains page fault information. + * @wqe: points at the first data segment in the WQE. + * @wqe_end: points after the end of the WQE. + * @bytes_mapped: receives the number of bytes that the function was able to + * map. This allows the caller to decide intelligently whether + * enough memory was mapped to resolve the page fault + * successfully (e.g. enough for the next MTU, or the entire + * WQE). + * @total_wqe_bytes: receives the total data size of this WQE in bytes (minus + * the committed bytes). + * @receive_queue: receive WQE end of sg list * * Returns the number of pages loaded if positive, zero for an empty WQE, or a * negative error code. @@ -1738,8 +1785,8 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work) u32 i; for (i = 0; i < work->num_sge; ++i) - if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work)) - wake_up(&work->frags[i].mr->q_deferred_work); + mlx5r_deref_odp_mkey(&work->frags[i].mr->mmkey); + kvfree(work); } @@ -1749,27 +1796,30 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_core_mkey *mmkey; - struct ib_umem_odp *odp; - struct mlx5_ib_mr *mr; - - lockdep_assert_held(&dev->odp_srcu); + struct mlx5_ib_mr *mr = NULL; + xa_lock(&dev->odp_mkeys); mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey)); if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR) - return NULL; + goto end; mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (mr->ibmr.pd != pd) - return NULL; - - odp = to_ib_umem_odp(mr->umem); + if (mr->ibmr.pd != pd) { + mr = NULL; + goto end; + } /* prefetch with write-access must be supported by the MR */ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && - !odp->umem.writable) - return NULL; + !mr->umem->writable) { + mr = NULL; + goto end; + } + refcount_inc(&mmkey->usecount); +end: + xa_unlock(&dev->odp_mkeys); return mr; } @@ -1777,17 +1827,12 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) { struct prefetch_mr_work *work = container_of(w, struct prefetch_mr_work, work); - struct mlx5_ib_dev *dev; u32 bytes_mapped = 0; - int srcu_key; int ret; u32 i; /* We rely on IB/core that work is executed if we have num_sge != 0 only. 
*/ WARN_ON(!work->num_sge); - dev = mr_to_mdev(work->frags[0].mr); - /* SRCU should be held when calling to mlx5_odp_populate_xlt() */ - srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < work->num_sge; ++i) { ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, work->frags[i].length, &bytes_mapped, @@ -1796,7 +1841,6 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) continue; mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret); } - srcu_read_unlock(&dev->odp_srcu, srcu_key); destroy_prefetch_work(work); } @@ -1820,9 +1864,6 @@ static bool init_prefetch_work(struct ib_pd *pd, work->num_sge = i; return false; } - - /* Keep the MR pointer will valid outside the SRCU */ - atomic_inc(&work->frags[i].mr->num_deferred_work); } work->num_sge = num_sge; return true; @@ -1833,42 +1874,35 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, u32 pf_flags, struct ib_sge *sg_list, u32 num_sge) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); u32 bytes_mapped = 0; - int srcu_key; int ret = 0; u32 i; - srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < num_sge; ++i) { struct mlx5_ib_mr *mr; mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); - if (!mr) { - ret = -ENOENT; - goto out; - } + if (!mr) + return -ENOENT; ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length, &bytes_mapped, pf_flags); - if (ret < 0) - goto out; + if (ret < 0) { + mlx5r_deref_odp_mkey(&mr->mmkey); + return ret; + } mlx5_update_odp_stats(mr, prefetch, ret); + mlx5r_deref_odp_mkey(&mr->mmkey); } - ret = 0; -out: - srcu_read_unlock(&dev->odp_srcu, srcu_key); - return ret; + return 0; } int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); u32 pf_flags = 0; struct prefetch_mr_work *work; - int srcu_key; if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; @@ -1884,13 +1918,10 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (!work) return -ENOMEM; - srcu_key = srcu_read_lock(&dev->odp_srcu); if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) { - srcu_read_unlock(&dev->odp_srcu, srcu_key); destroy_prefetch_work(work); return -EINVAL; } queue_work(system_unbound_wq, &work->work); - srcu_read_unlock(&dev->odp_srcu, srcu_key); return 0; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0cb7cc642d87..ec4b3f6a8222 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1078,6 +1078,7 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev, qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); MLX5_SET(qpc, qpc, uar_page, uar_index); + MLX5_SET(qpc, qpc, ts_format, MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT); MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); /* Set "fast registration enabled" for all kernel QPs */ @@ -1172,10 +1173,72 @@ static void destroy_flow_rule_vport_sq(struct mlx5_ib_sq *sq) sq->flow_rule = NULL; } +static int get_rq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq) +{ + bool fr_supported = + MLX5_CAP_GEN(dev->mdev, rq_ts_format) == + MLX5_RQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING || + MLX5_CAP_GEN(dev->mdev, rq_ts_format) == + MLX5_RQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; + + if (send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) { + if (!fr_supported) { + mlx5_ib_dbg(dev, "Free running TS format is not supported\n"); + return -EOPNOTSUPP; + } + return 
MLX5_RQC_TIMESTAMP_FORMAT_FREE_RUNNING; + } + return MLX5_RQC_TIMESTAMP_FORMAT_DEFAULT; +} + +static int get_sq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq) +{ + bool fr_supported = + MLX5_CAP_GEN(dev->mdev, sq_ts_format) == + MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING || + MLX5_CAP_GEN(dev->mdev, sq_ts_format) == + MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; + + if (send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) { + if (!fr_supported) { + mlx5_ib_dbg(dev, "Free running TS format is not supported\n"); + return -EOPNOTSUPP; + } + return MLX5_SQC_TIMESTAMP_FORMAT_FREE_RUNNING; + } + return MLX5_SQC_TIMESTAMP_FORMAT_DEFAULT; +} + +static int get_qp_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq, + struct mlx5_ib_cq *recv_cq) +{ + bool fr_supported = + MLX5_CAP_ROCE(dev->mdev, qp_ts_format) == + MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING || + MLX5_CAP_ROCE(dev->mdev, qp_ts_format) == + MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; + int ts_format = MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT; + + if (recv_cq && + recv_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) + ts_format = MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING; + + if (send_cq && + send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) + ts_format = MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING; + + if (ts_format == MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING && + !fr_supported) { + mlx5_ib_dbg(dev, "Free running TS format is not supported\n"); + return -EOPNOTSUPP; + } + return ts_format; +} + static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, struct ib_udata *udata, struct mlx5_ib_sq *sq, void *qpin, - struct ib_pd *pd) + struct ib_pd *pd, struct mlx5_ib_cq *cq) { struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer; __be64 *pas; @@ -1187,6 +1250,11 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, int err; unsigned int page_offset_quantized; unsigned long page_size; + int ts_format; + + ts_format = get_sq_ts_format(dev, cq); + if (ts_format < 0) + return ts_format; sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, ubuffer->buf_size, 0); @@ -1215,6 +1283,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe)) MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, ts_format, ts_format); MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd)); MLX5_SET(sqc, sqc, tis_lst_sz, 1); @@ -1263,7 +1332,7 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, void *qpin, - struct ib_pd *pd) + struct ib_pd *pd, struct mlx5_ib_cq *cq) { struct mlx5_ib_qp *mqp = rq->base.container_mibqp; __be64 *pas; @@ -1274,9 +1343,14 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct ib_umem *umem = rq->base.ubuffer.umem; unsigned int page_offset_quantized; unsigned long page_size = 0; + int ts_format; size_t inlen; int err; + ts_format = get_rq_ts_format(dev, cq); + if (ts_format < 0) + return ts_format; + page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, @@ -1296,6 +1370,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, MLX5_SET(rqc, rqc, vsd, 1); MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, 
ts_format, ts_format); MLX5_SET(rqc, rqc, flush_in_error_en, 1); MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv)); @@ -1393,10 +1468,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, } static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - u32 *in, size_t inlen, - struct ib_pd *pd, + u32 *in, size_t inlen, struct ib_pd *pd, struct ib_udata *udata, - struct mlx5_ib_create_qp_resp *resp) + struct mlx5_ib_create_qp_resp *resp, + struct ib_qp_init_attr *init_attr) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_sq *sq = &raw_packet_qp->sq; @@ -1415,7 +1490,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (err) return err; - err = create_raw_packet_qp_sq(dev, udata, sq, in, pd); + err = create_raw_packet_qp_sq(dev, udata, sq, in, pd, + to_mcq(init_attr->send_cq)); if (err) goto err_destroy_tis; @@ -1437,7 +1513,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; - err = create_raw_packet_qp_rq(dev, rq, in, pd); + err = create_raw_packet_qp_rq(dev, rq, in, pd, + to_mcq(init_attr->recv_cq)); if (err) goto err_destroy_sq; @@ -1907,6 +1984,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_cq *recv_cq; unsigned long flags; struct mlx5_ib_qp_base *base; + int ts_format; int mlx5_st; void *qpc; u32 *in; @@ -1944,6 +2022,13 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz))) return -EINVAL; + if (init_attr->qp_type != IB_QPT_RAW_PACKET) { + ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq), + to_mcq(init_attr->recv_cq)); + if (ts_format < 0) + return ts_format; + } + err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, ¶ms->resp, &inlen, base, ucmd); if (err) @@ -1992,6 +2077,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt)); } + if (init_attr->qp_type != IB_QPT_RAW_PACKET) + MLX5_SET(qpc, qpc, ts_format, ts_format); + MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr)); if (qp->sq.wqe_cnt) { @@ -2046,7 +2134,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr; raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata, - ¶ms->resp); + ¶ms->resp, init_attr); } else err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out); @@ -2432,9 +2520,6 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, case MLX5_IB_QPT_HW_GSI: case IB_QPT_DRIVER: case IB_QPT_GSI: - if (dev->profile == &raw_eth_profile) - goto out; - fallthrough; case IB_QPT_RAW_PACKET: case IB_QPT_UD: case MLX5_IB_QPT_REG_UMR: @@ -2629,10 +2714,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, int create_flags = attr->create_flags; bool cond; - if (qp->type == IB_QPT_UD && dev->profile == &raw_eth_profile) - if (create_flags & ~MLX5_IB_QP_CREATE_WC_TEST) - return -EINVAL; - if (qp_type == MLX5_IB_QPT_DCT) return (create_flags) ? 
-EINVAL : 0; @@ -3076,6 +3157,8 @@ static int ib_to_mlx5_rate_map(u8 rate) return 4; case IB_RATE_50_GBPS: return 5; + case IB_RATE_400_GBPS: + return 6; default: return rate + MLX5_STAT_RATE_OFFSET; } @@ -3183,11 +3266,13 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, alt ? attr->alt_pkey_index : attr->pkey_index); if (ah_flags & IB_AH_GRH) { - if (grh->sgid_index >= - dev->mdev->port_caps[port - 1].gid_table_len) { + const struct ib_port_immutable *immutable; + + immutable = ib_port_immutable_read(&dev->ib_dev, port); + if (grh->sgid_index >= immutable->gid_tbl_len) { pr_err("sgid_index (%u) too large. max is %d\n", grh->sgid_index, - dev->mdev->port_caps[port - 1].gid_table_len); + immutable->gid_tbl_len); return -EINVAL; } } @@ -4211,6 +4296,23 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, return 0; } +static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, + enum ib_qp_type qp_type) +{ + if (dev->profile != &raw_eth_profile) + return true; + + if (qp_type == IB_QPT_RAW_PACKET || qp_type == MLX5_IB_QPT_REG_UMR) + return true; + + /* Internal QP used for wc testing, with NOPs in wq */ + if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST) + return true; + + return false; +} + int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -4221,7 +4323,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; int err = -EINVAL; - int port; + + if (!mlx5_ib_modify_qp_allowed(dev, qp, ibqp->qp_type)) + return -EOPNOTSUPP; if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) return -EOPNOTSUPP; @@ -4263,10 +4367,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) { - port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - } - if (qp->flags & IB_QP_CREATE_SOURCE_QPN) { if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) { mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n", @@ -4295,14 +4395,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - if (attr_mask & IB_QP_PKEY_INDEX) { - port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - if (attr->pkey_index >= - dev->mdev->port_caps[port - 1].pkey_table_len) { - mlx5_ib_dbg(dev, "invalid pkey index %d\n", - attr->pkey_index); - goto out; - } + if ((attr_mask & IB_QP_PKEY_INDEX) && + attr->pkey_index >= dev->pkey_table_len) { + mlx5_ib_dbg(dev, "invalid pkey index %d\n", attr->pkey_index); + goto out; } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && @@ -5376,7 +5472,7 @@ void mlx5_ib_drain_rq(struct ib_qp *qp) handle_drain_completion(cq, &rdrain, dev); } -/** +/* * Bind a qp to a counter. 
If @counter is NULL then bind the qp to
 * the default counter
 */
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
index d6038fb6c50c..cf2852cba45c 100644
--- a/drivers/infiniband/hw/mlx5/wr.c
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -1369,7 +1369,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 		handle_qpt_uc(wr, &seg, &size);
 		break;
 	case IB_QPT_SMI:
-		if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) {
+		if (unlikely(!dev->port_caps[qp->port - 1].has_smi)) {
			mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
			err = -EPERM;
			*bad_wr = wr;
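
The implicit-ODP teardown in odp.c above now walks imr->implicit_children directly with xa_for_each(), erasing each slot before freeing the child, instead of first collecting children on a private destroy list. A minimal sketch of that drain pattern, where sketch_child and sketch_drain_children() are illustrative stand-ins for the driver's child MR and free_implicit_child_mr():

#include <linux/slab.h>
#include <linux/xarray.h>

struct sketch_child {
	unsigned long idx;		/* stands in for an implicit child MR */
};

/* Erase each slot first so the child can no longer be looked up, then free it. */
static void sketch_drain_children(struct xarray *children)
{
	struct sketch_child *child;
	unsigned long idx;

	xa_for_each(children, idx, child) {
		xa_erase(children, idx);
		kfree(child);		/* free_implicit_child_mr() in the driver */
	}
}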
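
Most of the odp.c changes above replace the odp_srcu read side with a per-mkey use count: page-fault and prefetch paths look the key up in dev->odp_mkeys under the xarray lock, bump mmkey->usecount before dropping the lock, and release it through mlx5r_deref_odp_mkey(), while the fence paths unpublish the mkey and then wait with mlx5r_deref_wait_odp_mkey(). A minimal sketch of that lookup-then-pin lifetime, assuming a simplified sketch_mkey in place of struct mlx5_core_mkey and the series' deref helpers:

#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/xarray.h>

struct sketch_mkey {
	u32 key;
	refcount_t usecount;		/* holds 1 while published in the xarray */
	wait_queue_head_t wait;
};

/* Reader side: find the mkey and pin it while the xarray lock is held. */
static struct sketch_mkey *sketch_get_mkey(struct xarray *odp_mkeys, u32 key)
{
	struct sketch_mkey *mmkey;

	xa_lock(odp_mkeys);
	mmkey = xa_load(odp_mkeys, key);
	if (mmkey)
		refcount_inc(&mmkey->usecount);
	xa_unlock(odp_mkeys);
	return mmkey;
}

/* Drop a pin; the last put wakes anyone fencing the mkey. */
static void sketch_put_mkey(struct sketch_mkey *mmkey)
{
	if (refcount_dec_and_test(&mmkey->usecount))
		wake_up(&mmkey->wait);
}

/* Fence side: unpublish, drop the publish reference, wait for readers to drain. */
static void sketch_fence_mkey(struct xarray *odp_mkeys, struct sketch_mkey *mmkey)
{
	xa_erase(odp_mkeys, mmkey->key);
	sketch_put_mkey(mmkey);
	wait_event(mmkey->wait, refcount_read(&mmkey->usecount) == 0);
}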
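
mlx5_ib_fence_dmabuf_mr() above layers the dma-buf reservation lock on top of the same fencing scheme: the mkey is unpublished and drained first, and only then is the hardware mapping torn down while attach->dmabuf->resv is held, so teardown cannot race with the exporter invalidating the mapping or with a concurrent page fault. A compressed sketch of that ordering, with quiesce_hw_access() as a stand-in for mlx5_mr_cache_invalidate() and the unpublish/drain step elided:

#include <linux/dma-resv.h>
#include <rdma/ib_umem.h>

static void quiesce_hw_access(void)
{
	/* stand-in for fencing the mkey so the HCA stops issuing DMA */
}

static void sketch_fence_dmabuf(struct ib_umem_dmabuf *umem_dmabuf)
{
	/* step 1: unpublish the mkey and wait for running page faults (elided) */

	/* step 2: stop HW access and unmap under the dma-buf reservation lock */
	dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
	quiesce_hw_access();
	ib_umem_dmabuf_unmap_pages(umem_dmabuf);
	dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
}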
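
The get_rq_ts_format()/get_sq_ts_format()/get_qp_ts_format() helpers added in qp.c above all encode one decision: a CQ created with IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION forces the free-running timestamp format, which must be refused when the device capability does not advertise it, and everything else keeps the default format. The core of that decision, reduced to a hypothetical generic helper with illustrative constants:

#include <linux/errno.h>
#include <linux/types.h>

enum sketch_ts_format {
	SKETCH_TS_FORMAT_DEFAULT,	/* let the device pick its preferred format */
	SKETCH_TS_FORMAT_FREE_RUNNING,	/* required for completion timestamps */
};

static int sketch_pick_ts_format(bool cq_wants_completion_ts,
				 bool free_running_supported)
{
	if (!cq_wants_completion_ts)
		return SKETCH_TS_FORMAT_DEFAULT;
	if (!free_running_supported)
		return -EOPNOTSUPP;	/* caller aborts SQ/RQ/QP creation */
	return SKETCH_TS_FORMAT_FREE_RUNNING;
}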
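
mlx5_set_path() above now validates grh->sgid_index against the GID table length cached in the core's ib_port_immutable data instead of reading mdev->port_caps, in line with the has_smi and pkey_table_len cleanups in the other hunks. A minimal sketch of that bounds check, with sketch_check_sgid_index() as a hypothetical wrapper around the real ib_port_immutable_read():

#include <linux/errno.h>
#include <rdma/ib_verbs.h>

/* Reject an out-of-range SGID index using the core's cached port limits. */
static int sketch_check_sgid_index(struct ib_device *ibdev, u32 port,
				   u8 sgid_index)
{
	const struct ib_port_immutable *immutable;

	immutable = ib_port_immutable_read(ibdev, port);
	if (sgid_index >= immutable->gid_tbl_len)
		return -EINVAL;
	return 0;
}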