summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/mlx5
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c231
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c14
-rw-r--r--drivers/infiniband/hw/mlx5/main.c147
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h60
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c137
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c327
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c160
-rw-r--r--drivers/infiniband/hw/mlx5/wr.c2
8 files changed, 687 insertions, 391 deletions
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 819c142857d6..de3c2fc6f361 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -94,13 +94,13 @@ struct devx_umem {
struct mlx5_core_dev *mdev;
struct ib_umem *umem;
u32 dinlen;
- u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
+ u32 dinbox[MLX5_ST_SZ_DW(destroy_umem_in)];
};
struct devx_umem_reg_cmd {
void *in;
u32 inlen;
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ u32 out[MLX5_ST_SZ_DW(create_umem_out)];
};
static struct mlx5_ib_ucontext *
@@ -111,8 +111,8 @@ devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
{
- u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+ u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {};
void *uctx;
int err;
u16 uid;
@@ -138,14 +138,14 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
if (err)
return err;
- uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ uid = MLX5_GET(create_uctx_out, out, uid);
return uid;
}
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
{
- u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(destroy_uctx_out)] = {};
MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
MLX5_SET(destroy_uctx_in, in, uid, uid);
@@ -288,6 +288,80 @@ static u64 get_enc_obj_id(u32 opcode, u32 obj_id)
return ((u64)opcode << 32) | obj_id;
}
+static u32 devx_get_created_obj_id(const void *in, const void *out, u16 opcode)
+{
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ return MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ case MLX5_CMD_OP_CREATE_UMEM:
+ return MLX5_GET(create_umem_out, out, umem_id);
+ case MLX5_CMD_OP_CREATE_MKEY:
+ return MLX5_GET(create_mkey_out, out, mkey_index);
+ case MLX5_CMD_OP_CREATE_CQ:
+ return MLX5_GET(create_cq_out, out, cqn);
+ case MLX5_CMD_OP_ALLOC_PD:
+ return MLX5_GET(alloc_pd_out, out, pd);
+ case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+ return MLX5_GET(alloc_transport_domain_out, out,
+ transport_domain);
+ case MLX5_CMD_OP_CREATE_RMP:
+ return MLX5_GET(create_rmp_out, out, rmpn);
+ case MLX5_CMD_OP_CREATE_SQ:
+ return MLX5_GET(create_sq_out, out, sqn);
+ case MLX5_CMD_OP_CREATE_RQ:
+ return MLX5_GET(create_rq_out, out, rqn);
+ case MLX5_CMD_OP_CREATE_RQT:
+ return MLX5_GET(create_rqt_out, out, rqtn);
+ case MLX5_CMD_OP_CREATE_TIR:
+ return MLX5_GET(create_tir_out, out, tirn);
+ case MLX5_CMD_OP_CREATE_TIS:
+ return MLX5_GET(create_tis_out, out, tisn);
+ case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+ return MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+ case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+ return MLX5_GET(create_flow_table_out, out, table_id);
+ case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+ return MLX5_GET(create_flow_group_out, out, group_id);
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ return MLX5_GET(set_fte_in, in, flow_index);
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ return MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
+ case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
+ return MLX5_GET(alloc_packet_reformat_context_out, out,
+ packet_reformat_id);
+ case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+ return MLX5_GET(alloc_modify_header_context_out, out,
+ modify_header_id);
+ case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+ return MLX5_GET(create_scheduling_element_out, out,
+ scheduling_element_id);
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ return MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ return MLX5_GET(set_l2_table_entry_in, in, table_index);
+ case MLX5_CMD_OP_CREATE_QP:
+ return MLX5_GET(create_qp_out, out, qpn);
+ case MLX5_CMD_OP_CREATE_SRQ:
+ return MLX5_GET(create_srq_out, out, srqn);
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ return MLX5_GET(create_xrc_srq_out, out, xrc_srqn);
+ case MLX5_CMD_OP_CREATE_DCT:
+ return MLX5_GET(create_dct_out, out, dctn);
+ case MLX5_CMD_OP_CREATE_XRQ:
+ return MLX5_GET(create_xrq_out, out, xrqn);
+ case MLX5_CMD_OP_ATTACH_TO_MCG:
+ return MLX5_GET(attach_to_mcg_in, in, qpn);
+ case MLX5_CMD_OP_ALLOC_XRCD:
+ return MLX5_GET(alloc_xrcd_out, out, xrcd);
+ case MLX5_CMD_OP_CREATE_PSV:
+ return MLX5_GET(create_psv_out, out, psv0_index);
+ default:
+ /* The entry must match to one of the devx_is_obj_create_cmd */
+ WARN_ON(true);
+ return 0;
+ }
+}
+
static u64 devx_get_obj_id(const void *in)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
@@ -399,8 +473,8 @@ static u64 devx_get_obj_id(const void *in)
break;
case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT,
- MLX5_GET(general_obj_in_cmd_hdr, in,
- obj_id));
+ MLX5_GET(query_modify_header_context_in,
+ in, modify_header_id));
break;
case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
@@ -1019,63 +1093,76 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
u32 *dinlen,
u32 *obj_id)
{
- u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type);
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid);
- *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ *obj_id = devx_get_created_obj_id(in, out, opcode);
*dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr);
-
- MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid);
- switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) {
+ switch (opcode) {
case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type);
+ MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, obj_type,
+ MLX5_GET(general_obj_in_cmd_hdr, in, obj_type));
break;
case MLX5_CMD_OP_CREATE_UMEM:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_umem_in, din, opcode,
MLX5_CMD_OP_DESTROY_UMEM);
+ MLX5_SET(destroy_umem_in, din, umem_id, *obj_id);
break;
case MLX5_CMD_OP_CREATE_MKEY:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY);
+ MLX5_SET(destroy_mkey_in, din, opcode,
+ MLX5_CMD_OP_DESTROY_MKEY);
+ MLX5_SET(destroy_mkey_in, in, mkey_index, *obj_id);
break;
case MLX5_CMD_OP_CREATE_CQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
+ MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
+ MLX5_SET(destroy_cq_in, din, cqn, *obj_id);
break;
case MLX5_CMD_OP_ALLOC_PD:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
+ MLX5_SET(dealloc_pd_in, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
+ MLX5_SET(dealloc_pd_in, din, pd, *obj_id);
break;
case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(dealloc_transport_domain_in, din, opcode,
MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+ MLX5_SET(dealloc_transport_domain_in, din, transport_domain,
+ *obj_id);
break;
case MLX5_CMD_OP_CREATE_RMP:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
+ MLX5_SET(destroy_rmp_in, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
+ MLX5_SET(destroy_rmp_in, din, rmpn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_SQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
+ MLX5_SET(destroy_sq_in, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
+ MLX5_SET(destroy_sq_in, din, sqn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_RQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ MLX5_SET(destroy_rq_in, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ MLX5_SET(destroy_rq_in, din, rqn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_RQT:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ MLX5_SET(destroy_rqt_in, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ MLX5_SET(destroy_rqt_in, din, rqtn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_TIR:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
+ MLX5_SET(destroy_tir_in, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
+ MLX5_SET(destroy_tir_in, din, tirn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_TIS:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, din, tisn, *obj_id);
break;
case MLX5_CMD_OP_ALLOC_Q_COUNTER:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(dealloc_q_counter_in, din, opcode,
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+ MLX5_SET(dealloc_q_counter_in, din, counter_set_id, *obj_id);
break;
case MLX5_CMD_OP_CREATE_FLOW_TABLE:
*dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in);
- *obj_id = MLX5_GET(create_flow_table_out, out, table_id);
MLX5_SET(destroy_flow_table_in, din, other_vport,
MLX5_GET(create_flow_table_in, in, other_vport));
MLX5_SET(destroy_flow_table_in, din, vport_number,
@@ -1083,12 +1170,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
MLX5_SET(destroy_flow_table_in, din, table_type,
MLX5_GET(create_flow_table_in, in, table_type));
MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_flow_table_in, din, opcode,
MLX5_CMD_OP_DESTROY_FLOW_TABLE);
break;
case MLX5_CMD_OP_CREATE_FLOW_GROUP:
*dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in);
- *obj_id = MLX5_GET(create_flow_group_out, out, group_id);
MLX5_SET(destroy_flow_group_in, din, other_vport,
MLX5_GET(create_flow_group_in, in, other_vport));
MLX5_SET(destroy_flow_group_in, din, vport_number,
@@ -1098,12 +1184,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
MLX5_SET(destroy_flow_group_in, din, table_id,
MLX5_GET(create_flow_group_in, in, table_id));
MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_flow_group_in, din, opcode,
MLX5_CMD_OP_DESTROY_FLOW_GROUP);
break;
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
*dinlen = MLX5_ST_SZ_BYTES(delete_fte_in);
- *obj_id = MLX5_GET(set_fte_in, in, flow_index);
MLX5_SET(delete_fte_in, din, other_vport,
MLX5_GET(set_fte_in, in, other_vport));
MLX5_SET(delete_fte_in, din, vport_number,
@@ -1113,63 +1198,70 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
MLX5_SET(delete_fte_in, din, table_id,
MLX5_GET(set_fte_in, in, table_id));
MLX5_SET(delete_fte_in, din, flow_index, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(delete_fte_in, din, opcode,
MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
break;
case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(dealloc_flow_counter_in, din, opcode,
MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
+ MLX5_SET(dealloc_flow_counter_in, din, flow_counter_id,
+ *obj_id);
break;
case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(dealloc_packet_reformat_context_in, din, opcode,
MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
+ MLX5_SET(dealloc_packet_reformat_context_in, din,
+ packet_reformat_id, *obj_id);
break;
case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(dealloc_modify_header_context_in, din, opcode,
MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_SET(dealloc_modify_header_context_in, din,
+ modify_header_id, *obj_id);
break;
case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
*dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in);
- *obj_id = MLX5_GET(create_scheduling_element_out, out,
- scheduling_element_id);
MLX5_SET(destroy_scheduling_element_in, din,
scheduling_hierarchy,
MLX5_GET(create_scheduling_element_in, in,
scheduling_hierarchy));
MLX5_SET(destroy_scheduling_element_in, din,
scheduling_element_id, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_scheduling_element_in, din, opcode,
MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
break;
case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
*dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in);
- *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(delete_vxlan_udp_dport_in, din, opcode,
MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
break;
case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
*dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in);
- *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(delete_l2_table_entry_in, din, opcode,
MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
break;
case MLX5_CMD_OP_CREATE_QP:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, din, qpn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_SRQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
+ MLX5_SET(destroy_srq_in, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
+ MLX5_SET(destroy_srq_in, din, srqn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_XRC_SRQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_xrc_srq_in, din, opcode,
MLX5_CMD_OP_DESTROY_XRC_SRQ);
+ MLX5_SET(destroy_xrc_srq_in, din, xrc_srqn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_DCT:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ MLX5_SET(destroy_dct_in, din, dctn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_XRQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+ MLX5_SET(destroy_xrq_in, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+ MLX5_SET(destroy_xrq_in, din, xrqn, *obj_id);
break;
case MLX5_CMD_OP_ATTACH_TO_MCG:
*dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in);
@@ -1178,16 +1270,19 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid),
MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid),
MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid));
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+ MLX5_SET(detach_from_mcg_in, din, opcode,
+ MLX5_CMD_OP_DETACH_FROM_MCG);
+ MLX5_SET(detach_from_mcg_in, din, qpn, *obj_id);
break;
case MLX5_CMD_OP_ALLOC_XRCD:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+ MLX5_SET(dealloc_xrcd_in, din, opcode,
+ MLX5_CMD_OP_DEALLOC_XRCD);
+ MLX5_SET(dealloc_xrcd_in, din, xrcd, *obj_id);
break;
case MLX5_CMD_OP_CREATE_PSV:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_psv_in, din, opcode,
MLX5_CMD_OP_DESTROY_PSV);
- MLX5_SET(destroy_psv_in, din, psvn,
- MLX5_GET(create_psv_out, out, psv0_index));
+ MLX5_SET(destroy_psv_in, din, psvn, *obj_id);
break;
default:
/* The entry must match to one of the devx_is_obj_create_cmd */
@@ -1215,9 +1310,9 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
mkey->size = MLX5_GET64(mkc, mkc, len);
mkey->pd = MLX5_GET(mkc, mkc, pd);
devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
+ init_waitqueue_head(&mkey->wait);
- return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mkey->key), mkey,
- GFP_KERNEL));
+ return mlx5r_store_odp_mkey(dev, mkey);
}
static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
@@ -1290,16 +1385,15 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
int ret;
dev = mlx5_udata_to_mdev(&attrs->driver_udata);
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY &&
+ xa_erase(&obj->ib_dev->odp_mkeys,
+ mlx5_base_mkey(obj->devx_mr.mmkey.key)))
/*
* The pagefault_single_data_segment() does commands against
* the mmkey, we must wait for that to stop before freeing the
* mkey, as another allocation could get the same mkey #.
*/
- xa_erase(&obj->ib_dev->odp_mkeys,
- mlx5_base_mkey(obj->devx_mr.mmkey.key));
- synchronize_srcu(&dev->odp_srcu);
- }
+ mlx5r_deref_wait_odp_mkey(&obj->devx_mr.mmkey);
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
@@ -1345,6 +1439,16 @@ out:
rcu_read_unlock();
}
+static bool is_apu_thread_cq(struct mlx5_ib_dev *dev, const void *in)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, apu) ||
+ !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
+ apu_thread_cq))
+ return false;
+
+ return true;
+}
+
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
struct uverbs_attr_bundle *attrs)
{
@@ -1398,7 +1502,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
obj->flags |= DEVX_OBJ_FLAGS_DCT;
err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in,
cmd_in_len, cmd_out, cmd_out_len);
- } else if (opcode == MLX5_CMD_OP_CREATE_CQ) {
+ } else if (opcode == MLX5_CMD_OP_CREATE_CQ &&
+ !is_apu_thread_cq(dev, cmd_in)) {
obj->flags |= DEVX_OBJ_FLAGS_CQ;
obj->core_cq.comp = devx_cq_comp;
err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
@@ -1968,8 +2073,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
num_alloc_xa_entries++;
event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
- if (!event_sub)
+ if (!event_sub) {
+ err = -ENOMEM;
goto err;
+ }
list_add_tail(&event_sub->event_list, &sub_list);
uverbs_uobject_get(&ev_file->uobj);
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 9bb9bb058932..652c6ccf1881 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -48,7 +48,7 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num,
if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED &&
in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
return true;
- return dev->mdev->port_caps[port_num - 1].has_smi;
+ return dev->port_caps[port_num - 1].has_smi;
}
static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey,
@@ -279,7 +279,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
-int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
+int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
@@ -299,7 +299,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
packet_error = be16_to_cpu(out_mad->status);
- dev->mdev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ?
+ dev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ?
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0;
out:
@@ -308,8 +308,8 @@ out:
return err;
}
-int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
- struct ib_smp *out_mad)
+static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
+ struct ib_smp *out_mad)
{
struct ib_smp *in_mad = NULL;
int err = -ENOMEM;
@@ -549,7 +549,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20));
props->gid_tbl_len = out_mad->data[50];
props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
- props->pkey_tbl_len = mdev->port_caps[port - 1].pkey_table_len;
+ props->pkey_tbl_len = dev->pkey_table_len;
props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46));
props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48));
props->active_width = out_mad->data[31] & 0xf;
@@ -589,7 +589,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
/* If reported active speed is QDR, check if is FDR-10 */
if (props->active_speed == 4) {
- if (mdev->port_caps[port - 1].ext_port_cap &
+ if (dev->port_caps[port - 1].ext_port_cap &
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) {
init_query_mad(in_mad);
in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index aabdc07e4753..0d69a697d75f 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2020, Intel Corporation. All rights reserved.
*/
#include <linux/debugfs.h>
@@ -461,7 +462,6 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
struct net_device *ndev, *upper;
enum ib_mtu ndev_ib_mtu;
bool put_mdev = true;
- u16 qkey_viol_cntr;
u32 eth_prot_oper;
u8 mdev_port_num;
bool ext;
@@ -499,20 +499,22 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
&props->active_width, ext);
- props->port_cap_flags |= IB_PORT_CM_SUP;
- props->ip_gids = true;
+ if (!dev->is_rep && mlx5_is_roce_enabled(mdev)) {
+ u16 qkey_viol_cntr;
- props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
- roce_address_table_size);
+ props->port_cap_flags |= IB_PORT_CM_SUP;
+ props->ip_gids = true;
+ props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
+ roce_address_table_size);
+ mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
+ props->qkey_viol_cntr = qkey_viol_cntr;
+ }
props->max_mtu = IB_MTU_4096;
props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
props->pkey_tbl_len = 1;
props->state = IB_PORT_DOWN;
props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
- mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
- props->qkey_viol_cntr = qkey_viol_cntr;
-
/* If this is a stub query for an unaffiliated port stop here */
if (!put_mdev)
goto out;
@@ -815,9 +817,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (err)
return err;
- err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
- if (err)
- return err;
+ props->max_pkeys = dev->pkey_table_len;
err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
if (err)
@@ -1384,19 +1384,17 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
{
- int ret;
+ return mlx5_query_port_roce(ibdev, port, props);
+}
- /* Only link layer == ethernet is valid for representors
- * and we always use port 1
+static int mlx5_ib_rep_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
+ /* Default special Pkey for representor device port as per the
+ * IB specification 1.3 section 10.9.1.2.
*/
- ret = mlx5_query_port_roce(ibdev, port, props);
- if (ret || !props)
- return ret;
-
- /* We don't support GIDS */
- props->gid_tbl_len = 0;
-
- return ret;
+ *pkey = 0xffff;
+ return 0;
}
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
@@ -2935,8 +2933,8 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev)
int err;
int port;
- for (port = 1; port <= ARRAY_SIZE(dev->mdev->port_caps); port++) {
- dev->mdev->port_caps[port - 1].has_smi = false;
+ for (port = 1; port <= ARRAY_SIZE(dev->port_caps); port++) {
+ dev->port_caps[port - 1].has_smi = false;
if (MLX5_CAP_GEN(dev->mdev, port_type) ==
MLX5_CAP_PORT_TYPE_IB) {
if (MLX5_CAP_GEN(dev->mdev, ib_virt)) {
@@ -2948,10 +2946,10 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev)
port, err);
return err;
}
- dev->mdev->port_caps[port - 1].has_smi =
+ dev->port_caps[port - 1].has_smi =
vport_ctx.has_smi;
} else {
- dev->mdev->port_caps[port - 1].has_smi = true;
+ dev->port_caps[port - 1].has_smi = true;
}
}
}
@@ -2960,63 +2958,12 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev)
static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
- int port;
+ unsigned int port;
- for (port = 1; port <= dev->num_ports; port++)
+ rdma_for_each_port (&dev->ib_dev, port)
mlx5_query_ext_port_caps(dev, port);
}
-static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port)
-{
- struct ib_device_attr *dprops = NULL;
- struct ib_port_attr *pprops = NULL;
- int err = -ENOMEM;
-
- pprops = kzalloc(sizeof(*pprops), GFP_KERNEL);
- if (!pprops)
- goto out;
-
- dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
- if (!dprops)
- goto out;
-
- err = mlx5_ib_query_device(&dev->ib_dev, dprops, NULL);
- if (err) {
- mlx5_ib_warn(dev, "query_device failed %d\n", err);
- goto out;
- }
-
- err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
- if (err) {
- mlx5_ib_warn(dev, "query_port %d failed %d\n",
- port, err);
- goto out;
- }
-
- dev->mdev->port_caps[port - 1].pkey_table_len =
- dprops->max_pkeys;
- dev->mdev->port_caps[port - 1].gid_table_len =
- pprops->gid_tbl_len;
- mlx5_ib_dbg(dev, "port %d: pkey_table_len %d, gid_table_len %d\n",
- port, dprops->max_pkeys, pprops->gid_tbl_len);
-
-out:
- kfree(pprops);
- kfree(dprops);
-
- return err;
-}
-
-static int get_port_caps(struct mlx5_ib_dev *dev, u8 port)
-{
- /* For representors use port 1, is this is the only native
- * port
- */
- if (dev->is_rep)
- return __get_port_caps(dev, 1);
- return __get_port_caps(dev, port);
-}
-
static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
{
switch (umr_fence_cap) {
@@ -3488,10 +3435,6 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
if (err)
goto unbind;
- err = get_port_caps(ibdev, mlx5_core_native_port_num(mpi->mdev));
- if (err)
- goto unbind;
-
err = mlx5_add_netdev_notifier(ibdev, port_num);
if (err) {
mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n",
@@ -3569,11 +3512,9 @@ static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev)
break;
}
}
- if (!bound) {
- get_port_caps(dev, i + 1);
+ if (!bound)
mlx5_ib_dbg(dev, "no free port found for port %d\n",
i + 1);
- }
}
list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list);
@@ -3926,8 +3867,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
WARN_ON(!xa_empty(&dev->odp_mkeys));
- cleanup_srcu_struct(&dev->odp_srcu);
-
+ mutex_destroy(&dev->cap_mask_mutex);
WARN_ON(!xa_empty(&dev->sig_mrs));
WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
}
@@ -3938,6 +3878,12 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
int err;
int i;
+ dev->ib_dev.node_type = RDMA_NODE_IB_CA;
+ dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
+ dev->ib_dev.phys_port_cnt = dev->num_ports;
+ dev->ib_dev.dev.parent = mdev->device;
+ dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES;
+
for (i = 0; i < dev->num_ports; i++) {
spin_lock_init(&dev->port[i].mp.mpi_lock);
rwlock_init(&dev->port[i].roce.netdev_lock);
@@ -3956,27 +3902,14 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
if (err)
goto err_mp;
- if (!mlx5_core_mp_enabled(mdev)) {
- for (i = 1; i <= dev->num_ports; i++) {
- err = get_port_caps(dev, i);
- if (err)
- break;
- }
- } else {
- err = get_port_caps(dev, mlx5_core_native_port_num(mdev));
- }
+ err = mlx5_query_max_pkeys(&dev->ib_dev, &dev->pkey_table_len);
if (err)
goto err_mp;
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
- dev->ib_dev.node_type = RDMA_NODE_IB_CA;
- dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
- dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev);
- dev->ib_dev.dev.parent = mdev->device;
- dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES;
mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
@@ -3987,17 +3920,11 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
spin_lock_init(&dev->dm.lock);
dev->dm.dev = mdev;
-
- err = init_srcu_struct(&dev->odp_srcu);
- if (err)
- goto err_mp;
-
return 0;
err_mp:
mlx5_ib_cleanup_multiport_master(dev);
-
- return -ENOMEM;
+ return err;
}
static int mlx5_ib_enable_driver(struct ib_device *dev)
@@ -4067,6 +3994,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.query_srq = mlx5_ib_query_srq,
.query_ucontext = mlx5_ib_query_ucontext,
.reg_user_mr = mlx5_ib_reg_user_mr,
+ .reg_user_mr_dmabuf = mlx5_ib_reg_user_mr_dmabuf,
.req_notify_cq = mlx5_ib_arm_cq,
.rereg_user_mr = mlx5_ib_rereg_user_mr,
.resize_cq = mlx5_ib_resize_cq,
@@ -4207,6 +4135,7 @@ static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = {
.get_port_immutable = mlx5_port_rep_immutable,
.query_port = mlx5_ib_rep_query_port,
+ .query_pkey = mlx5_ib_rep_query_pkey,
};
static int mlx5_ib_stage_raw_eth_non_default_cb(struct mlx5_ib_dev *dev)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index b0fdc1b08e06..88cc26e008fc 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2020, Intel Corporation. All rights reserved.
*/
#ifndef MLX5_IB_H
@@ -683,11 +684,8 @@ struct mlx5_ib_mr {
u64 pi_iova;
/* For ODP and implicit */
- atomic_t num_deferred_work;
- wait_queue_head_t q_deferred_work;
struct xarray implicit_children;
union {
- struct rcu_head rcu;
struct list_head elm;
struct work_struct work;
} odp_destroy;
@@ -703,6 +701,12 @@ static inline bool is_odp_mr(struct mlx5_ib_mr *mr)
mr->umem->is_odp;
}
+static inline bool is_dmabuf_mr(struct mlx5_ib_mr *mr)
+{
+ return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem &&
+ mr->umem->is_dmabuf;
+}
+
struct mlx5_ib_mw {
struct ib_mw ibmw;
struct mlx5_core_mkey mmkey;
@@ -1029,6 +1033,11 @@ struct mlx5_var_table {
u64 num_var_hw_entries;
};
+struct mlx5_port_caps {
+ bool has_smi;
+ u8 ext_port_cap;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
@@ -1056,11 +1065,6 @@ struct mlx5_ib_dev {
u64 odp_max_size;
struct mlx5_ib_pf_eq odp_pf_eq;
- /*
- * Sleepable RCU that prevents destruction of MRs while they are still
- * being used by a page fault handler.
- */
- struct srcu_struct odp_srcu;
struct xarray odp_mkeys;
u32 null_mkey;
@@ -1089,6 +1093,8 @@ struct mlx5_ib_dev {
struct mlx5_var_table var_table;
struct xarray sig_mrs;
+ struct mlx5_port_caps port_caps[MLX5_MAX_PORTS];
+ u16 pkey_table_len;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1243,6 +1249,10 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
+struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags,
+ struct ib_udata *udata);
int mlx5_ib_advise_mr(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 flags,
@@ -1253,11 +1263,13 @@ int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
+int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
struct ib_udata *udata,
int access_flags);
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr);
+void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr);
struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
u64 length, u64 virt_addr, int access_flags,
struct ib_pd *pd, struct ib_udata *udata);
@@ -1279,9 +1291,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
size_t *out_mad_size, u16 *out_mad_pkey_index);
int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
-int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
-int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
- struct ib_smp *out_mad);
+int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port);
int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid);
int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev,
@@ -1345,6 +1355,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 flags, struct ib_sge *sg_list, u32 num_sge);
int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr);
+int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
@@ -1370,6 +1381,10 @@ static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
{
return -EOPNOTSUPP;
}
+static inline int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
extern const struct mmu_interval_notifier_ops mlx5_mn_ops;
@@ -1576,6 +1591,29 @@ static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev,
return true;
}
+static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev,
+ struct mlx5_core_mkey *mmkey)
+{
+ refcount_set(&mmkey->usecount, 1);
+
+ return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mmkey->key),
+ mmkey, GFP_KERNEL));
+}
+
+/* deref an mkey that can participate in ODP flow */
+static inline void mlx5r_deref_odp_mkey(struct mlx5_core_mkey *mmkey)
+{
+ if (refcount_dec_and_test(&mmkey->usecount))
+ wake_up(&mmkey->wait);
+}
+
+/* deref an mkey that can participate in ODP flow and wait for relese */
+static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_core_mkey *mmkey)
+{
+ mlx5r_deref_odp_mkey(mmkey);
+ wait_event(mmkey->wait, refcount_read(&mmkey->usecount) == 0);
+}
+
int mlx5_ib_test_wc(struct mlx5_ib_dev *dev);
static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev)
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 24f8d59a42ea..db05b0e0a8d7 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2020, Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -36,6 +37,8 @@
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
@@ -155,6 +158,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
mr->mmkey.type = MLX5_MKEY_MR;
mr->mmkey.key |= mlx5_idx_to_mkey(
MLX5_GET(create_mkey_out, mr->out, mkey_index));
+ init_waitqueue_head(&mr->mmkey.wait);
WRITE_ONCE(dev->cache.last_add, jiffies);
@@ -935,6 +939,17 @@ static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
mr->access_flags = access_flags;
}
+static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem,
+ u64 iova)
+{
+ /*
+ * The alignment of iova has already been checked upon entering
+ * UVERBS_METHOD_REG_DMABUF_MR
+ */
+ umem->iova = iova;
+ return PAGE_SIZE;
+}
+
static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
struct ib_umem *umem, u64 iova,
int access_flags)
@@ -944,7 +959,11 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
struct mlx5_ib_mr *mr;
unsigned int page_size;
- page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova);
+ if (umem->is_dmabuf)
+ page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
+ else
+ page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size,
+ 0, iova);
if (WARN_ON(!page_size))
return ERR_PTR(-EINVAL);
ent = mr_cache_ent_from_order(
@@ -980,7 +999,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
mr->mmkey.size = umem->length;
mr->mmkey.pd = to_mpd(pd)->pdn;
mr->page_shift = order_base_2(page_size);
- mr->umem = umem;
set_mr_fields(dev, mr, umem->length, access_flags);
return mr;
@@ -1201,8 +1219,10 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
/*
* Send the DMA list to the HW for a normal MR using UMR.
+ * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
+ * flag may be used.
*/
-static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
+int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
@@ -1244,6 +1264,10 @@ static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
cur_mtt->ptag =
cpu_to_be64(rdma_block_iter_dma_address(&biter) |
MLX5_IB_MTT_PRESENT);
+
+ if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
+ cur_mtt->ptag = 0;
+
cur_mtt++;
}
@@ -1528,10 +1552,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
}
odp->private = mr;
- init_waitqueue_head(&mr->q_deferred_work);
- atomic_set(&mr->num_deferred_work, 0);
- err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
- &mr->mmkey, GFP_KERNEL));
+ err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
if (err)
goto err_dereg_mr;
@@ -1567,6 +1588,81 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return create_real_mr(pd, umem, iova, access_flags);
}
+static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
+{
+ struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;
+ struct mlx5_ib_mr *mr = umem_dmabuf->private;
+
+ dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+ if (!umem_dmabuf->sgt)
+ return;
+
+ mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
+ ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+}
+
+static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = {
+ .allow_peer2peer = 1,
+ .move_notify = mlx5_ib_dmabuf_invalidate_cb,
+};
+
+struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_mr *mr = NULL;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
+ !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mlx5_ib_dbg(dev,
+ "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x\n",
+ offset, virt_addr, length, fd, access_flags);
+
+ /* dmabuf requires xlt update via umr to work. */
+ if (!mlx5_ib_can_load_pas_with_umr(dev, length))
+ return ERR_PTR(-EINVAL);
+
+ umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd,
+ access_flags,
+ &mlx5_ib_dmabuf_attach_ops);
+ if (IS_ERR(umem_dmabuf)) {
+ mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n",
+ PTR_ERR(umem_dmabuf));
+ return ERR_CAST(umem_dmabuf);
+ }
+
+ mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr,
+ access_flags);
+ if (IS_ERR(mr)) {
+ ib_umem_release(&umem_dmabuf->umem);
+ return ERR_CAST(mr);
+ }
+
+ mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
+
+ atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages);
+ umem_dmabuf->private = mr;
+ err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
+ if (err)
+ goto err_dereg_mr;
+
+ err = mlx5_ib_init_dmabuf_mr(mr);
+ if (err)
+ goto err_dereg_mr;
+ return &mr->ibmr;
+
+err_dereg_mr:
+ dereg_mr(dev, mr);
+ return ERR_PTR(err);
+}
+
/**
* mlx5_mr_cache_invalidate - Fence all DMA on the MR
* @mr: The MR to fence
@@ -1740,8 +1836,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
return ERR_PTR(err);
return NULL;
}
- /* DM or ODP MR's don't have a umem so we can't re-use it */
- if (!mr->umem || is_odp_mr(mr))
+ /* DM or ODP MR's don't have a normal umem so we can't re-use it */
+ if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
goto recreate;
/*
@@ -1760,10 +1856,10 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
}
/*
- * DM doesn't have a PAS list so we can't re-use it, odp does but the
- * logic around releasing the umem is different
+ * DM doesn't have a PAS list so we can't re-use it, odp/dmabuf does
+ * but the logic around releasing the umem is different
*/
- if (!mr->umem || is_odp_mr(mr))
+ if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
goto recreate;
if (!(new_access_flags & IB_ACCESS_ON_DEMAND) &&
@@ -1876,6 +1972,8 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
/* Stop all DMA */
if (is_odp_mr(mr))
mlx5_ib_fence_odp_mr(mr);
+ else if (is_dmabuf_mr(mr))
+ mlx5_ib_fence_dmabuf_mr(mr);
else
clean_mr(dev, mr);
@@ -2227,9 +2325,7 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
}
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- err = xa_err(xa_store(&dev->odp_mkeys,
- mlx5_base_mkey(mw->mmkey.key), &mw->mmkey,
- GFP_KERNEL));
+ err = mlx5r_store_odp_mkey(dev, &mw->mmkey);
if (err)
goto free_mkey;
}
@@ -2249,14 +2345,13 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw)
struct mlx5_ib_dev *dev = to_mdev(mw->device);
struct mlx5_ib_mw *mmw = to_mmw(mw);
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key));
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) &&
+ xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)))
/*
- * pagefault_single_data_segment() may be accessing mmw under
- * SRCU if the user bound an ODP MR to this MW.
+ * pagefault_single_data_segment() may be accessing mmw
+ * if the user bound an ODP MR to this MW.
*/
- synchronize_srcu(&dev->odp_srcu);
- }
+ mlx5r_deref_wait_odp_mkey(&mmw->mmkey);
return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
}
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index aa2413b50adc..b103555b1f5d 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -33,6 +33,8 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <linux/kernel.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
#include "mlx5_ib.h"
#include "cmd.h"
@@ -113,7 +115,6 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
* xarray would be protected by the umem_mutex, however that is not
* possible. Instead this uses a weaker update-then-lock pattern:
*
- * srcu_read_lock()
* xa_store()
* mutex_lock(umem_mutex)
* mlx5_ib_update_xlt()
@@ -124,12 +125,9 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
* before destroying.
*
* The umem_mutex provides the acquire/release semantic needed to make
- * the xa_store() visible to a racing thread. While SRCU is not
- * technically required, using it gives consistent use of the SRCU
- * locking around the xarray.
+ * the xa_store() visible to a racing thread.
*/
lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex);
- lockdep_assert_held(&mr_to_mdev(imr)->odp_srcu);
for (; pklm != end; pklm++, idx++) {
struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx);
@@ -205,8 +203,8 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr)
}
/*
- * This must be called after the mr has been removed from implicit_children
- * and the SRCU synchronized. NOTE: The MR does not necessarily have to be
+ * This must be called after the mr has been removed from implicit_children.
+ * NOTE: The MR does not necessarily have to be
* empty here, parallel page faults could have raced with the free process and
* added pages to it.
*/
@@ -216,19 +214,15 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt)
struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
- int srcu_key;
- /* implicit_child_mr's are not allowed to have deferred work */
- WARN_ON(atomic_read(&mr->num_deferred_work));
+ mlx5r_deref_wait_odp_mkey(&mr->mmkey);
if (need_imr_xlt) {
- srcu_key = srcu_read_lock(&mr_to_mdev(mr)->odp_srcu);
mutex_lock(&odp_imr->umem_mutex);
mlx5_ib_update_xlt(mr->parent, idx, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&odp_imr->umem_mutex);
- srcu_read_unlock(&mr_to_mdev(mr)->odp_srcu, srcu_key);
}
dma_fence_odp_mr(mr);
@@ -236,26 +230,16 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt)
mr->parent = NULL;
mlx5_mr_cache_free(mr_to_mdev(mr), mr);
ib_umem_odp_release(odp);
- if (atomic_dec_and_test(&imr->num_deferred_work))
- wake_up(&imr->q_deferred_work);
}
static void free_implicit_child_mr_work(struct work_struct *work)
{
struct mlx5_ib_mr *mr =
container_of(work, struct mlx5_ib_mr, odp_destroy.work);
+ struct mlx5_ib_mr *imr = mr->parent;
free_implicit_child_mr(mr, true);
-}
-
-static void free_implicit_child_mr_rcu(struct rcu_head *head)
-{
- struct mlx5_ib_mr *mr =
- container_of(head, struct mlx5_ib_mr, odp_destroy.rcu);
-
- /* Freeing a MR is a sleeping operation, so bounce to a work queue */
- INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work);
- queue_work(system_unbound_wq, &mr->odp_destroy.work);
+ mlx5r_deref_odp_mkey(&imr->mmkey);
}
static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
@@ -264,21 +248,14 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
struct mlx5_ib_mr *imr = mr->parent;
- xa_lock(&imr->implicit_children);
- /*
- * This can race with mlx5_ib_free_implicit_mr(), the first one to
- * reach the xa lock wins the race and destroys the MR.
- */
- if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_ATOMIC) !=
- mr)
- goto out_unlock;
+ if (!refcount_inc_not_zero(&imr->mmkey.usecount))
+ return;
- atomic_inc(&imr->num_deferred_work);
- call_srcu(&mr_to_mdev(mr)->odp_srcu, &mr->odp_destroy.rcu,
- free_implicit_child_mr_rcu);
+ xa_erase(&imr->implicit_children, idx);
-out_unlock:
- xa_unlock(&imr->implicit_children);
+ /* Freeing a MR is a sleeping operation, so bounce to a work queue */
+ INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work);
+ queue_work(system_unbound_wq, &mr->odp_destroy.work);
}
static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
@@ -490,6 +467,12 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
mr->parent = imr;
odp->private = mr;
+ /*
+ * First refcount is owned by the xarray and second refconut
+ * is returned to the caller.
+ */
+ refcount_set(&mr->mmkey.usecount, 2);
+
err = mlx5_ib_update_xlt(mr, 0,
MLX5_IMR_MTT_ENTRIES,
PAGE_SHIFT,
@@ -500,27 +483,28 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
goto out_mr;
}
- /*
- * Once the store to either xarray completes any error unwind has to
- * use synchronize_srcu(). Avoid this with xa_reserve()
- */
- ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr,
- GFP_KERNEL);
+ xa_lock(&imr->implicit_children);
+ ret = __xa_cmpxchg(&imr->implicit_children, idx, NULL, mr,
+ GFP_KERNEL);
if (unlikely(ret)) {
if (xa_is_err(ret)) {
ret = ERR_PTR(xa_err(ret));
- goto out_mr;
+ goto out_lock;
}
/*
* Another thread beat us to creating the child mr, use
* theirs.
*/
- goto out_mr;
+ refcount_inc(&ret->mmkey.usecount);
+ goto out_lock;
}
+ xa_unlock(&imr->implicit_children);
mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr);
return mr;
+out_lock:
+ xa_unlock(&imr->implicit_children);
out_mr:
mlx5_mr_cache_free(mr_to_mdev(imr), mr);
out_umem:
@@ -559,8 +543,6 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
imr->ibmr.device = &dev->ib_dev;
imr->umem = &umem_odp->umem;
imr->is_odp_implicit = true;
- atomic_set(&imr->num_deferred_work, 0);
- init_waitqueue_head(&imr->q_deferred_work);
xa_init(&imr->implicit_children);
err = mlx5_ib_update_xlt(imr, 0,
@@ -572,8 +554,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
if (err)
goto out_mr;
- err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key),
- &imr->mmkey, GFP_KERNEL));
+ err = mlx5r_store_odp_mkey(dev, &imr->mmkey);
if (err)
goto out_mr;
@@ -591,60 +572,35 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
{
struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
struct mlx5_ib_dev *dev = mr_to_mdev(imr);
- struct list_head destroy_list;
struct mlx5_ib_mr *mtt;
- struct mlx5_ib_mr *tmp;
unsigned long idx;
- INIT_LIST_HEAD(&destroy_list);
-
xa_erase(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key));
/*
- * This stops the SRCU protected page fault path from touching either
- * the imr or any children. The page fault path can only reach the
- * children xarray via the imr.
- */
- synchronize_srcu(&dev->odp_srcu);
-
- /*
* All work on the prefetch list must be completed, xa_erase() prevented
* new work from being created.
*/
- wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work));
-
+ mlx5r_deref_wait_odp_mkey(&imr->mmkey);
/*
* At this point it is forbidden for any other thread to enter
* pagefault_mr() on this imr. It is already forbidden to call
* pagefault_mr() on an implicit child. Due to this additions to
* implicit_children are prevented.
+ * In addition, any new call to destroy_unused_implicit_child_mr()
+ * may return immediately.
*/
/*
- * Block destroy_unused_implicit_child_mr() from incrementing
- * num_deferred_work.
- */
- xa_lock(&imr->implicit_children);
- xa_for_each (&imr->implicit_children, idx, mtt) {
- __xa_erase(&imr->implicit_children, idx);
- list_add(&mtt->odp_destroy.elm, &destroy_list);
- }
- xa_unlock(&imr->implicit_children);
-
- /*
- * Wait for any concurrent destroy_unused_implicit_child_mr() to
- * complete.
- */
- wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work));
-
- /*
* Fence the imr before we destroy the children. This allows us to
* skip updating the XLT of the imr during destroy of the child mkey
* the imr points to.
*/
mlx5_mr_cache_invalidate(imr);
- list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm)
+ xa_for_each(&imr->implicit_children, idx, mtt) {
+ xa_erase(&imr->implicit_children, idx);
free_implicit_child_mr(mtt, false);
+ }
mlx5_mr_cache_free(dev, imr);
ib_umem_odp_release(odp_imr);
@@ -663,13 +619,39 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr)
xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));
/* Wait for all running page-fault handlers to finish. */
- synchronize_srcu(&mr_to_mdev(mr)->odp_srcu);
-
- wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work));
+ mlx5r_deref_wait_odp_mkey(&mr->mmkey);
dma_fence_odp_mr(mr);
}
+/**
+ * mlx5_ib_fence_dmabuf_mr - Stop all access to the dmabuf MR
+ * @mr: to fence
+ *
+ * On return no parallel threads will be touching this MR and no DMA will be
+ * active.
+ */
+void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr)
+{
+ struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
+
+ /* Prevent new page faults and prefetch requests from succeeding */
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));
+
+ mlx5r_deref_wait_odp_mkey(&mr->mmkey);
+
+ dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
+ mlx5_mr_cache_invalidate(mr);
+ umem_dmabuf->private = NULL;
+ ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+
+ if (!mr->cache_ent) {
+ mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey);
+ WARN_ON(mr->descs);
+ }
+}
+
#define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
#define MLX5_PF_FLAGS_SNAPSHOT BIT(2)
#define MLX5_PF_FLAGS_ENABLE BIT(3)
@@ -747,8 +729,10 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
struct mlx5_ib_mr *mtt;
u64 len;
+ xa_lock(&imr->implicit_children);
mtt = xa_load(&imr->implicit_children, idx);
if (unlikely(!mtt)) {
+ xa_unlock(&imr->implicit_children);
mtt = implicit_get_child_mr(imr, idx);
if (IS_ERR(mtt)) {
ret = PTR_ERR(mtt);
@@ -756,6 +740,9 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
}
upd_start_idx = min(upd_start_idx, idx);
upd_len = idx - upd_start_idx + 1;
+ } else {
+ refcount_inc(&mtt->mmkey.usecount);
+ xa_unlock(&imr->implicit_children);
}
umem_odp = to_ib_umem_odp(mtt->umem);
@@ -764,6 +751,9 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
ret = pagefault_real_mr(mtt, umem_odp, user_va, len,
bytes_mapped, flags);
+
+ mlx5r_deref_odp_mkey(&mtt->mmkey);
+
if (ret < 0)
goto out;
user_va += len;
@@ -803,6 +793,44 @@ out:
return ret;
}
+static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
+ u32 *bytes_mapped, u32 flags)
+{
+ struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
+ u32 xlt_flags = 0;
+ int err;
+ unsigned int page_size;
+
+ if (flags & MLX5_PF_FLAGS_ENABLE)
+ xlt_flags |= MLX5_IB_UPD_XLT_ENABLE;
+
+ dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
+ err = ib_umem_dmabuf_map_pages(umem_dmabuf);
+ if (err) {
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+ return err;
+ }
+
+ page_size = mlx5_umem_find_best_pgsz(&umem_dmabuf->umem, mkc,
+ log_page_size, 0,
+ umem_dmabuf->umem.iova);
+ if (unlikely(page_size < PAGE_SIZE)) {
+ ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+ err = -EINVAL;
+ } else {
+ err = mlx5_ib_update_mr_pas(mr, xlt_flags);
+ }
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+
+ if (err)
+ return err;
+
+ if (bytes_mapped)
+ *bytes_mapped += bcnt;
+
+ return ib_umem_num_pages(mr->umem);
+}
+
/*
* Returns:
* -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are
@@ -817,10 +845,12 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
{
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
- lockdep_assert_held(&mr_to_mdev(mr)->odp_srcu);
if (unlikely(io_virt < mr->mmkey.iova))
return -EFAULT;
+ if (mr->umem->is_dmabuf)
+ return pagefault_dmabuf_mr(mr, bcnt, bytes_mapped, flags);
+
if (!odp->is_implicit_odp) {
u64 user_va;
@@ -847,6 +877,16 @@ int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
return ret >= 0 ? 0 : ret;
}
+int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr)
+{
+ int ret;
+
+ ret = pagefault_dmabuf_mr(mr, mr->umem->length, NULL,
+ MLX5_PF_FLAGS_ENABLE);
+
+ return ret >= 0 ? 0 : ret;
+}
+
struct pf_frame {
struct pf_frame *next;
u32 key;
@@ -896,7 +936,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
u32 *bytes_committed,
u32 *bytes_mapped)
{
- int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0;
+ int npages = 0, ret, i, outlen, cur_outlen = 0, depth = 0;
struct pf_frame *head = NULL, *frame;
struct mlx5_core_mkey *mmkey;
struct mlx5_ib_mr *mr;
@@ -905,14 +945,14 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
size_t offset;
int ndescs;
- srcu_key = srcu_read_lock(&dev->odp_srcu);
-
io_virt += *bytes_committed;
bcnt -= *bytes_committed;
next_mr:
+ xa_lock(&dev->odp_mkeys);
mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key));
if (!mmkey) {
+ xa_unlock(&dev->odp_mkeys);
mlx5_ib_dbg(
dev,
"skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
@@ -925,12 +965,15 @@ next_mr:
* faulted.
*/
ret = 0;
- goto srcu_unlock;
+ goto end;
}
+ refcount_inc(&mmkey->usecount);
+ xa_unlock(&dev->odp_mkeys);
+
if (!mkey_is_eq(mmkey, key)) {
mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
ret = -EFAULT;
- goto srcu_unlock;
+ goto end;
}
switch (mmkey->type) {
@@ -939,7 +982,7 @@ next_mr:
ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0);
if (ret < 0)
- goto srcu_unlock;
+ goto end;
mlx5_update_odp_stats(mr, faults, ret);
@@ -954,7 +997,7 @@ next_mr:
if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) {
mlx5_ib_dbg(dev, "indirection level exceeded\n");
ret = -EFAULT;
- goto srcu_unlock;
+ goto end;
}
outlen = MLX5_ST_SZ_BYTES(query_mkey_out) +
@@ -965,7 +1008,7 @@ next_mr:
out = kzalloc(outlen, GFP_KERNEL);
if (!out) {
ret = -ENOMEM;
- goto srcu_unlock;
+ goto end;
}
cur_outlen = outlen;
}
@@ -975,7 +1018,7 @@ next_mr:
ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen);
if (ret)
- goto srcu_unlock;
+ goto end;
offset = io_virt - MLX5_GET64(query_mkey_out, out,
memory_key_mkey_entry.start_addr);
@@ -989,7 +1032,7 @@ next_mr:
frame = kzalloc(sizeof(*frame), GFP_KERNEL);
if (!frame) {
ret = -ENOMEM;
- goto srcu_unlock;
+ goto end;
}
frame->key = be32_to_cpu(pklm->key);
@@ -1008,7 +1051,7 @@ next_mr:
default:
mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type);
ret = -EFAULT;
- goto srcu_unlock;
+ goto end;
}
if (head) {
@@ -1021,10 +1064,13 @@ next_mr:
depth = frame->depth;
kfree(frame);
+ mlx5r_deref_odp_mkey(mmkey);
goto next_mr;
}
-srcu_unlock:
+end:
+ if (mmkey)
+ mlx5r_deref_odp_mkey(mmkey);
while (head) {
frame = head;
head = frame->next;
@@ -1032,24 +1078,25 @@ srcu_unlock:
}
kfree(out);
- srcu_read_unlock(&dev->odp_srcu, srcu_key);
*bytes_committed = 0;
return ret ? ret : npages;
}
-/**
+/*
* Parse a series of data segments for page fault handling.
*
- * @pfault contains page fault information.
- * @wqe points at the first data segment in the WQE.
- * @wqe_end points after the end of the WQE.
- * @bytes_mapped receives the number of bytes that the function was able to
- * map. This allows the caller to decide intelligently whether
- * enough memory was mapped to resolve the page fault
- * successfully (e.g. enough for the next MTU, or the entire
- * WQE).
- * @total_wqe_bytes receives the total data size of this WQE in bytes (minus
- * the committed bytes).
+ * @dev: Pointer to mlx5 IB device
+ * @pfault: contains page fault information.
+ * @wqe: points at the first data segment in the WQE.
+ * @wqe_end: points after the end of the WQE.
+ * @bytes_mapped: receives the number of bytes that the function was able to
+ * map. This allows the caller to decide intelligently whether
+ * enough memory was mapped to resolve the page fault
+ * successfully (e.g. enough for the next MTU, or the entire
+ * WQE).
+ * @total_wqe_bytes: receives the total data size of this WQE in bytes (minus
+ * the committed bytes).
+ * @receive_queue: receive WQE end of sg list
*
* Returns the number of pages loaded if positive, zero for an empty WQE, or a
* negative error code.
@@ -1738,8 +1785,8 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work)
u32 i;
for (i = 0; i < work->num_sge; ++i)
- if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work))
- wake_up(&work->frags[i].mr->q_deferred_work);
+ mlx5r_deref_odp_mkey(&work->frags[i].mr->mmkey);
+
kvfree(work);
}
@@ -1749,27 +1796,30 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_core_mkey *mmkey;
- struct ib_umem_odp *odp;
- struct mlx5_ib_mr *mr;
-
- lockdep_assert_held(&dev->odp_srcu);
+ struct mlx5_ib_mr *mr = NULL;
+ xa_lock(&dev->odp_mkeys);
mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey));
if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR)
- return NULL;
+ goto end;
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- if (mr->ibmr.pd != pd)
- return NULL;
-
- odp = to_ib_umem_odp(mr->umem);
+ if (mr->ibmr.pd != pd) {
+ mr = NULL;
+ goto end;
+ }
/* prefetch with write-access must be supported by the MR */
if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
- !odp->umem.writable)
- return NULL;
+ !mr->umem->writable) {
+ mr = NULL;
+ goto end;
+ }
+ refcount_inc(&mmkey->usecount);
+end:
+ xa_unlock(&dev->odp_mkeys);
return mr;
}
@@ -1777,17 +1827,12 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
{
struct prefetch_mr_work *work =
container_of(w, struct prefetch_mr_work, work);
- struct mlx5_ib_dev *dev;
u32 bytes_mapped = 0;
- int srcu_key;
int ret;
u32 i;
/* We rely on IB/core that work is executed if we have num_sge != 0 only. */
WARN_ON(!work->num_sge);
- dev = mr_to_mdev(work->frags[0].mr);
- /* SRCU should be held when calling to mlx5_odp_populate_xlt() */
- srcu_key = srcu_read_lock(&dev->odp_srcu);
for (i = 0; i < work->num_sge; ++i) {
ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
work->frags[i].length, &bytes_mapped,
@@ -1796,7 +1841,6 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
continue;
mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret);
}
- srcu_read_unlock(&dev->odp_srcu, srcu_key);
destroy_prefetch_work(work);
}
@@ -1820,9 +1864,6 @@ static bool init_prefetch_work(struct ib_pd *pd,
work->num_sge = i;
return false;
}
-
- /* Keep the MR pointer will valid outside the SRCU */
- atomic_inc(&work->frags[i].mr->num_deferred_work);
}
work->num_sge = num_sge;
return true;
@@ -1833,42 +1874,35 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd,
u32 pf_flags, struct ib_sge *sg_list,
u32 num_sge)
{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
u32 bytes_mapped = 0;
- int srcu_key;
int ret = 0;
u32 i;
- srcu_key = srcu_read_lock(&dev->odp_srcu);
for (i = 0; i < num_sge; ++i) {
struct mlx5_ib_mr *mr;
mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey);
- if (!mr) {
- ret = -ENOENT;
- goto out;
- }
+ if (!mr)
+ return -ENOENT;
ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length,
&bytes_mapped, pf_flags);
- if (ret < 0)
- goto out;
+ if (ret < 0) {
+ mlx5r_deref_odp_mkey(&mr->mmkey);
+ return ret;
+ }
mlx5_update_odp_stats(mr, prefetch, ret);
+ mlx5r_deref_odp_mkey(&mr->mmkey);
}
- ret = 0;
-out:
- srcu_read_unlock(&dev->odp_srcu, srcu_key);
- return ret;
+ return 0;
}
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 flags, struct ib_sge *sg_list, u32 num_sge)
{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
u32 pf_flags = 0;
struct prefetch_mr_work *work;
- int srcu_key;
if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
pf_flags |= MLX5_PF_FLAGS_DOWNGRADE;
@@ -1884,13 +1918,10 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
if (!work)
return -ENOMEM;
- srcu_key = srcu_read_lock(&dev->odp_srcu);
if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) {
- srcu_read_unlock(&dev->odp_srcu, srcu_key);
destroy_prefetch_work(work);
return -EINVAL;
}
queue_work(system_unbound_wq, &work->work);
- srcu_read_unlock(&dev->odp_srcu, srcu_key);
return 0;
}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 0cb7cc642d87..ec4b3f6a8222 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1078,6 +1078,7 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev,
qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
MLX5_SET(qpc, qpc, uar_page, uar_index);
+ MLX5_SET(qpc, qpc, ts_format, MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT);
MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
/* Set "fast registration enabled" for all kernel QPs */
@@ -1172,10 +1173,72 @@ static void destroy_flow_rule_vport_sq(struct mlx5_ib_sq *sq)
sq->flow_rule = NULL;
}
+static int get_rq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq)
+{
+ bool fr_supported =
+ MLX5_CAP_GEN(dev->mdev, rq_ts_format) ==
+ MLX5_RQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING ||
+ MLX5_CAP_GEN(dev->mdev, rq_ts_format) ==
+ MLX5_RQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME;
+
+ if (send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) {
+ if (!fr_supported) {
+ mlx5_ib_dbg(dev, "Free running TS format is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ return MLX5_RQC_TIMESTAMP_FORMAT_FREE_RUNNING;
+ }
+ return MLX5_RQC_TIMESTAMP_FORMAT_DEFAULT;
+}
+
+static int get_sq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq)
+{
+ bool fr_supported =
+ MLX5_CAP_GEN(dev->mdev, sq_ts_format) ==
+ MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING ||
+ MLX5_CAP_GEN(dev->mdev, sq_ts_format) ==
+ MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME;
+
+ if (send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) {
+ if (!fr_supported) {
+ mlx5_ib_dbg(dev, "Free running TS format is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ return MLX5_SQC_TIMESTAMP_FORMAT_FREE_RUNNING;
+ }
+ return MLX5_SQC_TIMESTAMP_FORMAT_DEFAULT;
+}
+
+static int get_qp_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq,
+ struct mlx5_ib_cq *recv_cq)
+{
+ bool fr_supported =
+ MLX5_CAP_ROCE(dev->mdev, qp_ts_format) ==
+ MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING ||
+ MLX5_CAP_ROCE(dev->mdev, qp_ts_format) ==
+ MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME;
+ int ts_format = MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT;
+
+ if (recv_cq &&
+ recv_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION)
+ ts_format = MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING;
+
+ if (send_cq &&
+ send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION)
+ ts_format = MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING;
+
+ if (ts_format == MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING &&
+ !fr_supported) {
+ mlx5_ib_dbg(dev, "Free running TS format is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ return ts_format;
+}
+
static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct ib_udata *udata,
struct mlx5_ib_sq *sq, void *qpin,
- struct ib_pd *pd)
+ struct ib_pd *pd, struct mlx5_ib_cq *cq)
{
struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
__be64 *pas;
@@ -1187,6 +1250,11 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
int err;
unsigned int page_offset_quantized;
unsigned long page_size;
+ int ts_format;
+
+ ts_format = get_sq_ts_format(dev, cq);
+ if (ts_format < 0)
+ return ts_format;
sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
ubuffer->buf_size, 0);
@@ -1215,6 +1283,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe))
MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1);
MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, ts_format, ts_format);
MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
MLX5_SET(sqc, sqc, tis_lst_sz, 1);
@@ -1263,7 +1332,7 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq, void *qpin,
- struct ib_pd *pd)
+ struct ib_pd *pd, struct mlx5_ib_cq *cq)
{
struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
__be64 *pas;
@@ -1274,9 +1343,14 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
struct ib_umem *umem = rq->base.ubuffer.umem;
unsigned int page_offset_quantized;
unsigned long page_size = 0;
+ int ts_format;
size_t inlen;
int err;
+ ts_format = get_rq_ts_format(dev, cq);
+ if (ts_format < 0)
+ return ts_format;
+
page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz,
MLX5_ADAPTER_PAGE_SHIFT,
page_offset, 64,
@@ -1296,6 +1370,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
MLX5_SET(rqc, rqc, vsd, 1);
MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, ts_format, ts_format);
MLX5_SET(rqc, rqc, flush_in_error_en, 1);
MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
@@ -1393,10 +1468,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
}
static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
- u32 *in, size_t inlen,
- struct ib_pd *pd,
+ u32 *in, size_t inlen, struct ib_pd *pd,
struct ib_udata *udata,
- struct mlx5_ib_create_qp_resp *resp)
+ struct mlx5_ib_create_qp_resp *resp,
+ struct ib_qp_init_attr *init_attr)
{
struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
@@ -1415,7 +1490,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (err)
return err;
- err = create_raw_packet_qp_sq(dev, udata, sq, in, pd);
+ err = create_raw_packet_qp_sq(dev, udata, sq, in, pd,
+ to_mcq(init_attr->send_cq));
if (err)
goto err_destroy_tis;
@@ -1437,7 +1513,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING)
rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
- err = create_raw_packet_qp_rq(dev, rq, in, pd);
+ err = create_raw_packet_qp_rq(dev, rq, in, pd,
+ to_mcq(init_attr->recv_cq));
if (err)
goto err_destroy_sq;
@@ -1907,6 +1984,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_cq *recv_cq;
unsigned long flags;
struct mlx5_ib_qp_base *base;
+ int ts_format;
int mlx5_st;
void *qpc;
u32 *in;
@@ -1944,6 +2022,13 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz)))
return -EINVAL;
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+ ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq));
+ if (ts_format < 0)
+ return ts_format;
+ }
+
err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &params->resp,
&inlen, base, ucmd);
if (err)
@@ -1992,6 +2077,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
}
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET)
+ MLX5_SET(qpc, qpc, ts_format, ts_format);
+
MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
if (qp->sq.wqe_cnt) {
@@ -2046,7 +2134,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr;
raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata,
- &params->resp);
+ &params->resp, init_attr);
} else
err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
@@ -2432,9 +2520,6 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
case MLX5_IB_QPT_HW_GSI:
case IB_QPT_DRIVER:
case IB_QPT_GSI:
- if (dev->profile == &raw_eth_profile)
- goto out;
- fallthrough;
case IB_QPT_RAW_PACKET:
case IB_QPT_UD:
case MLX5_IB_QPT_REG_UMR:
@@ -2629,10 +2714,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
int create_flags = attr->create_flags;
bool cond;
- if (qp->type == IB_QPT_UD && dev->profile == &raw_eth_profile)
- if (create_flags & ~MLX5_IB_QP_CREATE_WC_TEST)
- return -EINVAL;
-
if (qp_type == MLX5_IB_QPT_DCT)
return (create_flags) ? -EINVAL : 0;
@@ -3076,6 +3157,8 @@ static int ib_to_mlx5_rate_map(u8 rate)
return 4;
case IB_RATE_50_GBPS:
return 5;
+ case IB_RATE_400_GBPS:
+ return 6;
default:
return rate + MLX5_STAT_RATE_OFFSET;
}
@@ -3183,11 +3266,13 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
alt ? attr->alt_pkey_index : attr->pkey_index);
if (ah_flags & IB_AH_GRH) {
- if (grh->sgid_index >=
- dev->mdev->port_caps[port - 1].gid_table_len) {
+ const struct ib_port_immutable *immutable;
+
+ immutable = ib_port_immutable_read(&dev->ib_dev, port);
+ if (grh->sgid_index >= immutable->gid_tbl_len) {
pr_err("sgid_index (%u) too large. max is %d\n",
grh->sgid_index,
- dev->mdev->port_caps[port - 1].gid_table_len);
+ immutable->gid_tbl_len);
return -EINVAL;
}
}
@@ -4211,6 +4296,23 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
return 0;
}
+static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
+ enum ib_qp_type qp_type)
+{
+ if (dev->profile != &raw_eth_profile)
+ return true;
+
+ if (qp_type == IB_QPT_RAW_PACKET || qp_type == MLX5_IB_QPT_REG_UMR)
+ return true;
+
+ /* Internal QP used for wc testing, with NOPs in wq */
+ if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
+ return true;
+
+ return false;
+}
+
int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
@@ -4221,7 +4323,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
enum ib_qp_type qp_type;
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
- int port;
+
+ if (!mlx5_ib_modify_qp_allowed(dev, qp, ibqp->qp_type))
+ return -EOPNOTSUPP;
if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
return -EOPNOTSUPP;
@@ -4263,10 +4367,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
- port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- }
-
if (qp->flags & IB_QP_CREATE_SOURCE_QPN) {
if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) {
mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n",
@@ -4295,14 +4395,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
- if (attr_mask & IB_QP_PKEY_INDEX) {
- port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- if (attr->pkey_index >=
- dev->mdev->port_caps[port - 1].pkey_table_len) {
- mlx5_ib_dbg(dev, "invalid pkey index %d\n",
- attr->pkey_index);
- goto out;
- }
+ if ((attr_mask & IB_QP_PKEY_INDEX) &&
+ attr->pkey_index >= dev->pkey_table_len) {
+ mlx5_ib_dbg(dev, "invalid pkey index %d\n", attr->pkey_index);
+ goto out;
}
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
@@ -5376,7 +5472,7 @@ void mlx5_ib_drain_rq(struct ib_qp *qp)
handle_drain_completion(cq, &rdrain, dev);
}
-/**
+/*
* Bind a qp to a counter. If @counter is NULL then bind the qp to
* the default counter
*/
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
index d6038fb6c50c..cf2852cba45c 100644
--- a/drivers/infiniband/hw/mlx5/wr.c
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -1369,7 +1369,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
handle_qpt_uc(wr, &seg, &size);
break;
case IB_QPT_SMI:
- if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) {
+ if (unlikely(!dev->port_caps[qp->port - 1].has_smi)) {
mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
err = -EPERM;
*bad_wr = wr;