summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/infiniband/core/cache.c15
-rw-r--r--drivers/infiniband/core/cma.c14
-rw-r--r--drivers/infiniband/core/device.c29
-rw-r--r--drivers/infiniband/core/fmr_pool.c37
-rw-r--r--drivers/infiniband/core/packer.c14
-rw-r--r--drivers/infiniband/core/sa_query.c13
-rw-r--r--drivers/infiniband/core/ucm.c8
-rw-r--r--drivers/infiniband/core/ucma.c6
-rw-r--r--drivers/infiniband/core/ud_header.c23
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c17
-rw-r--r--drivers/infiniband/core/uverbs_main.c80
-rw-r--r--drivers/infiniband/core/verbs.c164
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c9
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h4
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c16
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c6
-rw-r--r--drivers/infiniband/hw/mlx4/main.c72
-rw-r--r--drivers/infiniband/hw/mlx5/main.c54
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h2
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c41
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma.h8
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c77
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.h5
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c33
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_sli.h16
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.c4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c38
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c23
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c18
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c5
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h7
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c7
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c15
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c40
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c912
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h31
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mcg.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c225
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h15
-rw-r--r--include/linux/mlx4/device.h3
-rw-r--r--include/linux/mlx5/device.h7
-rw-r--r--include/linux/mlx5/fs.h5
-rw-r--r--include/linux/mlx5/mlx5_ifc.h3
-rw-r--r--include/rdma/ib_mad.h4
-rw-r--r--include/rdma/ib_verbs.h10
-rw-r--r--net/9p/trans_rdma.c86
50 files changed, 1285 insertions, 955 deletions
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 53343ffbff7a..cb00d59da456 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1043,8 +1043,8 @@ static void ib_cache_update(struct ib_device *device,
ret = ib_query_port(device, port, tprops);
if (ret) {
- printk(KERN_WARNING "ib_query_port failed (%d) for %s\n",
- ret, device->name);
+ pr_warn("ib_query_port failed (%d) for %s\n",
+ ret, device->name);
goto err;
}
@@ -1067,8 +1067,8 @@ static void ib_cache_update(struct ib_device *device,
for (i = 0; i < pkey_cache->table_len; ++i) {
ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
if (ret) {
- printk(KERN_WARNING "ib_query_pkey failed (%d) for %s (index %d)\n",
- ret, device->name, i);
+ pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
+ ret, device->name, i);
goto err;
}
}
@@ -1078,8 +1078,8 @@ static void ib_cache_update(struct ib_device *device,
ret = ib_query_gid(device, port, i,
gid_cache->table + i, NULL);
if (ret) {
- printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
- ret, device->name, i);
+ pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
+ ret, device->name, i);
goto err;
}
}
@@ -1161,8 +1161,7 @@ int ib_cache_setup_one(struct ib_device *device)
GFP_KERNEL);
if (!device->cache.pkey_cache ||
!device->cache.lmc_cache) {
- printk(KERN_WARNING "Couldn't allocate cache "
- "for %s\n", device->name);
+ pr_warn("Couldn't allocate cache for %s\n", device->name);
return -ENOMEM;
}
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 7eace1ffa74d..93ab0ae97208 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1721,7 +1721,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
break;
default:
- printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
+ pr_err("RDMA CMA: unexpected IB CM event: %d\n",
ib_event->event);
goto out;
}
@@ -2194,8 +2194,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
ret = rdma_listen(id, id_priv->backlog);
if (ret)
- printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
- "listening on device %s\n", ret, cma_dev->device->name);
+ pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n",
+ ret, cma_dev->device->name);
}
static void cma_listen_on_all(struct rdma_id_private *id_priv)
@@ -3247,7 +3247,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
event.status = 0;
break;
default:
- printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
+ pr_err("RDMA CMA: unexpected IB CM event: %d\n",
ib_event->event);
goto out;
}
@@ -4011,8 +4011,8 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
if ((dev_addr->bound_dev_if == ndev->ifindex) &&
(net_eq(dev_net(ndev), dev_addr->net)) &&
memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
- printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
- ndev->name, &id_priv->id);
+ pr_info("RDMA CM addr change for ndev %s used by id %p\n",
+ ndev->name, &id_priv->id);
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
return -ENOMEM;
@@ -4295,7 +4295,7 @@ static int __init cma_init(void)
goto err;
if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
- printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n");
+ pr_warn("RDMA CMA: failed to add netlink callback\n");
cma_configfs_init();
return 0;
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 00da80e02154..270c7ff6cba7 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -115,8 +115,8 @@ static int ib_device_check_mandatory(struct ib_device *device)
for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
- printk(KERN_WARNING "Device %s is missing mandatory function %s\n",
- device->name, mandatory_table[i].name);
+ pr_warn("Device %s is missing mandatory function %s\n",
+ device->name, mandatory_table[i].name);
return -EINVAL;
}
}
@@ -255,8 +255,8 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
context = kmalloc(sizeof *context, GFP_KERNEL);
if (!context) {
- printk(KERN_WARNING "Couldn't allocate client context for %s/%s\n",
- device->name, client->name);
+ pr_warn("Couldn't allocate client context for %s/%s\n",
+ device->name, client->name);
return -ENOMEM;
}
@@ -343,28 +343,29 @@ int ib_register_device(struct ib_device *device,
ret = read_port_immutable(device);
if (ret) {
- printk(KERN_WARNING "Couldn't create per port immutable data %s\n",
- device->name);
+ pr_warn("Couldn't create per port immutable data %s\n",
+ device->name);
goto out;
}
ret = ib_cache_setup_one(device);
if (ret) {
- printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
+ pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
goto out;
}
memset(&device->attrs, 0, sizeof(device->attrs));
ret = device->query_device(device, &device->attrs, &uhw);
if (ret) {
- printk(KERN_WARNING "Couldn't query the device attributes\n");
+ pr_warn("Couldn't query the device attributes\n");
+ ib_cache_cleanup_one(device);
goto out;
}
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
- printk(KERN_WARNING "Couldn't register device %s with driver model\n",
- device->name);
+ pr_warn("Couldn't register device %s with driver model\n",
+ device->name);
ib_cache_cleanup_one(device);
goto out;
}
@@ -565,8 +566,8 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client,
goto out;
}
- printk(KERN_WARNING "No client context found for %s/%s\n",
- device->name, client->name);
+ pr_warn("No client context found for %s/%s\n",
+ device->name, client->name);
out:
spin_unlock_irqrestore(&device->client_data_lock, flags);
@@ -959,13 +960,13 @@ static int __init ib_core_init(void)
ret = class_register(&ib_class);
if (ret) {
- printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
+ pr_warn("Couldn't create InfiniBand device class\n");
goto err_comp;
}
ret = ibnl_init();
if (ret) {
- printk(KERN_WARNING "Couldn't init IB netlink interface\n");
+ pr_warn("Couldn't init IB netlink interface\n");
goto err_sysfs;
}
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 6ac3683c144b..cdbb1f1a6d97 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -150,8 +150,8 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
#ifdef DEBUG
if (fmr->ref_count !=0) {
- printk(KERN_WARNING PFX "Unmapping FMR 0x%08x with ref count %d\n",
- fmr, fmr->ref_count);
+ pr_warn(PFX "Unmapping FMR 0x%08x with ref count %d\n",
+ fmr, fmr->ref_count);
}
#endif
}
@@ -167,7 +167,7 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
ret = ib_unmap_fmr(&fmr_list);
if (ret)
- printk(KERN_WARNING PFX "ib_unmap_fmr returned %d\n", ret);
+ pr_warn(PFX "ib_unmap_fmr returned %d\n", ret);
spin_lock_irq(&pool->pool_lock);
list_splice(&unmap_list, &pool->free_list);
@@ -222,8 +222,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
device = pd->device;
if (!device->alloc_fmr || !device->dealloc_fmr ||
!device->map_phys_fmr || !device->unmap_fmr) {
- printk(KERN_INFO PFX "Device %s does not support FMRs\n",
- device->name);
+ pr_info(PFX "Device %s does not support FMRs\n", device->name);
return ERR_PTR(-ENOSYS);
}
@@ -233,13 +232,10 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
max_remaps = device->attrs.max_map_per_fmr;
pool = kmalloc(sizeof *pool, GFP_KERNEL);
- if (!pool) {
- printk(KERN_WARNING PFX "couldn't allocate pool struct\n");
+ if (!pool)
return ERR_PTR(-ENOMEM);
- }
pool->cache_bucket = NULL;
-
pool->flush_function = params->flush_function;
pool->flush_arg = params->flush_arg;
@@ -251,7 +247,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
GFP_KERNEL);
if (!pool->cache_bucket) {
- printk(KERN_WARNING PFX "Failed to allocate cache in pool\n");
+ pr_warn(PFX "Failed to allocate cache in pool\n");
ret = -ENOMEM;
goto out_free_pool;
}
@@ -275,7 +271,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
"ib_fmr(%s)",
device->name);
if (IS_ERR(pool->thread)) {
- printk(KERN_WARNING PFX "couldn't start cleanup thread\n");
+ pr_warn(PFX "couldn't start cleanup thread\n");
ret = PTR_ERR(pool->thread);
goto out_free_pool;
}
@@ -294,11 +290,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
for (i = 0; i < params->pool_size; ++i) {
fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);
- if (!fmr) {
- printk(KERN_WARNING PFX "failed to allocate fmr "
- "struct for FMR %d\n", i);
+ if (!fmr)
goto out_fail;
- }
fmr->pool = pool;
fmr->remap_count = 0;
@@ -307,8 +300,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
if (IS_ERR(fmr->fmr)) {
- printk(KERN_WARNING PFX "fmr_create failed "
- "for FMR %d\n", i);
+ pr_warn(PFX "fmr_create failed for FMR %d\n",
+ i);
kfree(fmr);
goto out_fail;
}
@@ -363,8 +356,8 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
}
if (i < pool->pool_size)
- printk(KERN_WARNING PFX "pool still has %d regions registered\n",
- pool->pool_size - i);
+ pr_warn(PFX "pool still has %d regions registered\n",
+ pool->pool_size - i);
kfree(pool->cache_bucket);
kfree(pool);
@@ -463,7 +456,7 @@ struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
list_add(&fmr->list, &pool->free_list);
spin_unlock_irqrestore(&pool->pool_lock, flags);
- printk(KERN_WARNING PFX "fmr_map returns %d\n", result);
+ pr_warn(PFX "fmr_map returns %d\n", result);
return ERR_PTR(result);
}
@@ -517,8 +510,8 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
#ifdef DEBUG
if (fmr->ref_count < 0)
- printk(KERN_WARNING PFX "FMR %p has ref count %d < 0\n",
- fmr, fmr->ref_count);
+ pr_warn(PFX "FMR %p has ref count %d < 0\n",
+ fmr, fmr->ref_count);
#endif
spin_unlock_irqrestore(&pool->pool_lock, flags);
diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c
index 1b65986c0be3..19b1ee3279b4 100644
--- a/drivers/infiniband/core/packer.c
+++ b/drivers/infiniband/core/packer.c
@@ -44,7 +44,7 @@ static u64 value_read(int offset, int size, void *structure)
case 4: return be32_to_cpup((__be32 *) (structure + offset));
case 8: return be64_to_cpup((__be64 *) (structure + offset));
default:
- printk(KERN_WARNING "Field size %d bits not handled\n", size * 8);
+ pr_warn("Field size %d bits not handled\n", size * 8);
return 0;
}
}
@@ -104,9 +104,8 @@ void ib_pack(const struct ib_field *desc,
} else {
if (desc[i].offset_bits % 8 ||
desc[i].size_bits % 8) {
- printk(KERN_WARNING "Structure field %s of size %d "
- "bits is not byte-aligned\n",
- desc[i].field_name, desc[i].size_bits);
+ pr_warn("Structure field %s of size %d bits is not byte-aligned\n",
+ desc[i].field_name, desc[i].size_bits);
}
if (desc[i].struct_size_bytes)
@@ -132,7 +131,7 @@ static void value_write(int offset, int size, u64 val, void *structure)
case 32: *(__be32 *) (structure + offset) = cpu_to_be32(val); break;
case 64: *(__be64 *) (structure + offset) = cpu_to_be64(val); break;
default:
- printk(KERN_WARNING "Field size %d bits not handled\n", size * 8);
+ pr_warn("Field size %d bits not handled\n", size * 8);
}
}
@@ -188,9 +187,8 @@ void ib_unpack(const struct ib_field *desc,
} else {
if (desc[i].offset_bits % 8 ||
desc[i].size_bits % 8) {
- printk(KERN_WARNING "Structure field %s of size %d "
- "bits is not byte-aligned\n",
- desc[i].field_name, desc[i].size_bits);
+ pr_warn("Structure field %s of size %d bits is not byte-aligned\n",
+ desc[i].field_name, desc[i].size_bits);
}
memcpy(structure + desc[i].struct_offset_bytes,
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index f334090bb612..8e3bf6c8d3c3 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -864,13 +864,12 @@ static void update_sm_ah(struct work_struct *work)
struct ib_ah_attr ah_attr;
if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
- printk(KERN_WARNING "Couldn't query port\n");
+ pr_warn("Couldn't query port\n");
return;
}
new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
if (!new_ah) {
- printk(KERN_WARNING "Couldn't allocate new SM AH\n");
return;
}
@@ -880,7 +879,7 @@ static void update_sm_ah(struct work_struct *work)
new_ah->pkey_index = 0;
if (ib_find_pkey(port->agent->device, port->port_num,
IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
- printk(KERN_ERR "Couldn't find index for default PKey\n");
+ pr_err("Couldn't find index for default PKey\n");
memset(&ah_attr, 0, sizeof ah_attr);
ah_attr.dlid = port_attr.sm_lid;
@@ -889,7 +888,7 @@ static void update_sm_ah(struct work_struct *work)
new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
if (IS_ERR(new_ah->ah)) {
- printk(KERN_WARNING "Couldn't create new SM AH\n");
+ pr_warn("Couldn't create new SM AH\n");
kfree(new_ah);
return;
}
@@ -1221,7 +1220,7 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
rec.net = NULL;
rec.ifindex = 0;
rec.gid_type = IB_GID_TYPE_IB;
- memset(rec.dmac, 0, ETH_ALEN);
+ eth_zero_addr(rec.dmac);
query->callback(status, &rec, query->context);
} else
query->callback(status, NULL, query->context);
@@ -1800,13 +1799,13 @@ static int __init ib_sa_init(void)
ret = ib_register_client(&sa_client);
if (ret) {
- printk(KERN_ERR "Couldn't register ib_sa client\n");
+ pr_err("Couldn't register ib_sa client\n");
goto err1;
}
ret = mcast_init();
if (ret) {
- printk(KERN_ERR "Couldn't initialize multicast handling\n");
+ pr_err("Couldn't initialize multicast handling\n");
goto err2;
}
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 6b4e8a008bc0..4a9aa0433b07 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1234,7 +1234,7 @@ static int find_overflow_devnum(void)
ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES,
"infiniband_cm");
if (ret) {
- printk(KERN_ERR "ucm: couldn't register dynamic device number\n");
+ pr_err("ucm: couldn't register dynamic device number\n");
return ret;
}
}
@@ -1329,19 +1329,19 @@ static int __init ib_ucm_init(void)
ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES,
"infiniband_cm");
if (ret) {
- printk(KERN_ERR "ucm: couldn't register device number\n");
+ pr_err("ucm: couldn't register device number\n");
goto error1;
}
ret = class_create_file(&cm_class, &class_attr_abi_version.attr);
if (ret) {
- printk(KERN_ERR "ucm: couldn't create abi_version attribute\n");
+ pr_err("ucm: couldn't create abi_version attribute\n");
goto error2;
}
ret = ib_register_client(&ucm_client);
if (ret) {
- printk(KERN_ERR "ucm: couldn't register client\n");
+ pr_err("ucm: couldn't register client\n");
goto error3;
}
return 0;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 8b5a934e1133..dd3bcceadfde 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -314,7 +314,7 @@ static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
}
}
if (!event_found)
- printk(KERN_ERR "ucma_removal_event_handler: warning: connect request event wasn't found\n");
+ pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
}
static int ucma_event_handler(struct rdma_cm_id *cm_id,
@@ -1716,13 +1716,13 @@ static int __init ucma_init(void)
ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
if (ret) {
- printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n");
+ pr_err("rdma_ucm: couldn't create abi_version attr\n");
goto err1;
}
ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
if (!ucma_ctl_table_hdr) {
- printk(KERN_ERR "rdma_ucm: couldn't register sysctl paths\n");
+ pr_err("rdma_ucm: couldn't register sysctl paths\n");
ret = -ENOMEM;
goto err2;
}
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 2116132568e7..29a45d2f8898 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -479,8 +479,8 @@ int ib_ud_header_unpack(void *buf,
buf += IB_LRH_BYTES;
if (header->lrh.link_version != 0) {
- printk(KERN_WARNING "Invalid LRH.link_version %d\n",
- header->lrh.link_version);
+ pr_warn("Invalid LRH.link_version %d\n",
+ header->lrh.link_version);
return -EINVAL;
}
@@ -496,20 +496,20 @@ int ib_ud_header_unpack(void *buf,
buf += IB_GRH_BYTES;
if (header->grh.ip_version != 6) {
- printk(KERN_WARNING "Invalid GRH.ip_version %d\n",
- header->grh.ip_version);
+ pr_warn("Invalid GRH.ip_version %d\n",
+ header->grh.ip_version);
return -EINVAL;
}
if (header->grh.next_header != 0x1b) {
- printk(KERN_WARNING "Invalid GRH.next_header 0x%02x\n",
- header->grh.next_header);
+ pr_warn("Invalid GRH.next_header 0x%02x\n",
+ header->grh.next_header);
return -EINVAL;
}
break;
default:
- printk(KERN_WARNING "Invalid LRH.link_next_header %d\n",
- header->lrh.link_next_header);
+ pr_warn("Invalid LRH.link_next_header %d\n",
+ header->lrh.link_next_header);
return -EINVAL;
}
@@ -525,14 +525,13 @@ int ib_ud_header_unpack(void *buf,
header->immediate_present = 1;
break;
default:
- printk(KERN_WARNING "Invalid BTH.opcode 0x%02x\n",
- header->bth.opcode);
+ pr_warn("Invalid BTH.opcode 0x%02x\n", header->bth.opcode);
return -EINVAL;
}
if (header->bth.transport_header_version != 0) {
- printk(KERN_WARNING "Invalid BTH.transport_header_version %d\n",
- header->bth.transport_header_version);
+ pr_warn("Invalid BTH.transport_header_version %d\n",
+ header->bth.transport_header_version);
return -EINVAL;
}
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 2bf751ecad15..3638c787cb7c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1976,7 +1976,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
resp_size);
INIT_UDATA(&uhw, buf + sizeof(cmd),
(unsigned long)cmd.response + resp_size,
- in_len - sizeof(cmd), out_len - resp_size);
+ in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - resp_size);
memset(&cmd_ex, 0, sizeof(cmd_ex));
cmd_ex.user_handle = cmd.user_handle;
@@ -3091,6 +3092,14 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
!capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
return -EPERM;
+ if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED)
+ return -EINVAL;
+
+ if ((cmd.flow_attr.flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
+ ((cmd.flow_attr.type == IB_FLOW_ATTR_ALL_DEFAULT) ||
+ (cmd.flow_attr.type == IB_FLOW_ATTR_MC_DEFAULT)))
+ return -EINVAL;
+
if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
return -EINVAL;
@@ -3419,7 +3428,8 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
INIT_UDATA(&udata, buf + sizeof cmd,
(unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - sizeof resp);
ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
if (ret)
@@ -3445,7 +3455,8 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
INIT_UDATA(&udata, buf + sizeof cmd,
(unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - sizeof resp);
ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
if (ret)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 39680aed99dd..28ba2cc81535 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -683,12 +683,28 @@ out:
return ev_file;
}
+static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
+{
+ u64 mask;
+
+ if (command <= IB_USER_VERBS_CMD_OPEN_QP)
+ mask = ib_dev->uverbs_cmd_mask;
+ else
+ mask = ib_dev->uverbs_ex_cmd_mask;
+
+ if (mask & ((u64)1 << command))
+ return 0;
+
+ return -1;
+}
+
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_file *file = filp->private_data;
struct ib_device *ib_dev;
struct ib_uverbs_cmd_hdr hdr;
+ __u32 command;
__u32 flags;
int srcu_key;
ssize_t ret;
@@ -707,37 +723,34 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
goto out;
}
- flags = (hdr.command &
- IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
+ if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
+ IB_USER_VERBS_CMD_COMMAND_MASK)) {
+ ret = -EINVAL;
+ goto out;
+ }
- if (!flags) {
- __u32 command;
+ command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+ if (verify_command_mask(ib_dev, command)) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
- if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
- IB_USER_VERBS_CMD_COMMAND_MASK)) {
- ret = -EINVAL;
- goto out;
- }
+ if (!file->ucontext &&
+ command != IB_USER_VERBS_CMD_GET_CONTEXT) {
+ ret = -EINVAL;
+ goto out;
+ }
- command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+ flags = (hdr.command &
+ IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
+ if (!flags) {
if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
!uverbs_cmd_table[command]) {
ret = -EINVAL;
goto out;
}
- if (!file->ucontext &&
- command != IB_USER_VERBS_CMD_GET_CONTEXT) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!(ib_dev->uverbs_cmd_mask & (1ull << command))) {
- ret = -ENOSYS;
- goto out;
- }
-
if (hdr.in_words * 4 != count) {
ret = -EINVAL;
goto out;
@@ -749,21 +762,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
hdr.out_words * 4);
} else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
- __u32 command;
-
struct ib_uverbs_ex_cmd_hdr ex_hdr;
struct ib_udata ucore;
struct ib_udata uhw;
size_t written_count = count;
- if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
- IB_USER_VERBS_CMD_COMMAND_MASK)) {
- ret = -EINVAL;
- goto out;
- }
-
- command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
-
if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
!uverbs_ex_cmd_table[command]) {
ret = -ENOSYS;
@@ -775,11 +778,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
goto out;
}
- if (!(ib_dev->uverbs_ex_cmd_mask & (1ull << command))) {
- ret = -ENOSYS;
- goto out;
- }
-
if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
ret = -EINVAL;
goto out;
@@ -1058,7 +1056,7 @@ static int find_overflow_devnum(void)
ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
"infiniband_verbs");
if (ret) {
- printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n");
+ pr_err("user_verbs: couldn't register dynamic device number\n");
return ret;
}
}
@@ -1279,14 +1277,14 @@ static int __init ib_uverbs_init(void)
ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
"infiniband_verbs");
if (ret) {
- printk(KERN_ERR "user_verbs: couldn't register device number\n");
+ pr_err("user_verbs: couldn't register device number\n");
goto out;
}
uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
if (IS_ERR(uverbs_class)) {
ret = PTR_ERR(uverbs_class);
- printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
+ pr_err("user_verbs: couldn't create class infiniband_verbs\n");
goto out_chrdev;
}
@@ -1294,13 +1292,13 @@ static int __init ib_uverbs_init(void)
ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
if (ret) {
- printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
+ pr_err("user_verbs: couldn't create abi_version attribute\n");
goto out_class;
}
ret = ib_register_client(&uverbs_client);
if (ret) {
- printk(KERN_ERR "user_verbs: couldn't register client\n");
+ pr_err("user_verbs: couldn't register client\n");
goto out_class;
}
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 16f3fb1b9d75..5cd1e3987f2b 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1659,3 +1659,167 @@ next_page:
return i;
}
EXPORT_SYMBOL(ib_sg_to_pages);
+
+struct ib_drain_cqe {
+ struct ib_cqe cqe;
+ struct completion done;
+};
+
+static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe,
+ cqe);
+
+ complete(&cqe->done);
+}
+
+/*
+ * Post a WR and block until its completion is reaped for the SQ.
+ */
+static void __ib_drain_sq(struct ib_qp *qp)
+{
+ struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+ struct ib_drain_cqe sdrain;
+ struct ib_send_wr swr = {}, *bad_swr;
+ int ret;
+
+ if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) {
+ WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT,
+ "IB_POLL_DIRECT poll_ctx not supported for drain\n");
+ return;
+ }
+
+ swr.wr_cqe = &sdrain.cqe;
+ sdrain.cqe.done = ib_drain_qp_done;
+ init_completion(&sdrain.done);
+
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret) {
+ WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+ return;
+ }
+
+ ret = ib_post_send(qp, &swr, &bad_swr);
+ if (ret) {
+ WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+ return;
+ }
+
+ wait_for_completion(&sdrain.done);
+}
+
+/*
+ * Post a WR and block until its completion is reaped for the RQ.
+ */
+static void __ib_drain_rq(struct ib_qp *qp)
+{
+ struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+ struct ib_drain_cqe rdrain;
+ struct ib_recv_wr rwr = {}, *bad_rwr;
+ int ret;
+
+ if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) {
+ WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT,
+ "IB_POLL_DIRECT poll_ctx not supported for drain\n");
+ return;
+ }
+
+ rwr.wr_cqe = &rdrain.cqe;
+ rdrain.cqe.done = ib_drain_qp_done;
+ init_completion(&rdrain.done);
+
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret) {
+ WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+ return;
+ }
+
+ ret = ib_post_recv(qp, &rwr, &bad_rwr);
+ if (ret) {
+ WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+ return;
+ }
+
+ wait_for_completion(&rdrain.done);
+}
+
+/**
+ * ib_drain_sq() - Block until all SQ CQEs have been consumed by the
+ * application.
+ * @qp: queue pair to drain
+ *
+ * If the device has a provider-specific drain function, then
+ * call that. Otherwise call the generic drain function
+ * __ib_drain_sq().
+ *
+ * The caller must:
+ *
+ * ensure there is room in the CQ and SQ for the drain work request and
+ * completion.
+ *
+ * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
+ * IB_POLL_DIRECT.
+ *
+ * ensure that there are no other contexts that are posting WRs concurrently.
+ * Otherwise the drain is not guaranteed.
+ */
+void ib_drain_sq(struct ib_qp *qp)
+{
+ if (qp->device->drain_sq)
+ qp->device->drain_sq(qp);
+ else
+ __ib_drain_sq(qp);
+}
+EXPORT_SYMBOL(ib_drain_sq);
+
+/**
+ * ib_drain_rq() - Block until all RQ CQEs have been consumed by the
+ * application.
+ * @qp: queue pair to drain
+ *
+ * If the device has a provider-specific drain function, then
+ * call that. Otherwise call the generic drain function
+ * __ib_drain_rq().
+ *
+ * The caller must:
+ *
+ * ensure there is room in the CQ and RQ for the drain work request and
+ * completion.
+ *
+ * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
+ * IB_POLL_DIRECT.
+ *
+ * ensure that there are no other contexts that are posting WRs concurrently.
+ * Otherwise the drain is not guaranteed.
+ */
+void ib_drain_rq(struct ib_qp *qp)
+{
+ if (qp->device->drain_rq)
+ qp->device->drain_rq(qp);
+ else
+ __ib_drain_rq(qp);
+}
+EXPORT_SYMBOL(ib_drain_rq);
+
+/**
+ * ib_drain_qp() - Block until all CQEs have been consumed by the
+ * application on both the RQ and SQ.
+ * @qp: queue pair to drain
+ *
+ * The caller must:
+ *
+ * ensure there is room in the CQ(s), SQ, and RQ for drain work requests
+ * and completions.
+ *
+ * allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be
+ * IB_POLL_DIRECT.
+ *
+ * ensure that there are no other contexts that are posting WRs concurrently.
+ * Otherwise the drain is not guaranteed.
+ */
+void ib_drain_qp(struct ib_qp *qp)
+{
+ ib_drain_sq(qp);
+ ib_drain_rq(qp);
+}
+EXPORT_SYMBOL(ib_drain_qp);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index cf21df4a8bf5..b4eeb783573c 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -815,8 +815,15 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
}
}
out:
- if (wq)
+ if (wq) {
+ if (unlikely(qhp->attr.state != C4IW_QP_STATE_RTS)) {
+ if (t4_sq_empty(wq))
+ complete(&qhp->sq_drained);
+ if (t4_rq_empty(wq))
+ complete(&qhp->rq_drained);
+ }
spin_unlock(&qhp->lock);
+ }
return ret;
}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 423a3a91bf41..97c0e5aa5c64 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -476,6 +476,8 @@ struct c4iw_qp {
wait_queue_head_t wait;
struct timer_list timer;
int sq_sig_all;
+ struct completion rq_drained;
+ struct completion sq_drained;
};
static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
@@ -1017,6 +1019,8 @@ extern int c4iw_wr_log;
extern int db_fc_threshold;
extern int db_coalescing_threshold;
extern int use_dsgl;
+void c4iw_drain_rq(struct ib_qp *qp);
+void c4iw_drain_sq(struct ib_qp *qp);
#endif
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index ec04272fbdc2..104662d38d1e 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -564,6 +564,8 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.get_protocol_stats = c4iw_get_mib;
dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
dev->ibdev.get_port_immutable = c4iw_port_immutable;
+ dev->ibdev.drain_sq = c4iw_drain_sq;
+ dev->ibdev.drain_rq = c4iw_drain_rq;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm)
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index e99345eb875a..7b1b1e840ef1 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1697,6 +1697,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
qhp->attr.max_ird = 0;
qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
spin_lock_init(&qhp->lock);
+ init_completion(&qhp->sq_drained);
+ init_completion(&qhp->rq_drained);
mutex_init(&qhp->mutex);
init_waitqueue_head(&qhp->wait);
atomic_set(&qhp->refcnt, 1);
@@ -1888,3 +1890,17 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
return 0;
}
+
+void c4iw_drain_sq(struct ib_qp *ibqp)
+{
+ struct c4iw_qp *qp = to_c4iw_qp(ibqp);
+
+ wait_for_completion(&qp->sq_drained);
+}
+
+void c4iw_drain_rq(struct ib_qp *ibqp)
+{
+ struct c4iw_qp *qp = to_c4iw_qp(ibqp);
+
+ wait_for_completion(&qp->rq_drained);
+}
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 21cb41a60fe8..c74ef2620b85 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -310,7 +310,7 @@ static void aliasguid_query_handler(int status,
if (status) {
pr_debug("(port: %d) failed: status = %d\n",
cb_ctx->port, status);
- rec->time_to_run = ktime_get_real_ns() + 1 * NSEC_PER_SEC;
+ rec->time_to_run = ktime_get_boot_ns() + 1 * NSEC_PER_SEC;
goto out;
}
@@ -416,7 +416,7 @@ next_entry:
be64_to_cpu((__force __be64)rec->guid_indexes),
be64_to_cpu((__force __be64)applied_guid_indexes),
be64_to_cpu((__force __be64)declined_guid_indexes));
- rec->time_to_run = ktime_get_real_ns() +
+ rec->time_to_run = ktime_get_boot_ns() +
resched_delay_sec * NSEC_PER_SEC;
} else {
rec->status = MLX4_GUID_INFO_STATUS_SET;
@@ -708,7 +708,7 @@ static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
}
}
if (resched_delay_sec) {
- u64 curr_time = ktime_get_real_ns();
+ u64 curr_time = ktime_get_boot_ns();
*resched_delay_sec = (low_record_time < curr_time) ? 0 :
div_u64((low_record_time - curr_time), NSEC_PER_SEC);
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 1c7ab6cabbb8..914bc98e753f 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1643,6 +1643,56 @@ static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_
return err;
}
+static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
+ struct ib_flow_attr *flow_attr,
+ enum mlx4_net_trans_promisc_mode *type)
+{
+ int err = 0;
+
+ if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) ||
+ (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) ||
+ (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) {
+ return -EOPNOTSUPP;
+ }
+
+ if (flow_attr->num_of_specs == 0) {
+ type[0] = MLX4_FS_MC_SNIFFER;
+ type[1] = MLX4_FS_UC_SNIFFER;
+ } else {
+ union ib_flow_spec *ib_spec;
+
+ ib_spec = (union ib_flow_spec *)(flow_attr + 1);
+ if (ib_spec->type != IB_FLOW_SPEC_ETH)
+ return -EINVAL;
+
+ /* if all is zero than MC and UC */
+ if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) {
+ type[0] = MLX4_FS_MC_SNIFFER;
+ type[1] = MLX4_FS_UC_SNIFFER;
+ } else {
+ u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01,
+ ib_spec->eth.mask.dst_mac[1],
+ ib_spec->eth.mask.dst_mac[2],
+ ib_spec->eth.mask.dst_mac[3],
+ ib_spec->eth.mask.dst_mac[4],
+ ib_spec->eth.mask.dst_mac[5]};
+
+ /* Above xor was only on MC bit, non empty mask is valid
+ * only if this bit is set and rest are zero.
+ */
+ if (!is_zero_ether_addr(&mac[0]))
+ return -EINVAL;
+
+ if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac))
+ type[0] = MLX4_FS_MC_SNIFFER;
+ else
+ type[0] = MLX4_FS_UC_SNIFFER;
+ }
+ }
+
+ return err;
+}
+
static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
int domain)
@@ -1653,6 +1703,10 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
int is_bonded = mlx4_is_bonded(dev);
+ if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
+ (flow_attr->type != IB_FLOW_ATTR_NORMAL))
+ return ERR_PTR(-EOPNOTSUPP);
+
memset(type, 0, sizeof(type));
mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
@@ -1663,7 +1717,19 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
switch (flow_attr->type) {
case IB_FLOW_ATTR_NORMAL:
- type[0] = MLX4_FS_REGULAR;
+ /* If dont trap flag (continue match) is set, under specific
+ * condition traffic be replicated to given qp,
+ * without stealing it
+ */
+ if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) {
+ err = mlx4_ib_add_dont_trap_rule(dev,
+ flow_attr,
+ type);
+ if (err)
+ goto err_free;
+ } else {
+ type[0] = MLX4_FS_REGULAR;
+ }
break;
case IB_FLOW_ATTR_ALL_DEFAULT:
@@ -1675,8 +1741,8 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
break;
case IB_FLOW_ATTR_SNIFFER:
- type[0] = MLX4_FS_UC_SNIFFER;
- type[1] = MLX4_FS_MC_SNIFFER;
+ type[0] = MLX4_FS_MIRROR_RX_PORT;
+ type[1] = MLX4_FS_MIRROR_SX_PORT;
break;
default:
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 7e89a547bf34..5afbb697e691 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1382,11 +1382,20 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
return 0;
}
+static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
+{
+ priority *= 2;
+ if (!dont_trap)
+ priority++;
+ return priority;
+}
+
#define MLX5_FS_MAX_TYPES 10
#define MLX5_FS_MAX_ENTRIES 32000UL
static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
struct ib_flow_attr *flow_attr)
{
+ bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio;
struct mlx5_flow_table *ft;
@@ -1396,10 +1405,12 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
int err = 0;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- if (flow_is_multicast_only(flow_attr))
+ if (flow_is_multicast_only(flow_attr) &&
+ !dont_trap)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
- priority = flow_attr->priority;
+ priority = ib_prio_to_core_prio(flow_attr->priority,
+ dont_trap);
ns = mlx5_get_flow_namespace(dev->mdev,
MLX5_FLOW_NAMESPACE_BYPASS);
num_entries = MLX5_FS_MAX_ENTRIES;
@@ -1447,6 +1458,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
unsigned int spec_index;
u32 *match_c;
u32 *match_v;
+ u32 action;
int err = 0;
if (!is_valid_attr(flow_attr))
@@ -1472,9 +1484,11 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
/* Outer header support only */
match_criteria_enable = (!outer_header_zero(match_c)) << 0;
+ action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable,
match_c, match_v,
- MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ action,
MLX5_FS_DEFAULT_FLOW_TAG,
dst);
@@ -1494,6 +1508,29 @@ free:
return err ? ERR_PTR(err) : handler;
}
+static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_ib_flow_handler *handler_dst = NULL;
+ struct mlx5_ib_flow_handler *handler = NULL;
+
+ handler = create_flow_rule(dev, ft_prio, flow_attr, NULL);
+ if (!IS_ERR(handler)) {
+ handler_dst = create_flow_rule(dev, ft_prio,
+ flow_attr, dst);
+ if (IS_ERR(handler_dst)) {
+ mlx5_del_flow_rule(handler->rule);
+ kfree(handler);
+ handler = handler_dst;
+ } else {
+ list_add(&handler_dst->list, &handler->list);
+ }
+ }
+
+ return handler;
+}
enum {
LEFTOVERS_MC,
LEFTOVERS_UC,
@@ -1571,7 +1608,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
if (domain != IB_FLOW_DOMAIN_USER ||
flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
- flow_attr->flags)
+ (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP))
return ERR_PTR(-EINVAL);
dst = kzalloc(sizeof(*dst), GFP_KERNEL);
@@ -1590,8 +1627,13 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- handler = create_flow_rule(dev, ft_prio, flow_attr,
- dst);
+ if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
+ handler = create_dont_trap_rule(dev, ft_prio,
+ flow_attr, dst);
+ } else {
+ handler = create_flow_rule(dev, ft_prio, flow_attr,
+ dst);
+ }
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
handler = create_leftovers_rule(dev, ft_prio, flow_attr,
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 60b89629f091..76b2b42e0535 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -127,7 +127,7 @@ struct mlx5_ib_pd {
};
#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1)
-#define MLX5_IB_FLOW_LAST_PRIO (MLX5_IB_FLOW_MCAST_PRIO - 1)
+#define MLX5_IB_FLOW_LAST_PRIO (MLX5_BY_PASS_NUM_REGULAR_PRIOS - 1)
#if (MLX5_IB_FLOW_LAST_PRIO <= 0)
#error "Invalid number of bypass priorities"
#endif
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 4659256cd95e..3b2ddd64a371 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -75,7 +75,8 @@ static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
struct mlx5_create_srq_mbox_in **in,
- struct ib_udata *udata, int buf_size, int *inlen)
+ struct ib_udata *udata, int buf_size, int *inlen,
+ int is_xrc)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_create_srq ucmd = {};
@@ -87,13 +88,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
int ncont;
u32 offset;
u32 uidx = MLX5_IB_DEFAULT_UIDX;
- int drv_data = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
- if (drv_data < 0)
- return -EINVAL;
-
- ucmdlen = (drv_data < sizeof(ucmd)) ?
- drv_data : sizeof(ucmd);
+ ucmdlen = min(udata->inlen, sizeof(ucmd));
if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
mlx5_ib_dbg(dev, "failed copy udata\n");
@@ -103,15 +99,17 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
if (ucmd.reserved0 || ucmd.reserved1)
return -EINVAL;
- if (drv_data > sizeof(ucmd) &&
+ if (udata->inlen > sizeof(ucmd) &&
!ib_is_udata_cleared(udata, sizeof(ucmd),
- drv_data - sizeof(ucmd)))
+ udata->inlen - sizeof(ucmd)))
return -EINVAL;
- err = get_srq_user_index(to_mucontext(pd->uobject->context),
- &ucmd, udata->inlen, &uidx);
- if (err)
- return err;
+ if (is_xrc) {
+ err = get_srq_user_index(to_mucontext(pd->uobject->context),
+ &ucmd, udata->inlen, &uidx);
+ if (err)
+ return err;
+ }
srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
@@ -151,7 +149,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
- if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+ if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
+ is_xrc){
xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
xrc_srq_context_entry);
MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
@@ -170,7 +169,7 @@ err_umem:
static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
struct mlx5_create_srq_mbox_in **in, int buf_size,
- int *inlen)
+ int *inlen, int is_xrc)
{
int err;
int i;
@@ -224,7 +223,8 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
- if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+ if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
+ is_xrc){
xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
xrc_srq_context_entry);
/* 0xffffff means we ask to work with cqe version 0 */
@@ -302,10 +302,14 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
srq->msrq.max_avail_gather);
+ is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
+
if (pd->uobject)
- err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen);
+ err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen,
+ is_xrc);
else
- err = create_srq_kernel(dev, srq, &in, buf_size, &inlen);
+ err = create_srq_kernel(dev, srq, &in, buf_size, &inlen,
+ is_xrc);
if (err) {
mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
@@ -313,7 +317,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
goto err_srq;
}
- is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
in->ctx.state_log_sz = ilog2(srq->msrq.max);
flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
xrcdn = 0;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 12503f15fbd6..45bdfa0e3b2b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -114,6 +114,7 @@ struct ocrdma_dev_attr {
u8 local_ca_ack_delay;
u8 ird;
u8 num_ird_pages;
+ u8 udp_encap;
};
struct ocrdma_dma_mem {
@@ -356,6 +357,7 @@ struct ocrdma_ah {
struct ocrdma_av *av;
u16 sgid_index;
u32 id;
+ u8 hdr_type;
};
struct ocrdma_qp_hwq_info {
@@ -598,4 +600,10 @@ static inline u8 ocrdma_get_ae_link_state(u32 ae_state)
return ((ae_state & OCRDMA_AE_LSC_LS_MASK) >> OCRDMA_AE_LSC_LS_SHIFT);
}
+static inline bool ocrdma_is_udp_encap_supported(struct ocrdma_dev *dev)
+{
+ return (dev->attr.udp_encap & OCRDMA_L3_TYPE_IPV4) ||
+ (dev->attr.udp_encap & OCRDMA_L3_TYPE_IPV6);
+}
+
#endif
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 3790771f2baa..797362a297b2 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -55,18 +55,46 @@
#define OCRDMA_VID_PCP_SHIFT 0xD
+static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type)
+{
+ switch (hdr_type) {
+ case OCRDMA_L3_TYPE_IB_GRH:
+ return (u16)0x8915;
+ case OCRDMA_L3_TYPE_IPV4:
+ return (u16)0x0800;
+ case OCRDMA_L3_TYPE_IPV6:
+ return (u16)0x86dd;
+ default:
+ pr_err("ocrdma%d: Invalid network header\n", devid);
+ return 0;
+ }
+}
+
static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
struct ib_ah_attr *attr, union ib_gid *sgid,
int pdid, bool *isvlan, u16 vlan_tag)
{
- int status = 0;
+ int status;
struct ocrdma_eth_vlan eth;
struct ocrdma_grh grh;
int eth_sz;
+ u16 proto_num = 0;
+ u8 nxthdr = 0x11;
+ struct iphdr ipv4;
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } sgid_addr, dgid_addr;
memset(&eth, 0, sizeof(eth));
memset(&grh, 0, sizeof(grh));
+ /* Protocol Number */
+ proto_num = ocrdma_hdr_type_to_proto_num(dev->id, ah->hdr_type);
+ if (!proto_num)
+ return -EINVAL;
+ nxthdr = (proto_num == 0x8915) ? 0x1b : 0x11;
/* VLAN */
if (!vlan_tag || (vlan_tag > 0xFFF))
vlan_tag = dev->pvid;
@@ -78,13 +106,13 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
dev->id);
}
eth.eth_type = cpu_to_be16(0x8100);
- eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
+ eth.roce_eth_type = cpu_to_be16(proto_num);
vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT;
eth.vlan_tag = cpu_to_be16(vlan_tag);
eth_sz = sizeof(struct ocrdma_eth_vlan);
*isvlan = true;
} else {
- eth.eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
+ eth.eth_type = cpu_to_be16(proto_num);
eth_sz = sizeof(struct ocrdma_eth_basic);
}
/* MAC */
@@ -93,18 +121,33 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
if (status)
return status;
ah->sgid_index = attr->grh.sgid_index;
- memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid));
- memcpy(&grh.dgid[0], attr->grh.dgid.raw, sizeof(attr->grh.dgid.raw));
-
- grh.tclass_flow = cpu_to_be32((6 << 28) |
- (attr->grh.traffic_class << 24) |
- attr->grh.flow_label);
- /* 0x1b is next header value in GRH */
- grh.pdid_hoplimit = cpu_to_be32((pdid << 16) |
- (0x1b << 8) | attr->grh.hop_limit);
/* Eth HDR */
memcpy(&ah->av->eth_hdr, &eth, eth_sz);
- memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
+ if (ah->hdr_type == RDMA_NETWORK_IPV4) {
+ *((__be16 *)&ipv4) = htons((4 << 12) | (5 << 8) |
+ attr->grh.traffic_class);
+ ipv4.id = cpu_to_be16(pdid);
+ ipv4.frag_off = htons(IP_DF);
+ ipv4.tot_len = htons(0);
+ ipv4.ttl = attr->grh.hop_limit;
+ ipv4.protocol = nxthdr;
+ rdma_gid2ip(&sgid_addr._sockaddr, sgid);
+ ipv4.saddr = sgid_addr._sockaddr_in.sin_addr.s_addr;
+ rdma_gid2ip(&dgid_addr._sockaddr, &attr->grh.dgid);
+ ipv4.daddr = dgid_addr._sockaddr_in.sin_addr.s_addr;
+ memcpy((u8 *)ah->av + eth_sz, &ipv4, sizeof(struct iphdr));
+ } else {
+ memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid));
+ grh.tclass_flow = cpu_to_be32((6 << 28) |
+ (attr->grh.traffic_class << 24) |
+ attr->grh.flow_label);
+ memcpy(&grh.dgid[0], attr->grh.dgid.raw,
+ sizeof(attr->grh.dgid.raw));
+ grh.pdid_hoplimit = cpu_to_be32((pdid << 16) |
+ (nxthdr << 8) |
+ attr->grh.hop_limit);
+ memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
+ }
if (*isvlan)
ah->av->valid |= OCRDMA_AV_VLAN_VALID;
ah->av->valid = cpu_to_le32(ah->av->valid);
@@ -128,6 +171,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
if (atomic_cmpxchg(&dev->update_sl, 1, 0))
ocrdma_init_service_level(dev);
+
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
@@ -148,6 +192,8 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
dev_put(sgid_attr.ndev);
}
+ /* Get network header type for this GID */
+ ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
if ((pd->uctx) &&
(!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
@@ -172,6 +218,11 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
ahid_addr = pd->uctx->ah_tbl.va + attr->dlid;
*ahid_addr = 0;
*ahid_addr |= ah->id & OCRDMA_AH_ID_MASK;
+ if (ocrdma_is_udp_encap_supported(dev)) {
+ *ahid_addr |= ((u32)ah->hdr_type &
+ OCRDMA_AH_L3_TYPE_MASK) <<
+ OCRDMA_AH_L3_TYPE_SHIFT;
+ }
if (isvlan)
*ahid_addr |= (OCRDMA_AH_VLAN_VALID_MASK <<
OCRDMA_AH_VLAN_VALID_SHIFT);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
index 04a30ae67473..3856dd4c7e3d 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -46,9 +46,10 @@
enum {
OCRDMA_AH_ID_MASK = 0x3FF,
OCRDMA_AH_VLAN_VALID_MASK = 0x01,
- OCRDMA_AH_VLAN_VALID_SHIFT = 0x1F
+ OCRDMA_AH_VLAN_VALID_SHIFT = 0x1F,
+ OCRDMA_AH_L3_TYPE_MASK = 0x03,
+ OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */
};
-
struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *);
int ocrdma_destroy_ah(struct ib_ah *);
int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 283ca842ff74..16740dcb876b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1113,7 +1113,7 @@ mbx_err:
static int ocrdma_nonemb_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe,
void *payload_va)
{
- int status = 0;
+ int status;
struct ocrdma_mbx_rsp *rsp = payload_va;
if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >>
@@ -1144,6 +1144,9 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
attr->max_pd =
(rsp->max_pd_ca_ack_delay & OCRDMA_MBX_QUERY_CFG_MAX_PD_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT;
+ attr->udp_encap = (rsp->max_pd_ca_ack_delay &
+ OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_L3_TYPE_SHIFT;
attr->max_dpp_pds =
(rsp->max_dpp_pds_credits & OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_OFFSET;
@@ -2138,7 +2141,6 @@ int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state,
enum ib_qp_state *old_ib_state)
{
unsigned long flags;
- int status = 0;
enum ocrdma_qp_state new_state;
new_state = get_ocrdma_qp_state(new_ib_state);
@@ -2163,7 +2165,7 @@ int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state,
qp->state = new_state;
spin_unlock_irqrestore(&qp->q_lock, flags);
- return status;
+ return 0;
}
static u32 ocrdma_set_create_qp_mbx_access_flags(struct ocrdma_qp *qp)
@@ -2501,7 +2503,12 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
union ib_gid sgid, zgid;
struct ib_gid_attr sgid_attr;
u32 vlan_id = 0xFFFF;
- u8 mac_addr[6];
+ u8 mac_addr[6], hdr_type;
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } sgid_addr, dgid_addr;
struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
if ((ah_attr->ah_flags & IB_AH_GRH) == 0)
@@ -2516,6 +2523,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
cmd->params.hop_lmt_rq_psn |=
(ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT);
cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID;
+
+ /* GIDs */
memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
sizeof(cmd->params.dgid));
@@ -2538,6 +2547,16 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
return status;
cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) |
(mac_addr[2] << 16) | (mac_addr[3] << 24);
+
+ hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+ if (hdr_type == RDMA_NETWORK_IPV4) {
+ rdma_gid2ip(&sgid_addr._sockaddr, &sgid);
+ rdma_gid2ip(&dgid_addr._sockaddr, &ah_attr->grh.dgid);
+ memcpy(&cmd->params.dgid[0],
+ &dgid_addr._sockaddr_in.sin_addr.s_addr, 4);
+ memcpy(&cmd->params.sgid[0],
+ &sgid_addr._sockaddr_in.sin_addr.s_addr, 4);
+ }
/* convert them to LE format. */
ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
@@ -2558,7 +2577,9 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
cmd->params.rnt_rc_sl_fl |=
(dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT;
}
-
+ cmd->params.max_sge_recv_flags |= ((hdr_type <<
+ OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_SHIFT) &
+ OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_MASK);
return 0;
}
@@ -2871,7 +2892,7 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq)
static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype,
struct ocrdma_dcbx_cfg *dcbxcfg)
{
- int status = 0;
+ int status;
dma_addr_t pa;
struct ocrdma_mqe cmd;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index f38743018cb4..3d75f65ce87e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -89,8 +89,10 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
+ struct ocrdma_dev *dev;
int err;
+ dev = get_ocrdma_dev(ibdev);
err = ocrdma_query_port(ibdev, port_num, &attr);
if (err)
return err;
@@ -98,6 +100,8 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ if (ocrdma_is_udp_encap_supported(dev))
+ immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 99dd6fdf06d7..0efc9662c6d8 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -140,7 +140,11 @@ enum {
OCRDMA_DB_RQ_SHIFT = 24
};
-#define OCRDMA_ROUDP_FLAGS_SHIFT 0x03
+enum {
+ OCRDMA_L3_TYPE_IB_GRH = 0x00,
+ OCRDMA_L3_TYPE_IPV4 = 0x01,
+ OCRDMA_L3_TYPE_IPV6 = 0x02
+};
#define OCRDMA_DB_CQ_RING_ID_MASK 0x3FF /* bits 0 - 9 */
#define OCRDMA_DB_CQ_RING_ID_EXT_MASK 0x0C00 /* bits 10-11 of qid at 12-11 */
@@ -546,7 +550,8 @@ enum {
OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT = 8,
OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK = 0xFF <<
OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT,
-
+ OCRDMA_MBX_QUERY_CFG_L3_TYPE_SHIFT = 3,
+ OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK = 0x18,
OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT = 0,
OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK = 0xFFFF,
OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT = 16,
@@ -1107,6 +1112,8 @@ enum {
OCRDMA_QP_PARAMS_STATE_MASK = BIT(5) | BIT(6) | BIT(7),
OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = BIT(8),
OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = BIT(9),
+ OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_SHIFT = 11,
+ OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_MASK = BIT(11) | BIT(12) | BIT(13),
OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT = 16,
OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK = 0xFFFF <<
OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT,
@@ -1735,8 +1742,11 @@ enum {
/* w1 */
OCRDMA_CQE_UD_XFER_LEN_SHIFT = 16,
+ OCRDMA_CQE_UD_XFER_LEN_MASK = 0x1FFF,
OCRDMA_CQE_PKEY_SHIFT = 0,
OCRDMA_CQE_PKEY_MASK = 0xFFFF,
+ OCRDMA_CQE_UD_L3TYPE_SHIFT = 29,
+ OCRDMA_CQE_UD_L3TYPE_MASK = 0x07,
/* w2 */
OCRDMA_CQE_QPN_SHIFT = 0,
@@ -1861,7 +1871,7 @@ struct ocrdma_ewqe_ud_hdr {
u32 rsvd_dest_qpn;
u32 qkey;
u32 rsvd_ahid;
- u32 rsvd;
+ u32 hdr_type;
};
/* extended wqe followed by hdr_wqe for Fast Memory register */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 255f774080a4..8bef09a8c49f 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -610,7 +610,7 @@ static char *ocrdma_driver_dbg_stats(struct ocrdma_dev *dev)
static void ocrdma_update_stats(struct ocrdma_dev *dev)
{
ulong now = jiffies, secs;
- int status = 0;
+ int status;
struct ocrdma_rdma_stats_resp *rdma_stats =
(struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
struct ocrdma_rsrc_stats *rsrc_stats = &rdma_stats->act_rsrc_stats;
@@ -641,7 +641,7 @@ static ssize_t ocrdma_dbgfs_ops_write(struct file *filp,
{
char tmp_str[32];
long reset;
- int status = 0;
+ int status;
struct ocrdma_stats *pstats = filp->private_data;
struct ocrdma_dev *dev = pstats->dev;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 12420e4ecf3d..a8496a18e20d 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -419,7 +419,7 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
struct ib_udata *udata)
{
struct ocrdma_pd *pd = NULL;
- int status = 0;
+ int status;
pd = kzalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
@@ -468,7 +468,7 @@ static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
struct ocrdma_pd *pd)
{
- int status = 0;
+ int status;
if (dev->pd_mgr->pd_prealloc_valid)
status = ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled);
@@ -596,7 +596,7 @@ map_err:
int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
{
- int status = 0;
+ int status;
struct ocrdma_mm *mm, *tmp;
struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device);
@@ -623,7 +623,7 @@ int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
unsigned long len = (vma->vm_end - vma->vm_start);
- int status = 0;
+ int status;
bool found;
if (vma->vm_start & (PAGE_SIZE - 1))
@@ -1285,7 +1285,7 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
struct ib_udata *udata, int dpp_offset,
int dpp_credit_lmt, int srq)
{
- int status = 0;
+ int status;
u64 usr_db;
struct ocrdma_create_qp_uresp uresp;
struct ocrdma_pd *pd = qp->pd;
@@ -1494,9 +1494,7 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
*/
if (status < 0)
return status;
- status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask);
-
- return status;
+ return ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask);
}
int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -1949,7 +1947,7 @@ int ocrdma_modify_srq(struct ib_srq *ibsrq,
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata)
{
- int status = 0;
+ int status;
struct ocrdma_srq *srq;
srq = get_ocrdma_srq(ibsrq);
@@ -2005,6 +2003,7 @@ static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
else
ud_hdr->qkey = ud_wr(wr)->remote_qkey;
ud_hdr->rsvd_ahid = ah->id;
+ ud_hdr->hdr_type = ah->hdr_type;
if (ah->av->valid & OCRDMA_AV_VLAN_VALID)
hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT);
}
@@ -2717,9 +2716,11 @@ static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
return expand;
}
-static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe)
+static int ocrdma_update_ud_rcqe(struct ocrdma_dev *dev, struct ib_wc *ibwc,
+ struct ocrdma_cqe *cqe)
{
int status;
+ u16 hdr_type = 0;
status = (le32_to_cpu(cqe->flags_status_srcqpn) &
OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
@@ -2728,7 +2729,17 @@ static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe)
ibwc->pkey_index = 0;
ibwc->wc_flags = IB_WC_GRH;
ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
- OCRDMA_CQE_UD_XFER_LEN_SHIFT);
+ OCRDMA_CQE_UD_XFER_LEN_SHIFT) &
+ OCRDMA_CQE_UD_XFER_LEN_MASK;
+
+ if (ocrdma_is_udp_encap_supported(dev)) {
+ hdr_type = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
+ OCRDMA_CQE_UD_L3TYPE_SHIFT) &
+ OCRDMA_CQE_UD_L3TYPE_MASK;
+ ibwc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
+ ibwc->network_hdr_type = hdr_type;
+ }
+
return status;
}
@@ -2791,12 +2802,15 @@ static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp,
struct ocrdma_cqe *cqe, struct ib_wc *ibwc)
{
+ struct ocrdma_dev *dev;
+
+ dev = get_ocrdma_dev(qp->ibqp.device);
ibwc->opcode = IB_WC_RECV;
ibwc->qp = &qp->ibqp;
ibwc->status = IB_WC_SUCCESS;
if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
- ocrdma_update_ud_rcqe(ibwc, cqe);
+ ocrdma_update_ud_rcqe(dev, ibwc, cqe);
else
ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index a6f3eab0f350..85be0de3ab26 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -244,6 +244,7 @@ struct ipoib_cm_tx {
unsigned tx_tail;
unsigned long flags;
u32 mtu;
+ unsigned max_send_sge;
};
struct ipoib_cm_rx_buf {
@@ -390,6 +391,7 @@ struct ipoib_dev_priv {
int hca_caps;
struct ipoib_ethtool_st ethtool;
struct timer_list poll_timer;
+ unsigned max_send_sge;
};
struct ipoib_ah {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 917e46ea3bf6..c8ed53562c9b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -710,6 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_tx_buf *tx_req;
int rc;
+ unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);
if (unlikely(skb->len > tx->mtu)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -719,7 +720,23 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN);
return;
}
-
+ if (skb_shinfo(skb)->nr_frags > usable_sge) {
+ if (skb_linearize(skb) < 0) {
+ ipoib_warn(priv, "skb could not be linearized\n");
+ ++dev->stats.tx_dropped;
+ ++dev->stats.tx_errors;
+ dev_kfree_skb_any(skb);
+ return;
+ }
+ /* Does skb_linearize return ok without reducing nr_frags? */
+ if (skb_shinfo(skb)->nr_frags > usable_sge) {
+ ipoib_warn(priv, "too many frags after skb linearize\n");
+ ++dev->stats.tx_dropped;
+ ++dev->stats.tx_errors;
+ dev_kfree_skb_any(skb);
+ return;
+ }
+ }
ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
tx->tx_head, skb->len, tx->qp->qp_num);
@@ -1031,7 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
struct ib_qp *tx_qp;
if (dev->features & NETIF_F_SG)
- attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
+ attr.cap.max_send_sge =
+ min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
tx_qp = ib_create_qp(priv->pd, &attr);
if (PTR_ERR(tx_qp) == -EINVAL) {
@@ -1040,6 +1058,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
tx_qp = ib_create_qp(priv->pd, &attr);
}
+ tx->max_send_sge = attr.cap.max_send_sge;
return tx_qp;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index fa9c42ff1fb0..899e6b7fb8a5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -538,6 +538,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_tx_buf *tx_req;
int hlen, rc;
void *phead;
+ unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb);
if (skb_is_gso(skb)) {
hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
@@ -561,6 +562,23 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
phead = NULL;
hlen = 0;
}
+ if (skb_shinfo(skb)->nr_frags > usable_sge) {
+ if (skb_linearize(skb) < 0) {
+ ipoib_warn(priv, "skb could not be linearized\n");
+ ++dev->stats.tx_dropped;
+ ++dev->stats.tx_errors;
+ dev_kfree_skb_any(skb);
+ return;
+ }
+ /* Does skb_linearize return ok without reducing nr_frags? */
+ if (skb_shinfo(skb)->nr_frags > usable_sge) {
+ ipoib_warn(priv, "too many frags after skb linearize\n");
+ ++dev->stats.tx_dropped;
+ ++dev->stats.tx_errors;
+ dev_kfree_skb_any(skb);
+ return;
+ }
+ }
ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
skb->len, address, qpn);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index d48c5bae7877..b809c373e40e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -206,7 +206,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
if (dev->features & NETIF_F_SG)
- init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
+ init_attr.cap.max_send_sge =
+ min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
priv->qp = ib_create_qp(priv->pd, &init_attr);
if (IS_ERR(priv->qp)) {
@@ -233,6 +234,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->rx_wr.next = NULL;
priv->rx_wr.sg_list = priv->rx_sge;
+ priv->max_send_sge = init_attr.cap.max_send_sge;
+
return 0;
out_free_send_cq:
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 95f0a64e076b..0351059783b1 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -458,9 +458,6 @@ struct iser_fr_pool {
* @comp: iser completion context
* @fr_pool: connection fast registration poool
* @pi_support: Indicate device T10-PI support
- * @last: last send wr to signal all flush errors were drained
- * @last_cqe: cqe handler for last wr
- * @last_comp: completes when all connection completions consumed
*/
struct ib_conn {
struct rdma_cm_id *cma_id;
@@ -472,10 +469,7 @@ struct ib_conn {
struct iser_comp *comp;
struct iser_fr_pool fr_pool;
bool pi_support;
- struct ib_send_wr last;
- struct ib_cqe last_cqe;
struct ib_cqe reg_cqe;
- struct completion last_comp;
};
/**
@@ -617,7 +611,6 @@ void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc);
-void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_task_rdma_init(struct iscsi_iser_task *task);
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index ed54b388e7ad..81ae2e30dd12 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -729,13 +729,6 @@ void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc)
kmem_cache_free(ig.desc_cache, desc);
}
-void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc)
-{
- struct ib_conn *ib_conn = wc->qp->qp_context;
-
- complete(&ib_conn->last_comp);
-}
-
void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
{
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index f21bdcc34d59..1b4945367e4f 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -670,7 +670,6 @@ void iser_conn_release(struct iser_conn *iser_conn)
int iser_conn_terminate(struct iser_conn *iser_conn)
{
struct ib_conn *ib_conn = &iser_conn->ib_conn;
- struct ib_send_wr *bad_wr;
int err = 0;
/* terminate the iser conn only if the conn state is UP */
@@ -695,14 +694,8 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
iser_err("Failed to disconnect, conn: 0x%p err %d\n",
iser_conn, err);
- /* post an indication that all flush errors were consumed */
- err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr);
- if (err) {
- iser_err("conn %p failed to post last wr", ib_conn);
- return 1;
- }
-
- wait_for_completion(&ib_conn->last_comp);
+ /* block until all flush errors are consumed */
+ ib_drain_sq(ib_conn->qp);
}
return 1;
@@ -961,10 +954,6 @@ void iser_conn_init(struct iser_conn *iser_conn)
ib_conn->post_recv_buf_count = 0;
ib_conn->reg_cqe.done = iser_reg_comp;
- ib_conn->last_cqe.done = iser_last_comp;
- ib_conn->last.wr_cqe = &ib_conn->last_cqe;
- ib_conn->last.opcode = IB_WR_SEND;
- init_completion(&ib_conn->last_comp);
}
/**
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 03022f6420d7..b6bf20496021 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -446,49 +446,17 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
dev->max_pages_per_mr);
}
-static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc)
-{
- struct srp_rdma_ch *ch = cq->cq_context;
-
- complete(&ch->done);
-}
-
-static struct ib_cqe srp_drain_cqe = {
- .done = srp_drain_done,
-};
-
/**
* srp_destroy_qp() - destroy an RDMA queue pair
* @ch: SRP RDMA channel.
*
- * Change a queue pair into the error state and wait until all receive
- * completions have been processed before destroying it. This avoids that
- * the receive completion handler can access the queue pair while it is
+ * Drain the qp before destroying it. This avoids that the receive
+ * completion handler can access the queue pair while it is
* being destroyed.
*/
static void srp_destroy_qp(struct srp_rdma_ch *ch)
{
- static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
- static struct ib_recv_wr wr = { 0 };
- struct ib_recv_wr *bad_wr;
- int ret;
-
- wr.wr_cqe = &srp_drain_cqe;
- /* Destroying a QP and reusing ch->done is only safe if not connected */
- WARN_ON_ONCE(ch->connected);
-
- ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
- WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret);
- if (ret)
- goto out;
-
- init_completion(&ch->done);
- ret = ib_post_recv(ch->qp, &wr, &bad_wr);
- WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
- if (ret == 0)
- wait_for_completion(&ch->done);
-
-out:
+ ib_drain_rq(ch->qp);
ib_destroy_qp(ch->qp);
}
@@ -508,7 +476,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
if (!init_attr)
return -ENOMEM;
- /* queue_size + 1 for ib_drain_qp */
+ /* queue_size + 1 for ib_drain_rq() */
recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
ch->comp_vector, IB_POLL_SOFTIRQ);
if (IS_ERR(recv_cq)) {
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 0c37fee363b1..25bdaeef2520 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -91,76 +91,32 @@ MODULE_PARM_DESC(srpt_service_guid,
" instead of using the node_guid of the first HCA.");
static struct ib_client srpt_client;
-static void srpt_release_channel(struct srpt_rdma_ch *ch);
+static void srpt_release_cmd(struct se_cmd *se_cmd);
+static void srpt_free_ch(struct kref *kref);
static int srpt_queue_status(struct se_cmd *cmd);
static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc);
+static void srpt_process_wait_list(struct srpt_rdma_ch *ch);
-/**
- * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE.
- */
-static inline
-enum dma_data_direction opposite_dma_dir(enum dma_data_direction dir)
-{
- switch (dir) {
- case DMA_TO_DEVICE: return DMA_FROM_DEVICE;
- case DMA_FROM_DEVICE: return DMA_TO_DEVICE;
- default: return dir;
- }
-}
-
-/**
- * srpt_sdev_name() - Return the name associated with the HCA.
- *
- * Examples are ib0, ib1, ...
- */
-static inline const char *srpt_sdev_name(struct srpt_device *sdev)
-{
- return sdev->device->name;
-}
-
-static enum rdma_ch_state srpt_get_ch_state(struct srpt_rdma_ch *ch)
-{
- unsigned long flags;
- enum rdma_ch_state state;
-
- spin_lock_irqsave(&ch->spinlock, flags);
- state = ch->state;
- spin_unlock_irqrestore(&ch->spinlock, flags);
- return state;
-}
-
-static enum rdma_ch_state
-srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new_state)
-{
- unsigned long flags;
- enum rdma_ch_state prev;
-
- spin_lock_irqsave(&ch->spinlock, flags);
- prev = ch->state;
- ch->state = new_state;
- spin_unlock_irqrestore(&ch->spinlock, flags);
- return prev;
-}
-
-/**
- * srpt_test_and_set_ch_state() - Test and set the channel state.
- *
- * Returns true if and only if the channel state has been set to the new state.
+/*
+ * The only allowed channel state changes are those that change the channel
+ * state into a state with a higher numerical value. Hence the new > prev test.
*/
-static bool
-srpt_test_and_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state old,
- enum rdma_ch_state new)
+static bool srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new)
{
unsigned long flags;
enum rdma_ch_state prev;
+ bool changed = false;
spin_lock_irqsave(&ch->spinlock, flags);
prev = ch->state;
- if (prev == old)
+ if (new > prev) {
ch->state = new;
+ changed = true;
+ }
spin_unlock_irqrestore(&ch->spinlock, flags);
- return prev == old;
+
+ return changed;
}
/**
@@ -182,7 +138,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
return;
pr_debug("ASYNC event= %d on device= %s\n", event->event,
- srpt_sdev_name(sdev));
+ sdev->device->name);
switch (event->event) {
case IB_EVENT_PORT_ERR:
@@ -220,25 +176,39 @@ static void srpt_srq_event(struct ib_event *event, void *ctx)
pr_info("SRQ event %d\n", event->event);
}
+static const char *get_ch_state_name(enum rdma_ch_state s)
+{
+ switch (s) {
+ case CH_CONNECTING:
+ return "connecting";
+ case CH_LIVE:
+ return "live";
+ case CH_DISCONNECTING:
+ return "disconnecting";
+ case CH_DRAINING:
+ return "draining";
+ case CH_DISCONNECTED:
+ return "disconnected";
+ }
+ return "???";
+}
+
/**
* srpt_qp_event() - QP event callback function.
*/
static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
{
pr_debug("QP event %d on cm_id=%p sess_name=%s state=%d\n",
- event->event, ch->cm_id, ch->sess_name, srpt_get_ch_state(ch));
+ event->event, ch->cm_id, ch->sess_name, ch->state);
switch (event->event) {
case IB_EVENT_COMM_EST:
ib_cm_notify(ch->cm_id, event->event);
break;
case IB_EVENT_QP_LAST_WQE_REACHED:
- if (srpt_test_and_set_ch_state(ch, CH_DRAINING,
- CH_RELEASING))
- srpt_release_channel(ch);
- else
- pr_debug("%s: state %d - ignored LAST_WQE.\n",
- ch->sess_name, srpt_get_ch_state(ch));
+ pr_debug("%s-%d, state %s: received Last WQE event.\n",
+ ch->sess_name, ch->qp->qp_num,
+ get_ch_state_name(ch->state));
break;
default:
pr_err("received unrecognized IB QP event %d\n", event->event);
@@ -281,7 +251,7 @@ static void srpt_get_class_port_info(struct ib_dm_mad *mad)
struct ib_class_port_info *cif;
cif = (struct ib_class_port_info *)mad->data;
- memset(cif, 0, sizeof *cif);
+ memset(cif, 0, sizeof(*cif));
cif->base_version = 1;
cif->class_version = 1;
cif->resp_time_value = 20;
@@ -340,7 +310,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
return;
}
- memset(iocp, 0, sizeof *iocp);
+ memset(iocp, 0, sizeof(*iocp));
strcpy(iocp->id_string, SRPT_ID_STRING);
iocp->guid = cpu_to_be64(srpt_service_guid);
iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
@@ -390,7 +360,7 @@ static void srpt_get_svc_entries(u64 ioc_guid,
}
svc_entries = (struct ib_dm_svc_entries *)mad->data;
- memset(svc_entries, 0, sizeof *svc_entries);
+ memset(svc_entries, 0, sizeof(*svc_entries));
svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid);
snprintf(svc_entries->service_entries[0].name,
sizeof(svc_entries->service_entries[0].name),
@@ -484,7 +454,7 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
rsp->ah = ah;
dm_mad = rsp->mad;
- memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
+ memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof(*dm_mad));
dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
dm_mad->mad_hdr.status = 0;
@@ -532,7 +502,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
struct ib_port_attr port_attr;
int ret;
- memset(&port_modify, 0, sizeof port_modify);
+ memset(&port_modify, 0, sizeof(port_modify));
port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
port_modify.clr_port_cap_mask = 0;
@@ -553,7 +523,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
goto err_query_port;
if (!sport->mad_agent) {
- memset(&reg_req, 0, sizeof reg_req);
+ memset(&reg_req, 0, sizeof(reg_req));
reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
@@ -841,6 +811,39 @@ out:
}
/**
+ * srpt_zerolength_write() - Perform a zero-length RDMA write.
+ *
+ * A quote from the InfiniBand specification: C9-88: For an HCA responder
+ * using Reliable Connection service, for each zero-length RDMA READ or WRITE
+ * request, the R_Key shall not be validated, even if the request includes
+ * Immediate data.
+ */
+static int srpt_zerolength_write(struct srpt_rdma_ch *ch)
+{
+ struct ib_send_wr wr, *bad_wr;
+
+ memset(&wr, 0, sizeof(wr));
+ wr.opcode = IB_WR_RDMA_WRITE;
+ wr.wr_cqe = &ch->zw_cqe;
+ wr.send_flags = IB_SEND_SIGNALED;
+ return ib_post_send(ch->qp, &wr, &bad_wr);
+}
+
+static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct srpt_rdma_ch *ch = cq->cq_context;
+
+ if (wc->status == IB_WC_SUCCESS) {
+ srpt_process_wait_list(ch);
+ } else {
+ if (srpt_set_ch_state(ch, CH_DISCONNECTED))
+ schedule_work(&ch->release_work);
+ else
+ WARN_ONCE("%s-%d\n", ch->sess_name, ch->qp->qp_num);
+ }
+}
+
+/**
* srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request.
* @ioctx: Pointer to the I/O context associated with the request.
* @srp_cmd: Pointer to the SRP_CMD request data.
@@ -903,14 +906,14 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
db = (struct srp_direct_buf *)(srp_cmd->add_data
+ add_cdb_offset);
- memcpy(ioctx->rbufs, db, sizeof *db);
+ memcpy(ioctx->rbufs, db, sizeof(*db));
*data_len = be32_to_cpu(db->len);
} else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) ||
((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) {
idb = (struct srp_indirect_buf *)(srp_cmd->add_data
+ add_cdb_offset);
- ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;
+ ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof(*db);
if (ioctx->n_rbuf >
(srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
@@ -929,7 +932,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
ioctx->rbufs = &ioctx->single_rbuf;
else {
ioctx->rbufs =
- kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
+ kmalloc(ioctx->n_rbuf * sizeof(*db), GFP_ATOMIC);
if (!ioctx->rbufs) {
ioctx->n_rbuf = 0;
ret = -ENOMEM;
@@ -938,7 +941,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
}
db = idb->desc_list;
- memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
+ memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof(*db));
*data_len = be32_to_cpu(idb->len);
}
out:
@@ -956,7 +959,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
struct ib_qp_attr *attr;
int ret;
- attr = kzalloc(sizeof *attr, GFP_KERNEL);
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
if (!attr)
return -ENOMEM;
@@ -1070,7 +1073,7 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
dir = ioctx->cmd.data_direction;
BUG_ON(dir == DMA_NONE);
ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt,
- opposite_dma_dir(dir));
+ target_reverse_dma_direction(&ioctx->cmd));
ioctx->mapped_sg_count = 0;
}
}
@@ -1107,7 +1110,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
ioctx->sg_cnt = sg_cnt = cmd->t_data_nents;
count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt,
- opposite_dma_dir(dir));
+ target_reverse_dma_direction(cmd));
if (unlikely(!count))
return -EAGAIN;
@@ -1313,10 +1316,7 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
/*
* If the command is in a state where the target core is waiting for
- * the ib_srpt driver, change the state to the next state. Changing
- * the state of the command from SRPT_STATE_NEED_DATA to
- * SRPT_STATE_DATA_IN ensures that srpt_xmit_response() will call this
- * function a second time.
+ * the ib_srpt driver, change the state to the next state.
*/
spin_lock_irqsave(&ioctx->spinlock, flags);
@@ -1325,25 +1325,17 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
case SRPT_STATE_NEED_DATA:
ioctx->state = SRPT_STATE_DATA_IN;
break;
- case SRPT_STATE_DATA_IN:
case SRPT_STATE_CMD_RSP_SENT:
case SRPT_STATE_MGMT_RSP_SENT:
ioctx->state = SRPT_STATE_DONE;
break;
default:
+ WARN_ONCE(true, "%s: unexpected I/O context state %d\n",
+ __func__, state);
break;
}
spin_unlock_irqrestore(&ioctx->spinlock, flags);
- if (state == SRPT_STATE_DONE) {
- struct srpt_rdma_ch *ch = ioctx->ch;
-
- BUG_ON(ch->sess == NULL);
-
- target_put_sess_cmd(&ioctx->cmd);
- goto out;
- }
-
pr_debug("Aborting cmd with state %d and tag %lld\n", state,
ioctx->cmd.tag);
@@ -1351,19 +1343,16 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
case SRPT_STATE_NEW:
case SRPT_STATE_DATA_IN:
case SRPT_STATE_MGMT:
+ case SRPT_STATE_DONE:
/*
* Do nothing - defer abort processing until
* srpt_queue_response() is invoked.
*/
- WARN_ON(!transport_check_aborted_status(&ioctx->cmd, false));
break;
case SRPT_STATE_NEED_DATA:
- /* DMA_TO_DEVICE (write) - RDMA read error. */
-
- /* XXX(hch): this is a horrible layering violation.. */
- spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
- ioctx->cmd.transport_state &= ~CMD_T_ACTIVE;
- spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
+ pr_debug("tag %#llx: RDMA read error\n", ioctx->cmd.tag);
+ transport_generic_request_failure(&ioctx->cmd,
+ TCM_CHECK_CONDITION_ABORT_CMD);
break;
case SRPT_STATE_CMD_RSP_SENT:
/*
@@ -1371,18 +1360,16 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
* not been received in time.
*/
srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
- target_put_sess_cmd(&ioctx->cmd);
+ transport_generic_free_cmd(&ioctx->cmd, 0);
break;
case SRPT_STATE_MGMT_RSP_SENT:
- srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
- target_put_sess_cmd(&ioctx->cmd);
+ transport_generic_free_cmd(&ioctx->cmd, 0);
break;
default:
WARN(1, "Unexpected command state (%d)", state);
break;
}
-out:
return state;
}
@@ -1422,9 +1409,14 @@ static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ /*
+ * Note: if an RDMA write error completion is received that
+ * means that a SEND also has been posted. Defer further
+ * processing of the associated command until the send error
+ * completion has been received.
+ */
pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
ioctx, wc->status);
- srpt_abort_cmd(ioctx);
}
}
@@ -1464,7 +1456,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
sense_data_len = ioctx->cmd.scsi_sense_length;
WARN_ON(sense_data_len > sizeof(ioctx->sense_data));
- memset(srp_rsp, 0, sizeof *srp_rsp);
+ memset(srp_rsp, 0, sizeof(*srp_rsp));
srp_rsp->opcode = SRP_RSP;
srp_rsp->req_lim_delta =
cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0));
@@ -1514,7 +1506,7 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
srp_rsp = ioctx->ioctx.buf;
BUG_ON(!srp_rsp);
- memset(srp_rsp, 0, sizeof *srp_rsp);
+ memset(srp_rsp, 0, sizeof(*srp_rsp));
srp_rsp->opcode = SRP_RSP;
srp_rsp->req_lim_delta =
@@ -1528,80 +1520,6 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
return resp_len;
}
-#define NO_SUCH_LUN ((uint64_t)-1LL)
-
-/*
- * SCSI LUN addressing method. See also SAM-2 and the section about
- * eight byte LUNs.
- */
-enum scsi_lun_addr_method {
- SCSI_LUN_ADDR_METHOD_PERIPHERAL = 0,
- SCSI_LUN_ADDR_METHOD_FLAT = 1,
- SCSI_LUN_ADDR_METHOD_LUN = 2,
- SCSI_LUN_ADDR_METHOD_EXTENDED_LUN = 3,
-};
-
-/*
- * srpt_unpack_lun() - Convert from network LUN to linear LUN.
- *
- * Convert an 2-byte, 4-byte, 6-byte or 8-byte LUN structure in network byte
- * order (big endian) to a linear LUN. Supports three LUN addressing methods:
- * peripheral, flat and logical unit. See also SAM-2, section 4.9.4 (page 40).
- */
-static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
-{
- uint64_t res = NO_SUCH_LUN;
- int addressing_method;
-
- if (unlikely(len < 2)) {
- pr_err("Illegal LUN length %d, expected 2 bytes or more\n",
- len);
- goto out;
- }
-
- switch (len) {
- case 8:
- if ((*((__be64 *)lun) &
- cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0)
- goto out_err;
- break;
- case 4:
- if (*((__be16 *)&lun[2]) != 0)
- goto out_err;
- break;
- case 6:
- if (*((__be32 *)&lun[2]) != 0)
- goto out_err;
- break;
- case 2:
- break;
- default:
- goto out_err;
- }
-
- addressing_method = (*lun) >> 6; /* highest two bits of byte 0 */
- switch (addressing_method) {
- case SCSI_LUN_ADDR_METHOD_PERIPHERAL:
- case SCSI_LUN_ADDR_METHOD_FLAT:
- case SCSI_LUN_ADDR_METHOD_LUN:
- res = *(lun + 1) | (((*lun) & 0x3f) << 8);
- break;
-
- case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN:
- default:
- pr_err("Unimplemented LUN addressing method %u\n",
- addressing_method);
- break;
- }
-
-out:
- return res;
-
-out_err:
- pr_err("Support for multi-level LUNs has not yet been implemented\n");
- goto out;
-}
-
static int srpt_check_stop_free(struct se_cmd *cmd)
{
struct srpt_send_ioctx *ioctx = container_of(cmd,
@@ -1613,16 +1531,14 @@ static int srpt_check_stop_free(struct se_cmd *cmd)
/**
* srpt_handle_cmd() - Process SRP_CMD.
*/
-static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
- struct srpt_recv_ioctx *recv_ioctx,
- struct srpt_send_ioctx *send_ioctx)
+static void srpt_handle_cmd(struct srpt_rdma_ch *ch,
+ struct srpt_recv_ioctx *recv_ioctx,
+ struct srpt_send_ioctx *send_ioctx)
{
struct se_cmd *cmd;
struct srp_cmd *srp_cmd;
- uint64_t unpacked_lun;
u64 data_len;
enum dma_data_direction dir;
- sense_reason_t ret;
int rc;
BUG_ON(!send_ioctx);
@@ -1650,65 +1566,23 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) {
pr_err("0x%llx: parsing SRP descriptor table failed.\n",
srp_cmd->tag);
- ret = TCM_INVALID_CDB_FIELD;
- goto send_sense;
+ goto release_ioctx;
}
- unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_cmd->lun,
- sizeof(srp_cmd->lun));
rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb,
- &send_ioctx->sense_data[0], unpacked_lun, data_len,
- TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF);
+ &send_ioctx->sense_data[0],
+ scsilun_to_int(&srp_cmd->lun), data_len,
+ TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF);
if (rc != 0) {
- ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
- goto send_sense;
+ pr_debug("target_submit_cmd() returned %d for tag %#llx\n", rc,
+ srp_cmd->tag);
+ goto release_ioctx;
}
- return 0;
-
-send_sense:
- transport_send_check_condition_and_sense(cmd, ret, 0);
- return -1;
-}
-
-/**
- * srpt_rx_mgmt_fn_tag() - Process a task management function by tag.
- * @ch: RDMA channel of the task management request.
- * @fn: Task management function to perform.
- * @req_tag: Tag of the SRP task management request.
- * @mgmt_ioctx: I/O context of the task management request.
- *
- * Returns zero if the target core will process the task management
- * request asynchronously.
- *
- * Note: It is assumed that the initiator serializes tag-based task management
- * requests.
- */
-static int srpt_rx_mgmt_fn_tag(struct srpt_send_ioctx *ioctx, u64 tag)
-{
- struct srpt_device *sdev;
- struct srpt_rdma_ch *ch;
- struct srpt_send_ioctx *target;
- int ret, i;
+ return;
- ret = -EINVAL;
- ch = ioctx->ch;
- BUG_ON(!ch);
- BUG_ON(!ch->sport);
- sdev = ch->sport->sdev;
- BUG_ON(!sdev);
- spin_lock_irq(&sdev->spinlock);
- for (i = 0; i < ch->rq_size; ++i) {
- target = ch->ioctx_ring[i];
- if (target->cmd.se_lun == ioctx->cmd.se_lun &&
- target->cmd.tag == tag &&
- srpt_get_cmd_state(target) != SRPT_STATE_DONE) {
- ret = 0;
- /* now let the target core abort &target->cmd; */
- break;
- }
- }
- spin_unlock_irq(&sdev->spinlock);
- return ret;
+release_ioctx:
+ send_ioctx->state = SRPT_STATE_DONE;
+ srpt_release_cmd(cmd);
}
static int srp_tmr_to_tcm(int fn)
@@ -1744,8 +1618,6 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
struct srp_tsk_mgmt *srp_tsk;
struct se_cmd *cmd;
struct se_session *sess = ch->sess;
- uint64_t unpacked_lun;
- uint32_t tag = 0;
int tcm_tmr;
int rc;
@@ -1761,26 +1633,10 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT);
send_ioctx->cmd.tag = srp_tsk->tag;
tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func);
- if (tcm_tmr < 0) {
- send_ioctx->cmd.se_tmr_req->response =
- TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED;
- goto fail;
- }
- unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_tsk->lun,
- sizeof(srp_tsk->lun));
-
- if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK) {
- rc = srpt_rx_mgmt_fn_tag(send_ioctx, srp_tsk->task_tag);
- if (rc < 0) {
- send_ioctx->cmd.se_tmr_req->response =
- TMR_TASK_DOES_NOT_EXIST;
- goto fail;
- }
- tag = srp_tsk->task_tag;
- }
- rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL, unpacked_lun,
- srp_tsk, tcm_tmr, GFP_KERNEL, tag,
- TARGET_SCF_ACK_KREF);
+ rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL,
+ scsilun_to_int(&srp_tsk->lun), srp_tsk, tcm_tmr,
+ GFP_KERNEL, srp_tsk->task_tag,
+ TARGET_SCF_ACK_KREF);
if (rc != 0) {
send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED;
goto fail;
@@ -1800,7 +1656,6 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
struct srpt_send_ioctx *send_ioctx)
{
struct srp_cmd *srp_cmd;
- enum rdma_ch_state ch_state;
BUG_ON(!ch);
BUG_ON(!recv_ioctx);
@@ -1809,13 +1664,12 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
recv_ioctx->ioctx.dma, srp_max_req_size,
DMA_FROM_DEVICE);
- ch_state = srpt_get_ch_state(ch);
- if (unlikely(ch_state == CH_CONNECTING)) {
+ if (unlikely(ch->state == CH_CONNECTING)) {
list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list);
goto out;
}
- if (unlikely(ch_state != CH_LIVE))
+ if (unlikely(ch->state != CH_LIVE))
goto out;
srp_cmd = recv_ioctx->ioctx.buf;
@@ -1878,6 +1732,28 @@ static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
}
}
+/*
+ * This function must be called from the context in which RDMA completions are
+ * processed because it accesses the wait list without protection against
+ * access from other threads.
+ */
+static void srpt_process_wait_list(struct srpt_rdma_ch *ch)
+{
+ struct srpt_send_ioctx *ioctx;
+
+ while (!list_empty(&ch->cmd_wait_list) &&
+ ch->state >= CH_LIVE &&
+ (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
+ struct srpt_recv_ioctx *recv_ioctx;
+
+ recv_ioctx = list_first_entry(&ch->cmd_wait_list,
+ struct srpt_recv_ioctx,
+ wait_list);
+ list_del(&recv_ioctx->wait_list);
+ srpt_handle_new_iu(ch, recv_ioctx, ioctx);
+ }
+}
+
/**
* Note: Although this has not yet been observed during tests, at least in
* theory it is possible that the srpt_get_send_ioctx() call invoked by
@@ -1905,15 +1781,10 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
atomic_inc(&ch->sq_wr_avail);
- if (wc->status != IB_WC_SUCCESS) {
+ if (wc->status != IB_WC_SUCCESS)
pr_info("sending response for ioctx 0x%p failed"
" with status %d\n", ioctx, wc->status);
- atomic_dec(&ch->req_lim);
- srpt_abort_cmd(ioctx);
- goto out;
- }
-
if (state != SRPT_STATE_DONE) {
srpt_unmap_sg_to_ib_sge(ch, ioctx);
transport_generic_free_cmd(&ioctx->cmd, 0);
@@ -1922,18 +1793,7 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
" wr_id = %u.\n", ioctx->ioctx.index);
}
-out:
- while (!list_empty(&ch->cmd_wait_list) &&
- srpt_get_ch_state(ch) == CH_LIVE &&
- (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
- struct srpt_recv_ioctx *recv_ioctx;
-
- recv_ioctx = list_first_entry(&ch->cmd_wait_list,
- struct srpt_recv_ioctx,
- wait_list);
- list_del(&recv_ioctx->wait_list);
- srpt_handle_new_iu(ch, recv_ioctx, ioctx);
- }
+ srpt_process_wait_list(ch);
}
/**
@@ -1950,7 +1810,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
WARN_ON(ch->rq_size < 1);
ret = -ENOMEM;
- qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
+ qp_init = kzalloc(sizeof(*qp_init), GFP_KERNEL);
if (!qp_init)
goto out;
@@ -2017,168 +1877,102 @@ static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
}
/**
- * __srpt_close_ch() - Close an RDMA channel by setting the QP error state.
+ * srpt_close_ch() - Close an RDMA channel.
*
- * Reset the QP and make sure all resources associated with the channel will
- * be deallocated at an appropriate time.
+ * Make sure all resources associated with the channel will be deallocated at
+ * an appropriate time.
*
- * Note: The caller must hold ch->sport->sdev->spinlock.
+ * Returns true if and only if the channel state has been modified into
+ * CH_DRAINING.
*/
-static void __srpt_close_ch(struct srpt_rdma_ch *ch)
+static bool srpt_close_ch(struct srpt_rdma_ch *ch)
{
- enum rdma_ch_state prev_state;
- unsigned long flags;
+ int ret;
- spin_lock_irqsave(&ch->spinlock, flags);
- prev_state = ch->state;
- switch (prev_state) {
- case CH_CONNECTING:
- case CH_LIVE:
- ch->state = CH_DISCONNECTING;
- break;
- default:
- break;
+ if (!srpt_set_ch_state(ch, CH_DRAINING)) {
+ pr_debug("%s-%d: already closed\n", ch->sess_name,
+ ch->qp->qp_num);
+ return false;
}
- spin_unlock_irqrestore(&ch->spinlock, flags);
-
- switch (prev_state) {
- case CH_CONNECTING:
- ib_send_cm_rej(ch->cm_id, IB_CM_REJ_NO_RESOURCES, NULL, 0,
- NULL, 0);
- /* fall through */
- case CH_LIVE:
- if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0)
- pr_err("sending CM DREQ failed.\n");
- break;
- case CH_DISCONNECTING:
- break;
- case CH_DRAINING:
- case CH_RELEASING:
- break;
- }
-}
-
-/**
- * srpt_close_ch() - Close an RDMA channel.
- */
-static void srpt_close_ch(struct srpt_rdma_ch *ch)
-{
- struct srpt_device *sdev;
- sdev = ch->sport->sdev;
- spin_lock_irq(&sdev->spinlock);
- __srpt_close_ch(ch);
- spin_unlock_irq(&sdev->spinlock);
-}
+ kref_get(&ch->kref);
-/**
- * srpt_shutdown_session() - Whether or not a session may be shut down.
- */
-static int srpt_shutdown_session(struct se_session *se_sess)
-{
- struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr;
- unsigned long flags;
+ ret = srpt_ch_qp_err(ch);
+ if (ret < 0)
+ pr_err("%s-%d: changing queue pair into error state failed: %d\n",
+ ch->sess_name, ch->qp->qp_num, ret);
- spin_lock_irqsave(&ch->spinlock, flags);
- if (ch->in_shutdown) {
- spin_unlock_irqrestore(&ch->spinlock, flags);
- return true;
+ pr_debug("%s-%d: queued zerolength write\n", ch->sess_name,
+ ch->qp->qp_num);
+ ret = srpt_zerolength_write(ch);
+ if (ret < 0) {
+ pr_err("%s-%d: queuing zero-length write failed: %d\n",
+ ch->sess_name, ch->qp->qp_num, ret);
+ if (srpt_set_ch_state(ch, CH_DISCONNECTED))
+ schedule_work(&ch->release_work);
+ else
+ WARN_ON_ONCE(true);
}
- ch->in_shutdown = true;
- target_sess_cmd_list_set_waiting(se_sess);
- spin_unlock_irqrestore(&ch->spinlock, flags);
+ kref_put(&ch->kref, srpt_free_ch);
return true;
}
-/**
- * srpt_drain_channel() - Drain a channel by resetting the IB queue pair.
- * @cm_id: Pointer to the CM ID of the channel to be drained.
- *
- * Note: Must be called from inside srpt_cm_handler to avoid a race between
- * accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one()
- * (the caller of srpt_cm_handler holds the cm_id spinlock; srpt_remove_one()
- * waits until all target sessions for the associated IB device have been
- * unregistered and target session registration involves a call to
- * ib_destroy_cm_id(), which locks the cm_id spinlock and hence waits until
- * this function has finished).
+/*
+ * Change the channel state into CH_DISCONNECTING. If a channel has not yet
+ * reached the connected state, close it. If a channel is in the connected
+ * state, send a DREQ. If a DREQ has been received, send a DREP. Note: it is
+ * the responsibility of the caller to ensure that this function is not
+ * invoked concurrently with the code that accepts a connection. This means
+ * that this function must either be invoked from inside a CM callback
+ * function or that it must be invoked with the srpt_port.mutex held.
*/
-static void srpt_drain_channel(struct ib_cm_id *cm_id)
+static int srpt_disconnect_ch(struct srpt_rdma_ch *ch)
{
- struct srpt_device *sdev;
- struct srpt_rdma_ch *ch;
int ret;
- bool do_reset = false;
- WARN_ON_ONCE(irqs_disabled());
+ if (!srpt_set_ch_state(ch, CH_DISCONNECTING))
+ return -ENOTCONN;
- sdev = cm_id->context;
- BUG_ON(!sdev);
- spin_lock_irq(&sdev->spinlock);
- list_for_each_entry(ch, &sdev->rch_list, list) {
- if (ch->cm_id == cm_id) {
- do_reset = srpt_test_and_set_ch_state(ch,
- CH_CONNECTING, CH_DRAINING) ||
- srpt_test_and_set_ch_state(ch,
- CH_LIVE, CH_DRAINING) ||
- srpt_test_and_set_ch_state(ch,
- CH_DISCONNECTING, CH_DRAINING);
- break;
- }
- }
- spin_unlock_irq(&sdev->spinlock);
+ ret = ib_send_cm_dreq(ch->cm_id, NULL, 0);
+ if (ret < 0)
+ ret = ib_send_cm_drep(ch->cm_id, NULL, 0);
- if (do_reset) {
- if (ch->sess)
- srpt_shutdown_session(ch->sess);
+ if (ret < 0 && srpt_close_ch(ch))
+ ret = 0;
- ret = srpt_ch_qp_err(ch);
- if (ret < 0)
- pr_err("Setting queue pair in error state"
- " failed: %d\n", ret);
- }
+ return ret;
}
-/**
- * srpt_find_channel() - Look up an RDMA channel.
- * @cm_id: Pointer to the CM ID of the channel to be looked up.
- *
- * Return NULL if no matching RDMA channel has been found.
- */
-static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev,
- struct ib_cm_id *cm_id)
+static void __srpt_close_all_ch(struct srpt_device *sdev)
{
struct srpt_rdma_ch *ch;
- bool found;
- WARN_ON_ONCE(irqs_disabled());
- BUG_ON(!sdev);
+ lockdep_assert_held(&sdev->mutex);
- found = false;
- spin_lock_irq(&sdev->spinlock);
list_for_each_entry(ch, &sdev->rch_list, list) {
- if (ch->cm_id == cm_id) {
- found = true;
- break;
- }
+ if (srpt_disconnect_ch(ch) >= 0)
+ pr_info("Closing channel %s-%d because target %s has been disabled\n",
+ ch->sess_name, ch->qp->qp_num,
+ sdev->device->name);
+ srpt_close_ch(ch);
}
- spin_unlock_irq(&sdev->spinlock);
-
- return found ? ch : NULL;
}
/**
- * srpt_release_channel() - Release channel resources.
- *
- * Schedules the actual release because:
- * - Calling the ib_destroy_cm_id() call from inside an IB CM callback would
- * trigger a deadlock.
- * - It is not safe to call TCM transport_* functions from interrupt context.
+ * srpt_shutdown_session() - Whether or not a session may be shut down.
*/
-static void srpt_release_channel(struct srpt_rdma_ch *ch)
+static int srpt_shutdown_session(struct se_session *se_sess)
+{
+ return 1;
+}
+
+static void srpt_free_ch(struct kref *kref)
{
- schedule_work(&ch->release_work);
+ struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref);
+
+ kfree(ch);
}
static void srpt_release_channel_work(struct work_struct *w)
@@ -2188,8 +1982,8 @@ static void srpt_release_channel_work(struct work_struct *w)
struct se_session *se_sess;
ch = container_of(w, struct srpt_rdma_ch, release_work);
- pr_debug("ch = %p; ch->sess = %p; release_done = %p\n", ch, ch->sess,
- ch->release_done);
+ pr_debug("%s: %s-%d; release_done = %p\n", __func__, ch->sess_name,
+ ch->qp->qp_num, ch->release_done);
sdev = ch->sport->sdev;
BUG_ON(!sdev);
@@ -2197,6 +1991,7 @@ static void srpt_release_channel_work(struct work_struct *w)
se_sess = ch->sess;
BUG_ON(!se_sess);
+ target_sess_cmd_list_set_waiting(se_sess);
target_wait_for_sess_cmds(se_sess);
transport_deregister_session_configfs(se_sess);
@@ -2211,16 +2006,15 @@ static void srpt_release_channel_work(struct work_struct *w)
ch->sport->sdev, ch->rq_size,
ch->rsp_size, DMA_TO_DEVICE);
- spin_lock_irq(&sdev->spinlock);
- list_del(&ch->list);
- spin_unlock_irq(&sdev->spinlock);
-
+ mutex_lock(&sdev->mutex);
+ list_del_init(&ch->list);
if (ch->release_done)
complete(ch->release_done);
+ mutex_unlock(&sdev->mutex);
wake_up(&sdev->ch_releaseQ);
- kfree(ch);
+ kref_put(&ch->kref, srpt_free_ch);
}
/**
@@ -2266,9 +2060,9 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
- rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
- rej = kzalloc(sizeof *rej, GFP_KERNEL);
- rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
+ rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
+ rej = kzalloc(sizeof(*rej), GFP_KERNEL);
+ rep_param = kzalloc(sizeof(*rep_param), GFP_KERNEL);
if (!rsp || !rej || !rep_param) {
ret = -ENOMEM;
@@ -2297,7 +2091,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) {
rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
- spin_lock_irq(&sdev->spinlock);
+ mutex_lock(&sdev->mutex);
list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
@@ -2305,26 +2099,16 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
&& param->port == ch->sport->port
&& param->listen_id == ch->sport->sdev->cm_id
&& ch->cm_id) {
- enum rdma_ch_state ch_state;
-
- ch_state = srpt_get_ch_state(ch);
- if (ch_state != CH_CONNECTING
- && ch_state != CH_LIVE)
+ if (srpt_disconnect_ch(ch) < 0)
continue;
-
- /* found an existing channel */
- pr_debug("Found existing channel %s"
- " cm_id= %p state= %d\n",
- ch->sess_name, ch->cm_id, ch_state);
-
- __srpt_close_ch(ch);
-
+ pr_info("Relogin - closed existing channel %s\n",
+ ch->sess_name);
rsp->rsp_flags =
SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
}
}
- spin_unlock_irq(&sdev->spinlock);
+ mutex_unlock(&sdev->mutex);
} else
rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
@@ -2340,7 +2124,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
goto reject;
}
- ch = kzalloc(sizeof *ch, GFP_KERNEL);
+ ch = kzalloc(sizeof(*ch), GFP_KERNEL);
if (!ch) {
rej->reason = cpu_to_be32(
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
@@ -2349,11 +2133,14 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
goto reject;
}
+ kref_init(&ch->kref);
+ ch->zw_cqe.done = srpt_zerolength_write_done;
INIT_WORK(&ch->release_work, srpt_release_channel_work);
memcpy(ch->i_port_id, req->initiator_port_id, 16);
memcpy(ch->t_port_id, req->target_port_id, 16);
ch->sport = &sdev->port[param->port - 1];
ch->cm_id = cm_id;
+ cm_id->context = ch;
/*
* Avoid QUEUE_FULL conditions by limiting the number of buffers used
* for the SRP protocol to the command queue size.
@@ -2453,7 +2240,7 @@ try_again:
/* create cm reply */
rep_param->qp_num = ch->qp->qp_num;
rep_param->private_data = (void *)rsp;
- rep_param->private_data_len = sizeof *rsp;
+ rep_param->private_data_len = sizeof(*rsp);
rep_param->rnr_retry_count = 7;
rep_param->flow_control = 1;
rep_param->failover_accepted = 0;
@@ -2468,14 +2255,14 @@ try_again:
goto release_channel;
}
- spin_lock_irq(&sdev->spinlock);
+ mutex_lock(&sdev->mutex);
list_add_tail(&ch->list, &sdev->rch_list);
- spin_unlock_irq(&sdev->spinlock);
+ mutex_unlock(&sdev->mutex);
goto out;
release_channel:
- srpt_set_ch_state(ch, CH_RELEASING);
+ srpt_disconnect_ch(ch);
transport_deregister_session_configfs(ch->sess);
transport_deregister_session(ch->sess);
ch->sess = NULL;
@@ -2497,7 +2284,7 @@ reject:
| SRP_BUF_FORMAT_INDIRECT);
ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
- (void *)rej, sizeof *rej);
+ (void *)rej, sizeof(*rej));
out:
kfree(rep_param);
@@ -2507,10 +2294,23 @@ out:
return ret;
}
-static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
+static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch,
+ enum ib_cm_rej_reason reason,
+ const u8 *private_data,
+ u8 private_data_len)
{
- pr_info("Received IB REJ for cm_id %p.\n", cm_id);
- srpt_drain_channel(cm_id);
+ char *priv = NULL;
+ int i;
+
+ if (private_data_len && (priv = kmalloc(private_data_len * 3 + 1,
+ GFP_KERNEL))) {
+ for (i = 0; i < private_data_len; i++)
+ sprintf(priv + 3 * i, " %02x", private_data[i]);
+ }
+ pr_info("Received CM REJ for ch %s-%d; reason %d%s%s.\n",
+ ch->sess_name, ch->qp->qp_num, reason, private_data_len ?
+ "; private data" : "", priv ? priv : " (?)");
+ kfree(priv);
}
/**
@@ -2519,87 +2319,23 @@ static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
* An IB_CM_RTU_RECEIVED message indicates that the connection is established
* and that the recipient may begin transmitting (RTU = ready to use).
*/
-static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
+static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch)
{
- struct srpt_rdma_ch *ch;
int ret;
- ch = srpt_find_channel(cm_id->context, cm_id);
- BUG_ON(!ch);
-
- if (srpt_test_and_set_ch_state(ch, CH_CONNECTING, CH_LIVE)) {
- struct srpt_recv_ioctx *ioctx, *ioctx_tmp;
-
+ if (srpt_set_ch_state(ch, CH_LIVE)) {
ret = srpt_ch_qp_rts(ch, ch->qp);
- list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
- wait_list) {
- list_del(&ioctx->wait_list);
- srpt_handle_new_iu(ch, ioctx, NULL);
- }
- if (ret)
+ if (ret == 0) {
+ /* Trigger wait list processing. */
+ ret = srpt_zerolength_write(ch);
+ WARN_ONCE(ret < 0, "%d\n", ret);
+ } else {
srpt_close_ch(ch);
+ }
}
}
-static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
-{
- pr_info("Received IB TimeWait exit for cm_id %p.\n", cm_id);
- srpt_drain_channel(cm_id);
-}
-
-static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
-{
- pr_info("Received IB REP error for cm_id %p.\n", cm_id);
- srpt_drain_channel(cm_id);
-}
-
-/**
- * srpt_cm_dreq_recv() - Process reception of a DREQ message.
- */
-static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
-{
- struct srpt_rdma_ch *ch;
- unsigned long flags;
- bool send_drep = false;
-
- ch = srpt_find_channel(cm_id->context, cm_id);
- BUG_ON(!ch);
-
- pr_debug("cm_id= %p ch->state= %d\n", cm_id, srpt_get_ch_state(ch));
-
- spin_lock_irqsave(&ch->spinlock, flags);
- switch (ch->state) {
- case CH_CONNECTING:
- case CH_LIVE:
- send_drep = true;
- ch->state = CH_DISCONNECTING;
- break;
- case CH_DISCONNECTING:
- case CH_DRAINING:
- case CH_RELEASING:
- WARN(true, "unexpected channel state %d\n", ch->state);
- break;
- }
- spin_unlock_irqrestore(&ch->spinlock, flags);
-
- if (send_drep) {
- if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0)
- pr_err("Sending IB DREP failed.\n");
- pr_info("Received DREQ and sent DREP for session %s.\n",
- ch->sess_name);
- }
-}
-
-/**
- * srpt_cm_drep_recv() - Process reception of a DREP message.
- */
-static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
-{
- pr_info("Received InfiniBand DREP message for cm_id %p.\n", cm_id);
- srpt_drain_channel(cm_id);
-}
-
/**
* srpt_cm_handler() - IB connection manager callback function.
*
@@ -2612,6 +2348,7 @@ static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
*/
static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
+ struct srpt_rdma_ch *ch = cm_id->context;
int ret;
ret = 0;
@@ -2621,32 +2358,39 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
event->private_data);
break;
case IB_CM_REJ_RECEIVED:
- srpt_cm_rej_recv(cm_id);
+ srpt_cm_rej_recv(ch, event->param.rej_rcvd.reason,
+ event->private_data,
+ IB_CM_REJ_PRIVATE_DATA_SIZE);
break;
case IB_CM_RTU_RECEIVED:
case IB_CM_USER_ESTABLISHED:
- srpt_cm_rtu_recv(cm_id);
+ srpt_cm_rtu_recv(ch);
break;
case IB_CM_DREQ_RECEIVED:
- srpt_cm_dreq_recv(cm_id);
+ srpt_disconnect_ch(ch);
break;
case IB_CM_DREP_RECEIVED:
- srpt_cm_drep_recv(cm_id);
+ pr_info("Received CM DREP message for ch %s-%d.\n",
+ ch->sess_name, ch->qp->qp_num);
+ srpt_close_ch(ch);
break;
case IB_CM_TIMEWAIT_EXIT:
- srpt_cm_timewait_exit(cm_id);
+ pr_info("Received CM TimeWait exit for ch %s-%d.\n",
+ ch->sess_name, ch->qp->qp_num);
+ srpt_close_ch(ch);
break;
case IB_CM_REP_ERROR:
- srpt_cm_rep_error(cm_id);
+ pr_info("Received CM REP error for ch %s-%d.\n", ch->sess_name,
+ ch->qp->qp_num);
break;
case IB_CM_DREQ_ERROR:
- pr_info("Received IB DREQ ERROR event.\n");
+ pr_info("Received CM DREQ ERROR event.\n");
break;
case IB_CM_MRA_RECEIVED:
- pr_info("Received IB MRA event\n");
+ pr_info("Received CM MRA event\n");
break;
default:
- pr_err("received unrecognized IB CM event %d\n", event->event);
+ pr_err("received unrecognized CM event %d\n", event->event);
break;
}
@@ -2755,41 +2499,14 @@ static int srpt_write_pending_status(struct se_cmd *se_cmd)
*/
static int srpt_write_pending(struct se_cmd *se_cmd)
{
- struct srpt_rdma_ch *ch;
- struct srpt_send_ioctx *ioctx;
+ struct srpt_send_ioctx *ioctx =
+ container_of(se_cmd, struct srpt_send_ioctx, cmd);
+ struct srpt_rdma_ch *ch = ioctx->ch;
enum srpt_command_state new_state;
- enum rdma_ch_state ch_state;
- int ret;
-
- ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);
new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA);
WARN_ON(new_state == SRPT_STATE_DONE);
-
- ch = ioctx->ch;
- BUG_ON(!ch);
-
- ch_state = srpt_get_ch_state(ch);
- switch (ch_state) {
- case CH_CONNECTING:
- WARN(true, "unexpected channel state %d\n", ch_state);
- ret = -EINVAL;
- goto out;
- case CH_LIVE:
- break;
- case CH_DISCONNECTING:
- case CH_DRAINING:
- case CH_RELEASING:
- pr_debug("cmd with tag %lld: channel disconnecting\n",
- ioctx->cmd.tag);
- srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN);
- ret = -EINVAL;
- goto out;
- }
- ret = srpt_xfer_data(ch, ioctx);
-
-out:
- return ret;
+ return srpt_xfer_data(ch, ioctx);
}
static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status)
@@ -2920,36 +2637,25 @@ static void srpt_refresh_port_work(struct work_struct *work)
srpt_refresh_port(sport);
}
-static int srpt_ch_list_empty(struct srpt_device *sdev)
-{
- int res;
-
- spin_lock_irq(&sdev->spinlock);
- res = list_empty(&sdev->rch_list);
- spin_unlock_irq(&sdev->spinlock);
-
- return res;
-}
-
/**
* srpt_release_sdev() - Free the channel resources associated with a target.
*/
static int srpt_release_sdev(struct srpt_device *sdev)
{
- struct srpt_rdma_ch *ch, *tmp_ch;
- int res;
+ int i, res;
WARN_ON_ONCE(irqs_disabled());
BUG_ON(!sdev);
- spin_lock_irq(&sdev->spinlock);
- list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list)
- __srpt_close_ch(ch);
- spin_unlock_irq(&sdev->spinlock);
+ mutex_lock(&sdev->mutex);
+ for (i = 0; i < ARRAY_SIZE(sdev->port); i++)
+ sdev->port[i].enabled = false;
+ __srpt_close_all_ch(sdev);
+ mutex_unlock(&sdev->mutex);
res = wait_event_interruptible(sdev->ch_releaseQ,
- srpt_ch_list_empty(sdev));
+ list_empty_careful(&sdev->rch_list));
if (res)
pr_err("%s: interrupted.\n", __func__);
@@ -3003,14 +2709,14 @@ static void srpt_add_one(struct ib_device *device)
pr_debug("device = %p, device->dma_ops = %p\n", device,
device->dma_ops);
- sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
+ sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
if (!sdev)
goto err;
sdev->device = device;
INIT_LIST_HEAD(&sdev->rch_list);
init_waitqueue_head(&sdev->ch_releaseQ);
- spin_lock_init(&sdev->spinlock);
+ mutex_init(&sdev->mutex);
sdev->pd = ib_alloc_pd(device);
if (IS_ERR(sdev->pd))
@@ -3082,7 +2788,7 @@ static void srpt_add_one(struct ib_device *device)
if (srpt_refresh_port(sport)) {
pr_err("MAD registration failed for %s-%d.\n",
- srpt_sdev_name(sdev), i);
+ sdev->device->name, i);
goto err_ring;
}
snprintf(sport->port_guid, sizeof(sport->port_guid),
@@ -3231,24 +2937,26 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
static void srpt_close_session(struct se_session *se_sess)
{
DECLARE_COMPLETION_ONSTACK(release_done);
- struct srpt_rdma_ch *ch;
- struct srpt_device *sdev;
- unsigned long res;
-
- ch = se_sess->fabric_sess_ptr;
- WARN_ON(ch->sess != se_sess);
+ struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr;
+ struct srpt_device *sdev = ch->sport->sdev;
+ bool wait;
- pr_debug("ch %p state %d\n", ch, srpt_get_ch_state(ch));
+ pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num,
+ ch->state);
- sdev = ch->sport->sdev;
- spin_lock_irq(&sdev->spinlock);
+ mutex_lock(&sdev->mutex);
BUG_ON(ch->release_done);
ch->release_done = &release_done;
- __srpt_close_ch(ch);
- spin_unlock_irq(&sdev->spinlock);
+ wait = !list_empty(&ch->list);
+ srpt_disconnect_ch(ch);
+ mutex_unlock(&sdev->mutex);
- res = wait_for_completion_timeout(&release_done, 60 * HZ);
- WARN_ON(res == 0);
+ if (!wait)
+ return;
+
+ while (wait_for_completion_timeout(&release_done, 180 * HZ) == 0)
+ pr_info("%s(%s-%d state %d): still waiting ...\n", __func__,
+ ch->sess_name, ch->qp->qp_num, ch->state);
}
/**
@@ -3456,6 +3164,8 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item,
{
struct se_portal_group *se_tpg = to_tpg(item);
struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_device *sdev = sport->sdev;
+ struct srpt_rdma_ch *ch;
unsigned long tmp;
int ret;
@@ -3469,11 +3179,24 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item,
pr_err("Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
return -EINVAL;
}
- if (tmp == 1)
- sport->enabled = true;
- else
- sport->enabled = false;
+ if (sport->enabled == tmp)
+ goto out;
+ sport->enabled = tmp;
+ if (sport->enabled)
+ goto out;
+
+ mutex_lock(&sdev->mutex);
+ list_for_each_entry(ch, &sdev->rch_list, list) {
+ if (ch->sport == sport) {
+ pr_debug("%s: ch %p %s-%d\n", __func__, ch,
+ ch->sess_name, ch->qp->qp_num);
+ srpt_disconnect_ch(ch);
+ srpt_close_ch(ch);
+ }
+ }
+ mutex_unlock(&sdev->mutex);
+out:
return count;
}
@@ -3565,7 +3288,6 @@ static struct configfs_attribute *srpt_wwn_attrs[] = {
static const struct target_core_fabric_ops srpt_template = {
.module = THIS_MODULE,
.name = "srpt",
- .node_acl_size = sizeof(struct srpt_node_acl),
.get_fabric_name = srpt_get_fabric_name,
.tpg_get_wwn = srpt_get_fabric_wwn,
.tpg_get_tag = srpt_get_tag,
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 09037f2b0b51..af9b8b527340 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -218,20 +218,20 @@ struct srpt_send_ioctx {
/**
* enum rdma_ch_state - SRP channel state.
- * @CH_CONNECTING: QP is in RTR state; waiting for RTU.
- * @CH_LIVE: QP is in RTS state.
- * @CH_DISCONNECTING: DREQ has been received; waiting for DREP
- * or DREQ has been send and waiting for DREP
- * or .
- * @CH_DRAINING: QP is in ERR state; waiting for last WQE event.
- * @CH_RELEASING: Last WQE event has been received; releasing resources.
+ * @CH_CONNECTING: QP is in RTR state; waiting for RTU.
+ * @CH_LIVE: QP is in RTS state.
+ * @CH_DISCONNECTING: DREQ has been sent and waiting for DREP or DREQ has
+ * been received.
+ * @CH_DRAINING: DREP has been received or waiting for DREP timed out
+ * and last work request has been queued.
+ * @CH_DISCONNECTED: Last completion has been received.
*/
enum rdma_ch_state {
CH_CONNECTING,
CH_LIVE,
CH_DISCONNECTING,
CH_DRAINING,
- CH_RELEASING
+ CH_DISCONNECTED,
};
/**
@@ -267,6 +267,8 @@ struct srpt_rdma_ch {
struct ib_cm_id *cm_id;
struct ib_qp *qp;
struct ib_cq *cq;
+ struct ib_cqe zw_cqe;
+ struct kref kref;
int rq_size;
u32 rsp_size;
atomic_t sq_wr_avail;
@@ -286,7 +288,6 @@ struct srpt_rdma_ch {
u8 sess_name[36];
struct work_struct release_work;
struct completion *release_done;
- bool in_shutdown;
};
/**
@@ -343,7 +344,7 @@ struct srpt_port {
* @ioctx_ring: Per-HCA SRQ.
* @rch_list: Per-device channel list -- see also srpt_rdma_ch.list.
* @ch_releaseQ: Enables waiting for removal from rch_list.
- * @spinlock: Protects rch_list and tpg.
+ * @mutex: Protects rch_list.
* @port: Information about the ports owned by this HCA.
* @event_handler: Per-HCA asynchronous IB event handler.
* @list: Node in srpt_dev_list.
@@ -357,18 +358,10 @@ struct srpt_device {
struct srpt_recv_ioctx **ioctx_ring;
struct list_head rch_list;
wait_queue_head_t ch_releaseQ;
- spinlock_t spinlock;
+ struct mutex mutex;
struct srpt_port port[2];
struct ib_event_handler event_handler;
struct list_head list;
};
-/**
- * struct srpt_node_acl - Per-initiator ACL data (managed via configfs).
- * @nacl: Target core node ACL information.
- */
-struct srpt_node_acl {
- struct se_node_acl nacl;
-};
-
#endif /* IB_SRPT_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index d66c690a8597..e97094598b2d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -157,7 +157,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
[29] = "802.1ad offload support",
[31] = "Modifying loopback source checks using UPDATE_QP support",
[32] = "Loopback source checks support",
- [33] = "RoCEv2 support"
+ [33] = "RoCEv2 support",
+ [34] = "DMFS Sniffer support (UC & MC)"
};
int i;
@@ -810,6 +811,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
if (field & 0x80)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN;
dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f;
+ if (field & 0x20)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER;
MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_BEACON_OFFSET);
if (field & 0x80)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_BEACON;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 1d4e2e054647..42d8de892bfe 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -752,8 +752,10 @@ static const u8 __promisc_mode[] = {
[MLX4_FS_REGULAR] = 0x0,
[MLX4_FS_ALL_DEFAULT] = 0x1,
[MLX4_FS_MC_DEFAULT] = 0x3,
- [MLX4_FS_UC_SNIFFER] = 0x4,
- [MLX4_FS_MC_SNIFFER] = 0x5,
+ [MLX4_FS_MIRROR_RX_PORT] = 0x4,
+ [MLX4_FS_MIRROR_SX_PORT] = 0x5,
+ [MLX4_FS_UC_SNIFFER] = 0x6,
+ [MLX4_FS_MC_SNIFFER] = 0x7,
};
int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 6f68dba8d7ed..bf3446794bd5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -77,6 +77,9 @@
#define KERNEL_NUM_PRIOS 1
#define KENREL_MIN_LEVEL 2
+#define ANCHOR_MAX_FT 1
+#define ANCHOR_NUM_PRIOS 1
+#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
struct node_caps {
size_t arr_sz;
long *caps;
@@ -92,7 +95,7 @@ static struct init_tree_node {
int max_ft;
} root_fs = {
.type = FS_TYPE_NAMESPACE,
- .ar_size = 3,
+ .ar_size = 4,
.children = (struct init_tree_node[]) {
ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
@@ -108,6 +111,8 @@ static struct init_tree_node {
FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode),
FS_CAP(flow_table_properties_nic_receive.flow_table_modify)),
ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))),
+ ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {},
+ ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_MAX_FT))),
}
};
@@ -196,8 +201,10 @@ static void tree_put_node(struct fs_node *node)
static int tree_remove_node(struct fs_node *node)
{
- if (atomic_read(&node->refcount) > 1)
- return -EPERM;
+ if (atomic_read(&node->refcount) > 1) {
+ atomic_dec(&node->refcount);
+ return -EEXIST;
+ }
tree_put_node(node);
return 0;
}
@@ -360,6 +367,11 @@ static void del_rule(struct fs_node *node)
memcpy(match_value, fte->val, sizeof(fte->val));
fs_get_obj(ft, fg->node.parent);
list_del(&rule->node.list);
+ if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
+ mutex_lock(&rule->dest_attr.ft->lock);
+ list_del(&rule->next_ft);
+ mutex_unlock(&rule->dest_attr.ft->lock);
+ }
fte->dests_size--;
if (fte->dests_size) {
err = mlx5_cmd_update_fte(dev, ft,
@@ -465,6 +477,8 @@ static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte,
ft->node.type = FS_TYPE_FLOW_TABLE;
ft->type = table_type;
ft->max_fte = max_fte;
+ INIT_LIST_HEAD(&ft->fwd_rules);
+ mutex_init(&ft->lock);
return ft;
}
@@ -601,9 +615,63 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
return err;
}
+static int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
+ struct mlx5_flow_destination *dest)
+{
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct fs_fte *fte;
+ int err = 0;
+
+ fs_get_obj(fte, rule->node.parent);
+ if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+ return -EINVAL;
+ lock_ref_node(&fte->node);
+ fs_get_obj(fg, fte->node.parent);
+ fs_get_obj(ft, fg->node.parent);
+
+ memcpy(&rule->dest_attr, dest, sizeof(*dest));
+ err = mlx5_cmd_update_fte(get_dev(&ft->node),
+ ft, fg->id, fte);
+ unlock_ref_node(&fte->node);
+
+ return err;
+}
+
+/* Modify/set FWD rules that point on old_next_ft to point on new_next_ft */
+static int connect_fwd_rules(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *new_next_ft,
+ struct mlx5_flow_table *old_next_ft)
+{
+ struct mlx5_flow_destination dest;
+ struct mlx5_flow_rule *iter;
+ int err = 0;
+
+ /* new_next_ft and old_next_ft could be NULL only
+ * when we create/destroy the anchor flow table.
+ */
+ if (!new_next_ft || !old_next_ft)
+ return 0;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = new_next_ft;
+
+ mutex_lock(&old_next_ft->lock);
+ list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules);
+ mutex_unlock(&old_next_ft->lock);
+ list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) {
+ err = mlx5_modify_rule_destination(iter, &dest);
+ if (err)
+ pr_err("mlx5_core: failed to modify rule to point on flow table %d\n",
+ new_next_ft->id);
+ }
+ return 0;
+}
+
static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
struct fs_prio *prio)
{
+ struct mlx5_flow_table *next_ft;
int err = 0;
/* Connect_prev_fts and update_root_ft_create are mutually exclusive */
@@ -612,6 +680,11 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table
err = connect_prev_fts(dev, ft, prio);
if (err)
return err;
+
+ next_ft = find_next_chained_ft(prio);
+ err = connect_fwd_rules(dev, ft, next_ft);
+ if (err)
+ return err;
}
if (MLX5_CAP_FLOWTABLE(dev,
@@ -762,6 +835,7 @@ static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
if (!rule)
return NULL;
+ INIT_LIST_HEAD(&rule->next_ft);
rule->node.type = FS_TYPE_FLOW_DEST;
memcpy(&rule->dest_attr, dest, sizeof(*dest));
@@ -782,9 +856,14 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte,
return ERR_PTR(-ENOMEM);
fs_get_obj(ft, fg->node.parent);
- /* Add dest to dests list- added as first element after the head */
+ /* Add dest to dests list- we need flow tables to be in the
+ * end of the list for forward to next prio rules.
+ */
tree_init_node(&rule->node, 1, del_rule);
- list_add_tail(&rule->node.list, &fte->node.children);
+ if (dest && dest->type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
+ list_add(&rule->node.list, &fte->node.children);
+ else
+ list_add_tail(&rule->node.list, &fte->node.children);
fte->dests_size++;
if (fte->dests_size == 1)
err = mlx5_cmd_create_fte(get_dev(&ft->node),
@@ -903,6 +982,25 @@ out:
return fg;
}
+static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
+ struct mlx5_flow_destination *dest)
+{
+ struct mlx5_flow_rule *rule;
+
+ list_for_each_entry(rule, &fte->node.children, node.list) {
+ if (rule->dest_attr.type == dest->type) {
+ if ((dest->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ dest->vport_num == rule->dest_attr.vport_num) ||
+ (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
+ dest->ft == rule->dest_attr.ft) ||
+ (dest->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
+ dest->tir_num == rule->dest_attr.tir_num))
+ return rule;
+ }
+ }
+ return NULL;
+}
+
static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
u32 *match_value,
u8 action,
@@ -919,6 +1017,13 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
if (compare_match_value(&fg->mask, match_value, &fte->val) &&
action == fte->action && flow_tag == fte->flow_tag) {
+ rule = find_flow_rule(fte, dest);
+ if (rule) {
+ atomic_inc(&rule->node.refcount);
+ unlock_ref_node(&fte->node);
+ unlock_ref_node(&fg->node);
+ return rule;
+ }
rule = add_rule_fte(fte, fg, dest);
unlock_ref_node(&fte->node);
if (IS_ERR(rule))
@@ -984,14 +1089,14 @@ static struct mlx5_flow_rule *add_rule_to_auto_fg(struct mlx5_flow_table *ft,
return rule;
}
-struct mlx5_flow_rule *
-mlx5_add_flow_rule(struct mlx5_flow_table *ft,
- u8 match_criteria_enable,
- u32 *match_criteria,
- u32 *match_value,
- u32 action,
- u32 flow_tag,
- struct mlx5_flow_destination *dest)
+static struct mlx5_flow_rule *
+_mlx5_add_flow_rule(struct mlx5_flow_table *ft,
+ u8 match_criteria_enable,
+ u32 *match_criteria,
+ u32 *match_value,
+ u32 action,
+ u32 flow_tag,
+ struct mlx5_flow_destination *dest)
{
struct mlx5_flow_group *g;
struct mlx5_flow_rule *rule;
@@ -1014,6 +1119,63 @@ unlock:
unlock_ref_node(&ft->node);
return rule;
}
+
+static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
+{
+ return ((ft->type == FS_FT_NIC_RX) &&
+ (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs)));
+}
+
+struct mlx5_flow_rule *
+mlx5_add_flow_rule(struct mlx5_flow_table *ft,
+ u8 match_criteria_enable,
+ u32 *match_criteria,
+ u32 *match_value,
+ u32 action,
+ u32 flow_tag,
+ struct mlx5_flow_destination *dest)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ struct mlx5_flow_destination gen_dest;
+ struct mlx5_flow_table *next_ft = NULL;
+ struct mlx5_flow_rule *rule = NULL;
+ u32 sw_action = action;
+ struct fs_prio *prio;
+
+ fs_get_obj(prio, ft->node.parent);
+ if (action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
+ if (!fwd_next_prio_supported(ft))
+ return ERR_PTR(-EOPNOTSUPP);
+ if (dest)
+ return ERR_PTR(-EINVAL);
+ mutex_lock(&root->chain_lock);
+ next_ft = find_next_chained_ft(prio);
+ if (next_ft) {
+ gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ gen_dest.ft = next_ft;
+ dest = &gen_dest;
+ action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ } else {
+ mutex_unlock(&root->chain_lock);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ }
+
+ rule = _mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria,
+ match_value, action, flow_tag, dest);
+
+ if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
+ if (!IS_ERR_OR_NULL(rule) &&
+ (list_empty(&rule->next_ft))) {
+ mutex_lock(&next_ft->lock);
+ list_add(&rule->next_ft, &next_ft->fwd_rules);
+ mutex_unlock(&next_ft->lock);
+ rule->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ }
+ mutex_unlock(&root->chain_lock);
+ }
+ return rule;
+}
EXPORT_SYMBOL(mlx5_add_flow_rule);
void mlx5_del_flow_rule(struct mlx5_flow_rule *rule)
@@ -1077,6 +1239,10 @@ static int disconnect_flow_table(struct mlx5_flow_table *ft)
return 0;
next_ft = find_next_chained_ft(prio);
+ err = connect_fwd_rules(dev, next_ft, ft);
+ if (err)
+ return err;
+
err = connect_prev_fts(dev, next_ft, prio);
if (err)
mlx5_core_warn(dev, "Failed to disconnect flow table %d\n",
@@ -1126,6 +1292,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
case MLX5_FLOW_NAMESPACE_BYPASS:
case MLX5_FLOW_NAMESPACE_KERNEL:
case MLX5_FLOW_NAMESPACE_LEFTOVERS:
+ case MLX5_FLOW_NAMESPACE_ANCHOR:
prio = type;
break;
case MLX5_FLOW_NAMESPACE_FDB:
@@ -1351,6 +1518,25 @@ static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns)
}
}
+#define ANCHOR_PRIO 0
+#define ANCHOR_SIZE 1
+static int create_anchor_flow_table(struct mlx5_core_dev
+ *dev)
+{
+ struct mlx5_flow_namespace *ns = NULL;
+ struct mlx5_flow_table *ft;
+
+ ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ANCHOR);
+ if (!ns)
+ return -EINVAL;
+ ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE);
+ if (IS_ERR(ft)) {
+ mlx5_core_err(dev, "Failed to create last anchor flow table");
+ return PTR_ERR(ft);
+ }
+ return 0;
+}
+
static int init_root_ns(struct mlx5_core_dev *dev)
{
@@ -1363,6 +1549,9 @@ static int init_root_ns(struct mlx5_core_dev *dev)
set_prio_attrs(dev->priv.root_ns);
+ if (create_anchor_flow_table(dev))
+ goto cleanup;
+
return 0;
cleanup:
@@ -1392,6 +1581,15 @@ static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev,
root_ns = NULL;
}
+static void destroy_flow_tables(struct fs_prio *prio)
+{
+ struct mlx5_flow_table *iter;
+ struct mlx5_flow_table *tmp;
+
+ fs_for_each_ft_safe(iter, tmp, prio)
+ mlx5_destroy_flow_table(iter);
+}
+
static void cleanup_root_ns(struct mlx5_core_dev *dev)
{
struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns;
@@ -1420,6 +1618,7 @@ static void cleanup_root_ns(struct mlx5_core_dev *dev)
list);
fs_get_obj(obj_iter_prio2, iter_prio2);
+ destroy_flow_tables(obj_iter_prio2);
if (tree_remove_node(iter_prio2)) {
mlx5_core_warn(dev,
"Priority %d wasn't destroyed, refcount > 1\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 00245fd7e4bc..f37a6248a27b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -68,6 +68,11 @@ struct fs_node {
struct mlx5_flow_rule {
struct fs_node node;
struct mlx5_flow_destination dest_attr;
+ /* next_ft should be accessed under chain_lock and only of
+ * destination type is FWD_NEXT_fT.
+ */
+ struct list_head next_ft;
+ u32 sw_action;
};
/* Type of children is mlx5_flow_group */
@@ -82,6 +87,10 @@ struct mlx5_flow_table {
unsigned int required_groups;
unsigned int num_groups;
} autogroup;
+ /* Protect fwd_rules */
+ struct mutex lock;
+ /* FWD rules that point on this flow table */
+ struct list_head fwd_rules;
};
/* Type of children is mlx5_flow_rule */
@@ -142,6 +151,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
#define fs_list_for_each_entry(pos, root) \
list_for_each_entry(pos, root, node.list)
+#define fs_list_for_each_entry_safe(pos, tmp, root) \
+ list_for_each_entry_safe(pos, tmp, root, node.list)
+
#define fs_for_each_ns_or_ft_reverse(pos, prio) \
list_for_each_entry_reverse(pos, &(prio)->node.children, list)
@@ -157,6 +169,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
#define fs_for_each_ft(pos, prio) \
fs_list_for_each_entry(pos, &(prio)->node.children)
+#define fs_for_each_ft_safe(pos, tmp, prio) \
+ fs_list_for_each_entry_safe(pos, tmp, &(prio)->node.children)
+
#define fs_for_each_fg(pos, ft) \
fs_list_for_each_entry(pos, &(ft)->node.children)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index a0e8cc8dcc67..8541a913f6a3 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -219,6 +219,7 @@ enum {
MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31,
MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32,
MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33,
+ MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34,
};
enum {
@@ -1160,6 +1161,8 @@ enum mlx4_net_trans_promisc_mode {
MLX4_FS_REGULAR = 1,
MLX4_FS_ALL_DEFAULT,
MLX4_FS_MC_DEFAULT,
+ MLX4_FS_MIRROR_RX_PORT,
+ MLX4_FS_MIRROR_SX_PORT,
MLX4_FS_UC_SNIFFER,
MLX4_FS_MC_SNIFFER,
MLX4_FS_MODE_NUM, /* should be last */
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 12079fd20413..9566b3b3b2c5 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1318,6 +1318,11 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz;
}
-#define MLX5_BY_PASS_NUM_PRIOS 9
+#define MLX5_BY_PASS_NUM_REGULAR_PRIOS 8
+#define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 8
+#define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1
+#define MLX5_BY_PASS_NUM_PRIOS (MLX5_BY_PASS_NUM_REGULAR_PRIOS +\
+ MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS +\
+ MLX5_BY_PASS_NUM_MULTICAST_PRIOS)
#endif /* MLX5_DEVICE_H */
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 8230caa3fb6e..8dec5508d93d 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -38,6 +38,10 @@
#define MLX5_FS_DEFAULT_FLOW_TAG 0x0
+enum {
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16,
+};
+
#define LEFTOVERS_RULE_NUM 2
static inline void build_leftovers_ft_param(int *priority,
int *n_ent,
@@ -52,6 +56,7 @@ enum mlx5_flow_namespace_type {
MLX5_FLOW_NAMESPACE_BYPASS,
MLX5_FLOW_NAMESPACE_KERNEL,
MLX5_FLOW_NAMESPACE_LEFTOVERS,
+ MLX5_FLOW_NAMESPACE_ANCHOR,
MLX5_FLOW_NAMESPACE_FDB,
};
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 3044cfa683f1..a3cacab22849 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -458,7 +458,8 @@ struct mlx5_ifc_ads_bits {
};
struct mlx5_ifc_flow_table_nic_cap_bits {
- u8 reserved_at_0[0x200];
+ u8 nic_rx_multi_path_tirs[0x1];
+ u8 reserved_at_1[0x1ff];
struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive;
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 0ff049bd9ad4..37dd534cbeab 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -424,11 +424,11 @@ typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent,
/**
* ib_mad_snoop_handler - Callback handler for snooping sent MADs.
* @mad_agent: MAD agent that snooped the MAD.
- * @send_wr: Work request information on the sent MAD.
+ * @send_buf: send MAD data buffer.
* @mad_send_wc: Work completion information on the sent MAD. Valid
* only for snooping that occurs on a send completion.
*
- * Clients snooping MADs should not modify data referenced by the @send_wr
+ * Clients snooping MADs should not modify data referenced by the @send_buf
* or @mad_send_wc.
*/
typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index bcd5b242e6b1..3a03c1d18afa 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1493,6 +1493,11 @@ enum ib_flow_domain {
IB_FLOW_DOMAIN_NUM /* Must be last */
};
+enum ib_flow_flags {
+ IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */
+ IB_FLOW_ATTR_FLAGS_RESERVED = 1UL << 2 /* Must be last */
+};
+
struct ib_flow_eth_filter {
u8 dst_mac[6];
u8 src_mac[6];
@@ -1853,6 +1858,8 @@ struct ib_device {
int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
+ void (*drain_rq)(struct ib_qp *qp);
+ void (*drain_sq)(struct ib_qp *qp);
struct ib_dma_mapping_ops *dma_ops;
@@ -3101,4 +3108,7 @@ int ib_sg_to_pages(struct ib_mr *mr,
int sg_nents,
int (*set_page)(struct ib_mr *, u64));
+void ib_drain_rq(struct ib_qp *qp);
+void ib_drain_sq(struct ib_qp *qp);
+void ib_drain_qp(struct ib_qp *qp);
#endif /* IB_VERBS_H */
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 52b4a2f993f2..1852e383afd6 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -109,14 +109,13 @@ struct p9_trans_rdma {
/**
* p9_rdma_context - Keeps track of in-process WR
*
- * @wc_op: The original WR op for when the CQE completes in error.
* @busa: Bus address to unmap when the WR completes
* @req: Keeps track of requests (send)
* @rc: Keepts track of replies (receive)
*/
struct p9_rdma_req;
struct p9_rdma_context {
- enum ib_wc_opcode wc_op;
+ struct ib_cqe cqe;
dma_addr_t busa;
union {
struct p9_req_t *req;
@@ -284,9 +283,12 @@ p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
}
static void
-handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
- struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
+recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct p9_client *client = cq->cq_context;
+ struct p9_trans_rdma *rdma = client->trans;
+ struct p9_rdma_context *c =
+ container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
struct p9_req_t *req;
int err = 0;
int16_t tag;
@@ -295,7 +297,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
DMA_FROM_DEVICE);
- if (status != IB_WC_SUCCESS)
+ if (wc->status != IB_WC_SUCCESS)
goto err_out;
err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
@@ -316,21 +318,32 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
req->rc = c->rc;
p9_client_cb(client, req, REQ_STATUS_RCVD);
+ out:
+ up(&rdma->rq_sem);
+ kfree(c);
return;
err_out:
- p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, status);
+ p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n",
+ req, err, wc->status);
rdma->state = P9_RDMA_FLUSHING;
client->status = Disconnected;
+ goto out;
}
static void
-handle_send(struct p9_client *client, struct p9_trans_rdma *rdma,
- struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
+send_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct p9_client *client = cq->cq_context;
+ struct p9_trans_rdma *rdma = client->trans;
+ struct p9_rdma_context *c =
+ container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
+
ib_dma_unmap_single(rdma->cm_id->device,
c->busa, c->req->tc->size,
DMA_TO_DEVICE);
+ up(&rdma->sq_sem);
+ kfree(c);
}
static void qp_event_handler(struct ib_event *event, void *context)
@@ -339,42 +352,6 @@ static void qp_event_handler(struct ib_event *event, void *context)
event->event, context);
}
-static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
-{
- struct p9_client *client = cq_context;
- struct p9_trans_rdma *rdma = client->trans;
- int ret;
- struct ib_wc wc;
-
- ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
- while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
- struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id;
-
- switch (c->wc_op) {
- case IB_WC_RECV:
- handle_recv(client, rdma, c, wc.status, wc.byte_len);
- up(&rdma->rq_sem);
- break;
-
- case IB_WC_SEND:
- handle_send(client, rdma, c, wc.status, wc.byte_len);
- up(&rdma->sq_sem);
- break;
-
- default:
- pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n",
- c->wc_op, wc.opcode, wc.status);
- break;
- }
- kfree(c);
- }
-}
-
-static void cq_event_handler(struct ib_event *e, void *v)
-{
- p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v);
-}
-
static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
{
if (!rdma)
@@ -387,7 +364,7 @@ static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
ib_dealloc_pd(rdma->pd);
if (rdma->cq && !IS_ERR(rdma->cq))
- ib_destroy_cq(rdma->cq);
+ ib_free_cq(rdma->cq);
if (rdma->cm_id && !IS_ERR(rdma->cm_id))
rdma_destroy_id(rdma->cm_id);
@@ -408,13 +385,14 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
goto error;
+ c->cqe.done = recv_done;
+
sge.addr = c->busa;
sge.length = client->msize;
sge.lkey = rdma->pd->local_dma_lkey;
wr.next = NULL;
- c->wc_op = IB_WC_RECV;
- wr.wr_id = (unsigned long) c;
+ wr.wr_cqe = &c->cqe;
wr.sg_list = &sge;
wr.num_sge = 1;
return ib_post_recv(rdma->qp, &wr, &bad_wr);
@@ -499,13 +477,14 @@ dont_need_post_recv:
goto send_error;
}
+ c->cqe.done = send_done;
+
sge.addr = c->busa;
sge.length = c->req->tc->size;
sge.lkey = rdma->pd->local_dma_lkey;
wr.next = NULL;
- c->wc_op = IB_WC_SEND;
- wr.wr_id = (unsigned long) c;
+ wr.wr_cqe = &c->cqe;
wr.opcode = IB_WR_SEND;
wr.send_flags = IB_SEND_SIGNALED;
wr.sg_list = &sge;
@@ -642,7 +621,6 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
struct p9_trans_rdma *rdma;
struct rdma_conn_param conn_param;
struct ib_qp_init_attr qp_attr;
- struct ib_cq_init_attr cq_attr = {};
/* Parse the transport specific mount options */
err = parse_opts(args, &opts);
@@ -695,13 +673,11 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
goto error;
/* Create the Completion Queue */
- cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1;
- rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
- cq_event_handler, client,
- &cq_attr);
+ rdma->cq = ib_alloc_cq(rdma->cm_id->device, client,
+ opts.sq_depth + opts.rq_depth + 1,
+ 0, IB_POLL_SOFTIRQ);
if (IS_ERR(rdma->cq))
goto error;
- ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
/* Create the Protection Domain */
rdma->pd = ib_alloc_pd(rdma->cm_id->device);