diff options
62 files changed, 1447 insertions, 620 deletions
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile index bf508b5550c4..dc21836b5a8d 100644 --- a/drivers/infiniband/Makefile +++ b/drivers/infiniband/Makefile @@ -1,18 +1,3 @@ obj-$(CONFIG_INFINIBAND) += core/ -obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/ -obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/ -obj-$(CONFIG_INFINIBAND_QIB) += hw/qib/ -obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/ -obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/ -obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/ -obj-$(CONFIG_INFINIBAND_CXGB4) += hw/cxgb4/ -obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/ -obj-$(CONFIG_MLX5_INFINIBAND) += hw/mlx5/ -obj-$(CONFIG_INFINIBAND_NES) += hw/nes/ -obj-$(CONFIG_INFINIBAND_OCRDMA) += hw/ocrdma/ -obj-$(CONFIG_INFINIBAND_USNIC) += hw/usnic/ -obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ -obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ -obj-$(CONFIG_INFINIBAND_SRPT) += ulp/srpt/ -obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/ -obj-$(CONFIG_INFINIBAND_ISERT) += ulp/isert/ +obj-$(CONFIG_INFINIBAND) += hw/ +obj-$(CONFIG_INFINIBAND) += ulp/ diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index f820958e4047..233eaf541f55 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -618,7 +618,7 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) { - bool preload = gfp_mask & __GFP_WAIT; + bool preload = !!(gfp_mask & __GFP_WAIT); unsigned long flags; int ret, id; diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 7d3292c7b4b4..cbd0383f622e 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -429,15 +429,19 @@ static void ib_port_release(struct kobject *kobj) struct attribute *a; int i; - for (i = 0; (a = p->gid_group.attrs[i]); ++i) - kfree(a); + if (p->gid_group.attrs) { + for (i = 0; (a = p->gid_group.attrs[i]); ++i) + kfree(a); - kfree(p->gid_group.attrs); + kfree(p->gid_group.attrs); + } - for (i = 0; (a = p->pkey_group.attrs[i]); ++i) - kfree(a); + if (p->pkey_group.attrs) { + for (i = 0; (a = p->pkey_group.attrs[i]); ++i) + kfree(a); - kfree(p->pkey_group.attrs); + kfree(p->pkey_group.attrs); + } kfree(p); } @@ -534,10 +538,12 @@ static int add_port(struct ib_device *device, int port_num, p->port_num = port_num; ret = kobject_init_and_add(&p->kobj, &port_type, - kobject_get(device->ports_parent), + device->ports_parent, "%d", port_num); - if (ret) - goto err_put; + if (ret) { + kfree(p); + return ret; + } ret = sysfs_create_group(&p->kobj, &pma_group); if (ret) @@ -585,6 +591,7 @@ err_free_pkey: kfree(p->pkey_group.attrs[i]); kfree(p->pkey_group.attrs); + p->pkey_group.attrs = NULL; err_remove_gid: sysfs_remove_group(&p->kobj, &p->gid_group); @@ -594,13 +601,13 @@ err_free_gid: kfree(p->gid_group.attrs[i]); kfree(p->gid_group.attrs); + p->gid_group.attrs = NULL; err_remove_pma: sysfs_remove_group(&p->kobj, &pma_group); err_put: - kobject_put(device->ports_parent); - kfree(p); + kobject_put(&p->kobj); return ret; } @@ -809,6 +816,22 @@ static struct attribute_group iw_stats_group = { .attrs = iw_proto_stats_attrs, }; +static void free_port_list_attributes(struct ib_device *device) +{ + struct kobject *p, *t; + + list_for_each_entry_safe(p, t, &device->port_list, entry) { + struct ib_port *port = container_of(p, struct ib_port, kobj); + list_del(&p->entry); + sysfs_remove_group(p, &pma_group); + sysfs_remove_group(p, &port->pkey_group); + sysfs_remove_group(p, &port->gid_group); + kobject_put(p); + } + + kobject_put(device->ports_parent); +} + int ib_device_register_sysfs(struct ib_device *device, int (*port_callback)(struct ib_device *, u8, struct kobject *)) @@ -835,7 +858,7 @@ int ib_device_register_sysfs(struct ib_device *device, } device->ports_parent = kobject_create_and_add("ports", - kobject_get(&class_dev->kobj)); + &class_dev->kobj); if (!device->ports_parent) { ret = -ENOMEM; goto err_put; @@ -862,21 +885,7 @@ int ib_device_register_sysfs(struct ib_device *device, return 0; err_put: - { - struct kobject *p, *t; - struct ib_port *port; - - list_for_each_entry_safe(p, t, &device->port_list, entry) { - list_del(&p->entry); - port = container_of(p, struct ib_port, kobj); - sysfs_remove_group(p, &pma_group); - sysfs_remove_group(p, &port->pkey_group); - sysfs_remove_group(p, &port->gid_group); - kobject_put(p); - } - } - - kobject_put(&class_dev->kobj); + free_port_list_attributes(device); err_unregister: device_unregister(class_dev); @@ -887,22 +896,18 @@ err: void ib_device_unregister_sysfs(struct ib_device *device) { - struct kobject *p, *t; - struct ib_port *port; - /* Hold kobject until ib_dealloc_device() */ - kobject_get(&device->dev.kobj); + struct kobject *kobj_dev = kobject_get(&device->dev.kobj); + int i; - list_for_each_entry_safe(p, t, &device->port_list, entry) { - list_del(&p->entry); - port = container_of(p, struct ib_port, kobj); - sysfs_remove_group(p, &pma_group); - sysfs_remove_group(p, &port->pkey_group); - sysfs_remove_group(p, &port->gid_group); - kobject_put(p); - } + if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) + sysfs_remove_group(kobj_dev, &iw_stats_group); + + free_port_list_attributes(device); + + for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) + device_remove_file(&device->dev, ib_class_attributes[i]); - kobject_put(device->ports_parent); device_unregister(&device->dev); } diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index f0d588f8859e..1acb99100556 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -98,7 +98,7 @@ struct ib_umad_port { struct ib_umad_device { int start_port, end_port; - struct kref ref; + struct kobject kobj; struct ib_umad_port port[0]; }; @@ -134,14 +134,18 @@ static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); static void ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device); -static void ib_umad_release_dev(struct kref *ref) +static void ib_umad_release_dev(struct kobject *kobj) { struct ib_umad_device *dev = - container_of(ref, struct ib_umad_device, ref); + container_of(kobj, struct ib_umad_device, kobj); kfree(dev); } +static struct kobj_type ib_umad_dev_ktype = { + .release = ib_umad_release_dev, +}; + static int hdr_size(struct ib_umad_file *file) { return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) : @@ -780,27 +784,19 @@ static int ib_umad_open(struct inode *inode, struct file *filp) { struct ib_umad_port *port; struct ib_umad_file *file; - int ret; + int ret = -ENXIO; port = container_of(inode->i_cdev, struct ib_umad_port, cdev); - if (port) - kref_get(&port->umad_dev->ref); - else - return -ENXIO; mutex_lock(&port->file_mutex); - if (!port->ib_dev) { - ret = -ENXIO; + if (!port->ib_dev) goto out; - } + ret = -ENOMEM; file = kzalloc(sizeof *file, GFP_KERNEL); - if (!file) { - kref_put(&port->umad_dev->ref, ib_umad_release_dev); - ret = -ENOMEM; + if (!file) goto out; - } mutex_init(&file->mutex); spin_lock_init(&file->send_lock); @@ -814,6 +810,13 @@ static int ib_umad_open(struct inode *inode, struct file *filp) list_add_tail(&file->port_list, &port->file_list); ret = nonseekable_open(inode, filp); + if (ret) { + list_del(&file->port_list); + kfree(file); + goto out; + } + + kobject_get(&port->umad_dev->kobj); out: mutex_unlock(&port->file_mutex); @@ -852,7 +855,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp) mutex_unlock(&file->port->file_mutex); kfree(file); - kref_put(&dev->ref, ib_umad_release_dev); + kobject_put(&dev->kobj); return 0; } @@ -880,10 +883,6 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) int ret; port = container_of(inode->i_cdev, struct ib_umad_port, sm_cdev); - if (port) - kref_get(&port->umad_dev->ref); - else - return -ENXIO; if (filp->f_flags & O_NONBLOCK) { if (down_trylock(&port->sm_sem)) { @@ -898,17 +897,27 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) } ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); - if (ret) { - up(&port->sm_sem); - goto fail; - } + if (ret) + goto err_up_sem; filp->private_data = port; - return nonseekable_open(inode, filp); + ret = nonseekable_open(inode, filp); + if (ret) + goto err_clr_sm_cap; + + kobject_get(&port->umad_dev->kobj); + + return 0; + +err_clr_sm_cap: + swap(props.set_port_cap_mask, props.clr_port_cap_mask); + ib_modify_port(port->ib_dev, port->port_num, 0, &props); + +err_up_sem: + up(&port->sm_sem); fail: - kref_put(&port->umad_dev->ref, ib_umad_release_dev); return ret; } @@ -927,7 +936,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) up(&port->sm_sem); - kref_put(&port->umad_dev->ref, ib_umad_release_dev); + kobject_put(&port->umad_dev->kobj); return ret; } @@ -995,6 +1004,7 @@ static int find_overflow_devnum(void) } static int ib_umad_init_port(struct ib_device *device, int port_num, + struct ib_umad_device *umad_dev, struct ib_umad_port *port) { int devnum; @@ -1027,6 +1037,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, cdev_init(&port->cdev, &umad_fops); port->cdev.owner = THIS_MODULE; + port->cdev.kobj.parent = &umad_dev->kobj; kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num); if (cdev_add(&port->cdev, base, 1)) goto err_cdev; @@ -1045,6 +1056,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, base += IB_UMAD_MAX_PORTS; cdev_init(&port->sm_cdev, &umad_sm_fops); port->sm_cdev.owner = THIS_MODULE; + port->sm_cdev.kobj.parent = &umad_dev->kobj; kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num); if (cdev_add(&port->sm_cdev, base, 1)) goto err_sm_cdev; @@ -1138,7 +1150,7 @@ static void ib_umad_add_one(struct ib_device *device) if (!umad_dev) return; - kref_init(&umad_dev->ref); + kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype); umad_dev->start_port = s; umad_dev->end_port = e; @@ -1146,7 +1158,8 @@ static void ib_umad_add_one(struct ib_device *device) for (i = s; i <= e; ++i) { umad_dev->port[i - s].umad_dev = umad_dev; - if (ib_umad_init_port(device, i, &umad_dev->port[i - s])) + if (ib_umad_init_port(device, i, umad_dev, + &umad_dev->port[i - s])) goto err; } @@ -1158,7 +1171,7 @@ err: while (--i >= s) ib_umad_kill_port(&umad_dev->port[i - s]); - kref_put(&umad_dev->ref, ib_umad_release_dev); + kobject_put(&umad_dev->kobj); } static void ib_umad_remove_one(struct ib_device *device) @@ -1172,7 +1185,7 @@ static void ib_umad_remove_one(struct ib_device *device) for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) ib_umad_kill_port(&umad_dev->port[i]); - kref_put(&umad_dev->ref, ib_umad_release_dev); + kobject_put(&umad_dev->kobj); } static char *umad_devnode(struct device *dev, umode_t *mode) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 92525f855d82..c2b89cc5dbca 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -48,7 +48,7 @@ #include "core_priv.h" -int ib_rate_to_mult(enum ib_rate rate) +__attribute_const__ int ib_rate_to_mult(enum ib_rate rate) { switch (rate) { case IB_RATE_2_5_GBPS: return 1; @@ -65,7 +65,7 @@ int ib_rate_to_mult(enum ib_rate rate) } EXPORT_SYMBOL(ib_rate_to_mult); -enum ib_rate mult_to_ib_rate(int mult) +__attribute_const__ enum ib_rate mult_to_ib_rate(int mult) { switch (mult) { case 1: return IB_RATE_2_5_GBPS; @@ -82,7 +82,7 @@ enum ib_rate mult_to_ib_rate(int mult) } EXPORT_SYMBOL(mult_to_ib_rate); -int ib_rate_to_mbps(enum ib_rate rate) +__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate) { switch (rate) { case IB_RATE_2_5_GBPS: return 2500; @@ -107,7 +107,7 @@ int ib_rate_to_mbps(enum ib_rate rate) } EXPORT_SYMBOL(ib_rate_to_mbps); -enum rdma_transport_type +__attribute_const__ enum rdma_transport_type rdma_node_get_transport(enum rdma_node_type node_type) { switch (node_type) { diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile new file mode 100644 index 000000000000..e900b03531a9 --- /dev/null +++ b/drivers/infiniband/hw/Makefile @@ -0,0 +1,12 @@ +obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/ +obj-$(CONFIG_INFINIBAND_IPATH) += ipath/ +obj-$(CONFIG_INFINIBAND_QIB) += qib/ +obj-$(CONFIG_INFINIBAND_EHCA) += ehca/ +obj-$(CONFIG_INFINIBAND_AMSO1100) += amso1100/ +obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/ +obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/ +obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/ +obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/ +obj-$(CONFIG_INFINIBAND_NES) += nes/ +obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ +obj-$(CONFIG_INFINIBAND_USNIC) += usnic/ diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index c3f5aca4ef00..de1c61b417d6 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -735,14 +735,12 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, ((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) | V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) | V_TPT_PAGE_SIZE(page_size)); - tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 : - cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3)); + tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3)); tpt.len = cpu_to_be32(len); tpt.va_hi = cpu_to_be32((u32) (to >> 32)); tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL)); tpt.rsvd_bind_cnt_or_pstag = 0; - tpt.rsvd_pbl_size = reset_tpt_entry ? 0 : - cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2)); + tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2)); } err = cxio_hal_ctrl_qp_write_mem(rdev_p, stag_idx + diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 095bb046e2c8..cb78b1e9bcd9 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -418,6 +418,7 @@ static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) skb->priority = CPL_PRIORITY_DATA; set_arp_failure_handler(skb, abort_arp_failure); req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 7de6c9fcf883..96d7131ab974 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -47,6 +47,8 @@ #include <net/ip6_route.h> #include <net/addrconf.h> +#include <rdma/ib_addr.h> + #include "iw_cxgb4.h" static char *states[] = { @@ -347,10 +349,7 @@ static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) static struct net_device *get_real_dev(struct net_device *egress_dev) { - struct net_device *phys_dev = egress_dev; - if (egress_dev->priv_flags & IFF_802_1Q_VLAN) - phys_dev = vlan_dev_real_dev(egress_dev); - return phys_dev; + return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev; } static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev) @@ -1788,16 +1787,16 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, if (!ep->l2t) goto out; ep->mtu = dst_mtu(dst); - ep->tx_chan = cxgb4_port_chan(n->dev); - ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1; + ep->tx_chan = cxgb4_port_chan(pdev); + ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; step = cdev->rdev.lldi.ntxq / cdev->rdev.lldi.nchan; - ep->txq_idx = cxgb4_port_idx(n->dev) * step; - ep->ctrlq_idx = cxgb4_port_idx(n->dev); + ep->txq_idx = cxgb4_port_idx(pdev) * step; + ep->ctrlq_idx = cxgb4_port_idx(pdev); step = cdev->rdev.lldi.nrxq / cdev->rdev.lldi.nchan; ep->rss_qid = cdev->rdev.lldi.rxq_ids[ - cxgb4_port_idx(n->dev) * step]; + cxgb4_port_idx(pdev) * step]; if (clear_mpa_v1) { ep->retry_with_mpa_v1 = 0; diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index cfaa56ada189..7151a02b4ebb 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -940,7 +940,6 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, if (!mm2) goto err4; - memset(&uresp, 0, sizeof(uresp)); uresp.qid_mask = rhp->rdev.cqmask; uresp.cqid = chp->cq.cqid; uresp.size = chp->cq.size; @@ -951,7 +950,8 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, uresp.gts_key = ucontext->key; ucontext->key += PAGE_SIZE; spin_unlock(&ucontext->mmap_lock); - ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); + ret = ib_copy_to_udata(udata, &uresp, + sizeof(uresp) - sizeof(uresp.reserved)); if (ret) goto err5; diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index bd749e75cec4..dd93aadc996e 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -779,6 +779,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) pci_resource_len(devp->rdev.lldi.pdev, 2)); if (!devp->rdev.bar2_kva) { pr_err(MOD "Unable to ioremap BAR2\n"); + ib_dealloc_device(&devp->ibdev); return ERR_PTR(-EINVAL); } } else if (ocqp_supported(infop)) { @@ -790,6 +791,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) devp->rdev.lldi.vr->ocq.size); if (!devp->rdev.oc_mw_kva) { pr_err(MOD "Unable to ioremap onchip mem\n"); + ib_dealloc_device(&devp->ibdev); return ERR_PTR(-EINVAL); } } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index a94a3e12c349..c777e22bd8d5 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -122,7 +122,7 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, INIT_LIST_HEAD(&context->mmaps); spin_lock_init(&context->mmap_lock); - if (udata->outlen < sizeof(uresp)) { + if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) { if (!warned++) pr_err(MOD "Warning - downlevel libcxgb4 (non-fatal), device status page disabled."); rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED; @@ -140,7 +140,8 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, context->key += PAGE_SIZE; spin_unlock(&context->mmap_lock); - ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + ret = ib_copy_to_udata(udata, &uresp, + sizeof(uresp) - sizeof(uresp.reserved)); if (ret) goto err_mm; diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h index 11ccd276e5d9..cbd0ce170728 100644 --- a/drivers/infiniband/hw/cxgb4/user.h +++ b/drivers/infiniband/hw/cxgb4/user.h @@ -48,6 +48,7 @@ struct c4iw_create_cq_resp { __u32 cqid; __u32 size; __u32 qid_mask; + __u32 reserved; /* explicit padding (optional for i386) */ }; @@ -74,5 +75,6 @@ struct c4iw_create_qp_resp { struct c4iw_alloc_ucontext_resp { __u64 status_page_key; __u32 status_page_size; + __u32 reserved; /* explicit padding (optional for i386) */ }; #endif diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index e2f9a51f4a38..45802e97332e 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -346,6 +346,10 @@ static ssize_t ipath_diagpkt_write(struct file *fp, ret = -EFAULT; goto bail; } + dp.len = odp.len; + dp.unit = odp.unit; + dp.data = odp.data; + dp.pbc_wd = 0; } else { ret = -EINVAL; goto bail; diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 26dfbc8ee0f1..01ba792791a0 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -70,7 +70,7 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd) if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { int i; if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) && - dd->ipath_lastcancel > jiffies) { + time_after(dd->ipath_lastcancel, jiffies)) { __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG, "SendbufErrs %lx %lx", sbuf[0], sbuf[1]); @@ -755,7 +755,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) /* likely due to cancel; so suppress message unless verbose */ if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) && - dd->ipath_lastcancel > jiffies) { + time_after(dd->ipath_lastcancel, jiffies)) { /* armlaunch takes precedence; it often causes both. */ ipath_cdbg(VERBOSE, "Suppressed %s error (%llx) after sendbuf cancel\n", diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index 98ac18ec977e..17a517766ad2 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -247,7 +247,7 @@ static void sdma_abort_task(unsigned long opaque) /* ipath_sdma_abort() is done, waiting for interrupt */ if (status == IPATH_SDMA_ABORT_DISARMED) { - if (jiffies < dd->ipath_sdma_abort_intr_timeout) + if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout)) goto resched_noprint; /* give up, intr got lost somewhere */ ipath_dbg("give up waiting for SDMADISABLED intr\n"); @@ -341,7 +341,7 @@ resched: * JAG - this is bad to just have default be a loop without * state change */ - if (jiffies > dd->ipath_sdma_abort_jiffies) { + if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) { ipath_dbg("looping with status 0x%08lx\n", dd->ipath_sdma_status); dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ; diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 170dca608042..2d8c3397774f 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -73,7 +73,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr { struct mlx4_ib_dev *ibdev = to_mdev(pd->device); struct mlx4_dev *dev = ibdev->dev; - int is_mcast; + int is_mcast = 0; struct in6_addr in6; u16 vlan_tag; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 5f640814cc81..1066eec854a9 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -102,7 +102,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * int err; err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size, - PAGE_SIZE * 2, &buf->buf); + PAGE_SIZE * 2, &buf->buf, GFP_KERNEL); if (err) goto out; @@ -113,7 +113,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf); + err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL); if (err) goto err_mtt; @@ -209,7 +209,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector uar = &to_mucontext(context)->uar; } else { - err = mlx4_db_alloc(dev->dev, &cq->db, 1); + err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL); if (err) goto err_cq; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index fd36ec672632..287ad0564acd 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -478,10 +478,6 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE) return -EAGAIN; - /* QP0 forwarding only for Dom0 */ - if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave)) - return -EINVAL; - if (!dest_qpt) tun_qp = &tun_ctx->qp[0]; else @@ -667,6 +663,21 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, } /* Class-specific handling */ switch (mad->mad_hdr.mgmt_class) { + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + /* 255 indicates the dom0 */ + if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) { + if (!mlx4_vf_smi_enabled(dev->dev, slave, port)) + return -EPERM; + /* for a VF. drop unsolicited MADs */ + if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) { + mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n", + slave, mad->mad_hdr.mgmt_class, + mad->mad_hdr.method); + return -EINVAL; + } + } + break; case IB_MGMT_CLASS_SUBN_ADM: if (mlx4_ib_demux_sa_handler(ibdev, port, slave, (struct ib_sa_mad *) mad)) @@ -1165,10 +1176,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE) return -EAGAIN; - /* QP0 forwarding only for Dom0 */ - if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave)) - return -EINVAL; - if (dest_qpt == IB_QPT_SMI) { src_qpnum = 0; sqp = &sqp_ctx->qp[0]; @@ -1285,11 +1292,6 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc "belongs to another slave\n", wc->src_qp); return; } - if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) { - mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: " - "non-master trying to send QP0 packets\n", wc->src_qp); - return; - } /* Map transaction ID */ ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map, @@ -1317,6 +1319,12 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc /* Class-specific handling */ switch (tunnel->mad.mad_hdr.mgmt_class) { + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + if (slave != mlx4_master_func_num(dev->dev) && + !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port)) + return; + break; case IB_MGMT_CLASS_SUBN_ADM: if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave, (struct ib_sa_mad *) &tunnel->mad)) @@ -1749,9 +1757,9 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, return -EEXIST; ctx->state = DEMUX_PV_STATE_STARTING; - /* have QP0 only on port owner, and only if link layer is IB */ - if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) && - rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND) + /* have QP0 only if link layer is IB */ + if (rdma_port_get_link_layer(ibdev, ctx->port) == + IB_LINK_LAYER_INFINIBAND) ctx->has_smi = 1; if (ctx->has_smi) { diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1b6dbe156a37..3c3806aff712 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -544,12 +544,11 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, return 0; } -static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, - u32 cap_mask) +static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, + u32 cap_mask) { struct mlx4_cmd_mailbox *mailbox; int err; - u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; mailbox = mlx4_alloc_cmd_mailbox(dev->dev); if (IS_ERR(mailbox)) @@ -563,8 +562,8 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask); } - err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(dev->dev, mailbox); return err; @@ -573,11 +572,20 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { + struct mlx4_ib_dev *mdev = to_mdev(ibdev); + u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; struct ib_port_attr attr; u32 cap_mask; int err; - mutex_lock(&to_mdev(ibdev)->cap_mask_mutex); + /* return OK if this is RoCE. CM calls ib_modify_port() regardless + * of whether port link layer is ETH or IB. For ETH ports, qkey + * violations and port capabilities are not meaningful. + */ + if (is_eth) + return 0; + + mutex_lock(&mdev->cap_mask_mutex); err = mlx4_ib_query_port(ibdev, port, &attr); if (err) @@ -586,9 +594,9 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) & ~props->clr_port_cap_mask; - err = mlx4_SET_PORT(to_mdev(ibdev), port, - !!(mask & IB_PORT_RESET_QKEY_CNTR), - cap_mask); + err = mlx4_ib_SET_PORT(mdev, port, + !!(mask & IB_PORT_RESET_QKEY_CNTR), + cap_mask); out: mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index f589522fddfd..bb8c9dd442ae 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -156,6 +156,7 @@ enum mlx4_ib_qp_flags { MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP, + MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO, MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30, MLX4_IB_SRIOV_SQP = 1 << 31, }; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 41308af4163c..5b0cb8e2d807 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -608,9 +608,20 @@ static int qp_has_rq(struct ib_qp_init_attr *attr) return !attr->srq; } +static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn) +{ + int i; + for (i = 0; i < dev->caps.num_ports; i++) { + if (qpn == dev->caps.qp0_proxy[i]) + return !!dev->caps.qp0_qkey[i]; + } + return 0; +} + static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, - struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp) + struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp, + gfp_t gfp) { int qpn; int err; @@ -625,10 +636,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) { if (init_attr->qp_type == IB_QPT_GSI) qp_type = MLX4_IB_QPT_PROXY_GSI; - else if (mlx4_is_master(dev->dev)) - qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER; - else - qp_type = MLX4_IB_QPT_PROXY_SMI; + else { + if (mlx4_is_master(dev->dev) || + qp0_enabled_vf(dev->dev, sqpn)) + qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER; + else + qp_type = MLX4_IB_QPT_PROXY_SMI; + } } qpn = sqpn; /* add extra sg entry for tunneling */ @@ -643,7 +657,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, return -EINVAL; if (tnl_init->proxy_qp_type == IB_QPT_GSI) qp_type = MLX4_IB_QPT_TUN_GSI; - else if (tnl_init->slave == mlx4_master_func_num(dev->dev)) + else if (tnl_init->slave == mlx4_master_func_num(dev->dev) || + mlx4_vf_smi_enabled(dev->dev, tnl_init->slave, + tnl_init->port)) qp_type = MLX4_IB_QPT_TUN_SMI_OWNER; else qp_type = MLX4_IB_QPT_TUN_SMI; @@ -658,14 +674,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI || (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { - sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL); + sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp); if (!sqp) return -ENOMEM; qp = &sqp->qp; qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; } else { - qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL); + qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp); if (!qp) return -ENOMEM; qp->pri.vid = 0xFFFF; @@ -748,14 +764,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; if (qp_has_rq(init_attr)) { - err = mlx4_db_alloc(dev->dev, &qp->db, 0); + err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp); if (err) goto err; *qp->db.db = 0; } - if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) { + if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) { err = -ENOMEM; goto err_db; } @@ -765,13 +781,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf); + err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp); if (err) goto err_mtt; - qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL); - qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL); - + qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp); + qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp); if (!qp->sq.wrid || !qp->rq.wrid) { err = -ENOMEM; goto err_wrid; @@ -801,7 +816,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_proxy; } - err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp); if (err) goto err_qpn; @@ -1040,7 +1055,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp = NULL; int err; u16 xrcdn = 0; + gfp_t gfp; + gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ? + GFP_NOIO : GFP_KERNEL; /* * We only support LSO, vendor flag1, and multicast loopback blocking, * and only for kernel UD QPs. @@ -1049,7 +1067,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK | MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP | - MLX4_IB_QP_NETIF)) + MLX4_IB_QP_NETIF | + MLX4_IB_QP_CREATE_USE_GFP_NOIO)) return ERR_PTR(-EINVAL); if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { @@ -1059,7 +1078,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, if (init_attr->create_flags && (udata || - ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) && + ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) && init_attr->qp_type != IB_QPT_UD) || ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) && init_attr->qp_type > IB_QPT_GSI))) @@ -1079,7 +1098,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_RAW_PACKET: - qp = kzalloc(sizeof *qp, GFP_KERNEL); + qp = kzalloc(sizeof *qp, gfp); if (!qp) return ERR_PTR(-ENOMEM); qp->pri.vid = 0xFFFF; @@ -1088,7 +1107,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_UD: { err = create_qp_common(to_mdev(pd->device), pd, init_attr, - udata, 0, &qp); + udata, 0, &qp, gfp); if (err) return ERR_PTR(err); @@ -1106,7 +1125,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, get_sqp_num(to_mdev(pd->device), init_attr), - &qp); + &qp, gfp); if (err) return ERR_PTR(err); @@ -1930,6 +1949,19 @@ out: return err; } +static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) +{ + int i; + for (i = 0; i < dev->caps.num_ports; i++) { + if (qpn == dev->caps.qp0_proxy[i] || + qpn == dev->caps.qp0_tunnel[i]) { + *qkey = dev->caps.qp0_qkey[i]; + return 0; + } + } + return -EINVAL; +} + static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) @@ -1987,8 +2019,13 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); - if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) - return -EINVAL; + if (mlx4_is_master(mdev->dev)) { + if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + return -EINVAL; + } else { + if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + return -EINVAL; + } sqp->ud_header.deth.qkey = cpu_to_be32(qkey); sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn); @@ -2370,7 +2407,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr, enum ib_qp_type qpt) + struct ib_send_wr *wr, + enum mlx4_ib_qp_type qpt) { union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av; struct mlx4_av sqp_av = {0}; @@ -2383,8 +2421,10 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, cpu_to_be32(0xf0000000); memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av)); - /* This function used only for sending on QP1 proxies */ - dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]); + if (qpt == MLX4_IB_QPT_PROXY_GSI) + dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]); + else + dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]); /* Use QKEY from the QP context, which is set by master */ dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); } @@ -2679,11 +2719,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case MLX4_IB_QPT_PROXY_SMI_OWNER: - if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) { - err = -ENOSYS; - *bad_wr = wr; - goto out; - } err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; @@ -2700,16 +2735,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += seglen / 16; break; case MLX4_IB_QPT_PROXY_SMI: - /* don't allow QP0 sends on guests */ - err = -ENOSYS; - *bad_wr = wr; - goto out; case MLX4_IB_QPT_PROXY_GSI: /* If we are tunneling special qps, this is a UD qp. * In this case we first add a UD segment targeting * the tunnel qp, and then add a header with address * information */ - set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type); + set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, + qp->mlx4_ib_qp_type); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; build_tunnel_header(wr, wqe, &seglen); diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 60c5fb025fc7..62d9285300af 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -134,13 +134,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_mtt; } else { - err = mlx4_db_alloc(dev->dev, &srq->db, 0); + err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL); if (err) goto err_srq; *srq->db.db = 0; - if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) { + if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf, + GFP_KERNEL)) { err = -ENOMEM; goto err_db; } @@ -165,7 +166,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf); + err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL); if (err) goto err_mtt; diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 5a38e43eca65..cb4c66e723b5 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -389,8 +389,10 @@ struct mlx4_port { struct mlx4_ib_dev *dev; struct attribute_group pkey_group; struct attribute_group gid_group; - u8 port_num; + struct device_attribute enable_smi_admin; + struct device_attribute smi_enabled; int slave; + u8 port_num; }; @@ -558,6 +560,101 @@ err: return NULL; } +static ssize_t sysfs_show_smi_enabled(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mlx4_port *p = + container_of(attr, struct mlx4_port, smi_enabled); + ssize_t len = 0; + + if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num)) + len = sprintf(buf, "%d\n", 1); + else + len = sprintf(buf, "%d\n", 0); + + return len; +} + +static ssize_t sysfs_show_enable_smi_admin(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlx4_port *p = + container_of(attr, struct mlx4_port, enable_smi_admin); + ssize_t len = 0; + + if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num)) + len = sprintf(buf, "%d\n", 1); + else + len = sprintf(buf, "%d\n", 0); + + return len; +} + +static ssize_t sysfs_store_enable_smi_admin(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mlx4_port *p = + container_of(attr, struct mlx4_port, enable_smi_admin); + int enable; + + if (sscanf(buf, "%i", &enable) != 1 || + enable < 0 || enable > 1) + return -EINVAL; + + if (mlx4_vf_set_enable_smi_admin(p->dev->dev, p->slave, p->port_num, enable)) + return -EINVAL; + return count; +} + +static int add_vf_smi_entries(struct mlx4_port *p) +{ + int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) == + IB_LINK_LAYER_ETHERNET; + int ret; + + /* do not display entries if eth transport, or if master */ + if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev)) + return 0; + + sysfs_attr_init(&p->smi_enabled.attr); + p->smi_enabled.show = sysfs_show_smi_enabled; + p->smi_enabled.store = NULL; + p->smi_enabled.attr.name = "smi_enabled"; + p->smi_enabled.attr.mode = 0444; + ret = sysfs_create_file(&p->kobj, &p->smi_enabled.attr); + if (ret) { + pr_err("failed to create smi_enabled\n"); + return ret; + } + + sysfs_attr_init(&p->enable_smi_admin.attr); + p->enable_smi_admin.show = sysfs_show_enable_smi_admin; + p->enable_smi_admin.store = sysfs_store_enable_smi_admin; + p->enable_smi_admin.attr.name = "enable_smi_admin"; + p->enable_smi_admin.attr.mode = 0644; + ret = sysfs_create_file(&p->kobj, &p->enable_smi_admin.attr); + if (ret) { + pr_err("failed to create enable_smi_admin\n"); + sysfs_remove_file(&p->kobj, &p->smi_enabled.attr); + return ret; + } + return 0; +} + +static void remove_vf_smi_entries(struct mlx4_port *p) +{ + int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) == + IB_LINK_LAYER_ETHERNET; + + if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev)) + return; + + sysfs_remove_file(&p->kobj, &p->smi_enabled.attr); + sysfs_remove_file(&p->kobj, &p->enable_smi_admin.attr); +} + static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) { struct mlx4_port *p; @@ -602,6 +699,10 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) if (ret) goto err_free_gid; + ret = add_vf_smi_entries(p); + if (ret) + goto err_free_gid; + list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]); return 0; @@ -669,6 +770,7 @@ err_add: mport = container_of(p, struct mlx4_port, kobj); sysfs_remove_group(p, &mport->pkey_group); sysfs_remove_group(p, &mport->gid_group); + remove_vf_smi_entries(mport); kobject_put(p); } kobject_put(dev->dev_ports_parent[slave]); @@ -713,6 +815,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device) port = container_of(p, struct mlx4_port, kobj); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); + remove_vf_smi_entries(port); kobject_put(p); kobject_put(device->dev_ports_parent[slave]); } diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 62bb6b49dc1d..8ae4f896cb41 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -32,6 +32,7 @@ #include <linux/kref.h> #include <rdma/ib_umem.h> +#include <rdma/ib_user_verbs.h> #include "mlx5_ib.h" #include "user.h" @@ -602,14 +603,24 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int *cqe_size, int *index, int *inlen) { struct mlx5_ib_create_cq ucmd; + size_t ucmdlen; int page_shift; int npages; int ncont; int err; - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + ucmdlen = + (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < + sizeof(ucmd)) ? (sizeof(ucmd) - + sizeof(ucmd.reserved)) : sizeof(ucmd); + + if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) return -EFAULT; + if (ucmdlen == sizeof(ucmd) && + ucmd.reserved != 0) + return -EINVAL; + if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 50541586e0a6..f2ccf1a5a291 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -264,8 +264,6 @@ struct mlx5_ib_mr { __be64 *pas; dma_addr_t dma; int npages; - struct completion done; - enum ib_wc_status status; struct mlx5_ib_dev *dev; struct mlx5_create_mkey_mbox_out out; struct mlx5_core_sig_ctx *sig; @@ -277,6 +275,17 @@ struct mlx5_ib_fast_reg_page_list { dma_addr_t map; }; +struct mlx5_ib_umr_context { + enum ib_wc_status status; + struct completion done; +}; + +static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) +{ + context->status = -1; + init_completion(&context->done); +} + struct umr_common { struct ib_pd *pd; struct ib_cq *cq; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 81392b26d078..afa873bd028e 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -73,6 +73,8 @@ static void reg_mr_callback(int status, void *context) struct mlx5_cache_ent *ent = &cache->ent[c]; u8 key; unsigned long flags; + struct mlx5_mr_table *table = &dev->mdev.priv.mr_table; + int err; spin_lock_irqsave(&ent->lock, flags); ent->pending--; @@ -107,6 +109,13 @@ static void reg_mr_callback(int status, void *context) ent->cur++; ent->size++; spin_unlock_irqrestore(&ent->lock, flags); + + write_lock_irqsave(&table->lock, flags); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key), + &mr->mmr); + if (err) + pr_err("Error inserting to mr tree. 0x%x\n", -err); + write_unlock_irqrestore(&table->lock, flags); } static int add_keys(struct mlx5_ib_dev *dev, int c, int num) @@ -699,7 +708,7 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) { - struct mlx5_ib_mr *mr; + struct mlx5_ib_umr_context *context; struct ib_wc wc; int err; @@ -712,9 +721,9 @@ void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) if (err == 0) break; - mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id; - mr->status = wc.status; - complete(&mr->done); + context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id; + context->status = wc.status; + complete(&context->done); } ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); } @@ -726,11 +735,12 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, struct mlx5_ib_dev *dev = to_mdev(pd->device); struct device *ddev = dev->ib_dev.dma_device; struct umr_common *umrc = &dev->umrc; + struct mlx5_ib_umr_context umr_context; struct ib_send_wr wr, *bad; struct mlx5_ib_mr *mr; struct ib_sge sg; int size = sizeof(u64) * npages; - int err; + int err = 0; int i; for (i = 0; i < 1; i++) { @@ -751,7 +761,7 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); if (!mr->pas) { err = -ENOMEM; - goto error; + goto free_mr; } mlx5_ib_populate_pas(dev, umem, page_shift, @@ -760,44 +770,46 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size, DMA_TO_DEVICE); if (dma_mapping_error(ddev, mr->dma)) { - kfree(mr->pas); err = -ENOMEM; - goto error; + goto free_pas; } memset(&wr, 0, sizeof(wr)); - wr.wr_id = (u64)(unsigned long)mr; + wr.wr_id = (u64)(unsigned long)&umr_context; prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags); - /* We serialize polls so one process does not kidnap another's - * completion. This is not a problem since wr is completed in - * around 1 usec - */ + mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); - init_completion(&mr->done); err = ib_post_send(umrc->qp, &wr, &bad); if (err) { mlx5_ib_warn(dev, "post send failed, err %d\n", err); - up(&umrc->sem); - goto error; + goto unmap_dma; + } else { + wait_for_completion(&umr_context.done); + if (umr_context.status != IB_WC_SUCCESS) { + mlx5_ib_warn(dev, "reg umr failed\n"); + err = -EFAULT; + } } - wait_for_completion(&mr->done); - up(&umrc->sem); + mr->mmr.iova = virt_addr; + mr->mmr.size = len; + mr->mmr.pd = to_mpd(pd)->pdn; + +unmap_dma: + up(&umrc->sem); dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); + +free_pas: kfree(mr->pas); - if (mr->status != IB_WC_SUCCESS) { - mlx5_ib_warn(dev, "reg umr failed\n"); - err = -EFAULT; - goto error; +free_mr: + if (err) { + free_cached_mr(dev, mr); + return ERR_PTR(err); } return mr; - -error: - free_cached_mr(dev, mr); - return ERR_PTR(err); } static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, @@ -926,24 +938,26 @@ error: static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct umr_common *umrc = &dev->umrc; + struct mlx5_ib_umr_context umr_context; struct ib_send_wr wr, *bad; int err; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (u64)(unsigned long)mr; + wr.wr_id = (u64)(unsigned long)&umr_context; prep_umr_unreg_wqe(dev, &wr, mr->mmr.key); + mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); - init_completion(&mr->done); err = ib_post_send(umrc->qp, &wr, &bad); if (err) { up(&umrc->sem); mlx5_ib_dbg(dev, "err %d\n", err); goto error; + } else { + wait_for_completion(&umr_context.done); + up(&umrc->sem); } - wait_for_completion(&mr->done); - up(&umrc->sem); - if (mr->status != IB_WC_SUCCESS) { + if (umr_context.status != IB_WC_SUCCESS) { mlx5_ib_warn(dev, "unreg umr failed\n"); err = -EFAULT; goto error; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index dc930ed21eca..d13ddf1c0033 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -574,6 +574,10 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, uar_index = uuarn_to_uar_index(&context->uuari, uuarn); mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index); + qp->rq.offset = 0; + qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); + qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; + err = set_user_buf_size(dev, qp, &ucmd); if (err) goto err_uuar; @@ -2078,6 +2082,7 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, struct ib_sig_domain *wire = &sig_attrs->wire; int ret, selector; + memset(bsf, 0, sizeof(*bsf)); switch (sig_attrs->mem.sig_type) { case IB_SIG_TYPE_T10_DIF: if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF) @@ -2090,9 +2095,11 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, /* Same block structure */ basic->bsf_size_sbs = 1 << 4; if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) - basic->wire.copy_byte_mask = 0xff; - else - basic->wire.copy_byte_mask = 0x3f; + basic->wire.copy_byte_mask |= 0xc0; + if (mem->sig.dif.app_tag == wire->sig.dif.app_tag) + basic->wire.copy_byte_mask |= 0x30; + if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag) + basic->wire.copy_byte_mask |= 0x0f; } else basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); @@ -2131,9 +2138,13 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, int ret; int wqe_size; - if (!wr->wr.sig_handover.prot) { + if (!wr->wr.sig_handover.prot || + (data_key == wr->wr.sig_handover.prot->lkey && + data_va == wr->wr.sig_handover.prot->addr && + data_len == wr->wr.sig_handover.prot->length)) { /** * Source domain doesn't contain signature information + * or data and protection are interleaved in memory. * So need construct: * ------------------ * | data_klm | @@ -2187,23 +2198,13 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, data_sentry->bcount = cpu_to_be16(block_size); data_sentry->key = cpu_to_be32(data_key); data_sentry->va = cpu_to_be64(data_va); + data_sentry->stride = cpu_to_be16(block_size); + prot_sentry->bcount = cpu_to_be16(prot_size); prot_sentry->key = cpu_to_be32(prot_key); + prot_sentry->va = cpu_to_be64(prot_va); + prot_sentry->stride = cpu_to_be16(prot_size); - if (prot_key == data_key && prot_va == data_va) { - /** - * The data and protection are interleaved - * in a single memory region - **/ - prot_sentry->va = cpu_to_be64(data_va + block_size); - prot_sentry->stride = cpu_to_be16(block_size + prot_size); - data_sentry->stride = prot_sentry->stride; - } else { - /* The data and protection are two different buffers */ - prot_sentry->va = cpu_to_be64(prot_va); - data_sentry->stride = cpu_to_be16(block_size); - prot_sentry->stride = cpu_to_be16(prot_size); - } wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) + sizeof(*prot_sentry), 64); } @@ -2275,7 +2276,10 @@ static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, /* length of the protected region, data + protection */ region_len = wr->sg_list->length; - if (wr->wr.sig_handover.prot) + if (wr->wr.sig_handover.prot && + (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey || + wr->wr.sig_handover.prot->addr != wr->sg_list->addr || + wr->wr.sig_handover.prot->length != wr->sg_list->length)) region_len += wr->wr.sig_handover.prot->length; /** diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 210b3eaf188a..384af6dec5eb 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -35,6 +35,7 @@ #include <linux/mlx5/srq.h> #include <linux/slab.h> #include <rdma/ib_umem.h> +#include <rdma/ib_user_verbs.h> #include "mlx5_ib.h" #include "user.h" @@ -78,16 +79,27 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_create_srq ucmd; + size_t ucmdlen; int err; int npages; int page_shift; int ncont; u32 offset; - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + ucmdlen = + (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < + sizeof(ucmd)) ? (sizeof(ucmd) - + sizeof(ucmd.reserved)) : sizeof(ucmd); + + if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) { mlx5_ib_dbg(dev, "failed copy udata\n"); return -EFAULT; } + + if (ucmdlen == sizeof(ucmd) && + ucmd.reserved != 0) + return -EINVAL; + srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size, diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index 0f4f8e42a17f..d0ba264ac1ed 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -91,6 +91,7 @@ struct mlx5_ib_create_cq { __u64 buf_addr; __u64 db_addr; __u32 cqe_size; + __u32 reserved; /* explicit padding (optional on i386) */ }; struct mlx5_ib_create_cq_resp { @@ -109,6 +110,7 @@ struct mlx5_ib_create_srq { __u64 buf_addr; __u64 db_addr; __u32 flags; + __u32 reserved; /* explicit padding (optional on i386) */ }; struct mlx5_ib_create_srq_resp { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 6c54106f5e64..41a9aec9998d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -510,16 +510,9 @@ exit: return status; } -static int ocrdma_debugfs_open(struct inode *inode, struct file *file) -{ - if (inode->i_private) - file->private_data = inode->i_private; - return 0; -} - static const struct file_operations ocrdma_dbg_ops = { .owner = THIS_MODULE, - .open = ocrdma_debugfs_open, + .open = simple_open, .read = ocrdma_dbgfs_ops_read, }; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 5b7aeb224a30..8d3c78ddc906 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1272,7 +1272,7 @@ static int qib_notify_dca(struct notifier_block *nb, unsigned long event, * Do all the generic driver unit- and chip-independent memory * allocation and initialization. */ -static int __init qlogic_ib_init(void) +static int __init qib_ib_init(void) { int ret; @@ -1316,12 +1316,12 @@ bail: return ret; } -module_init(qlogic_ib_init); +module_init(qib_ib_init); /* * Do the non-unit driver cleanup, memory free, etc. at unload. */ -static void __exit qlogic_ib_cleanup(void) +static void __exit qib_ib_cleanup(void) { int ret; @@ -1346,7 +1346,7 @@ static void __exit qlogic_ib_cleanup(void) qib_dev_cleanup(); } -module_exit(qlogic_ib_cleanup); +module_exit(qib_ib_cleanup); /* this can only be called after a successful initialization */ static void cleanup_device_data(struct qib_devdata *dd) diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index edad991d60ed..22c720e5740d 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -1028,7 +1028,7 @@ static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys) event.event = IB_EVENT_PKEY_CHANGE; event.device = &dd->verbs_dev.ibdev; - event.element.port_num = 1; + event.element.port_num = port; ib_dispatch_event(&event); } return 0; diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 0cad0c40d742..7fcc150d603c 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -985,7 +985,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, struct ib_qp *ret; if (init_attr->cap.max_send_sge > ib_qib_max_sges || - init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) { + init_attr->cap.max_send_wr > ib_qib_max_qp_wrs || + init_attr->create_flags) { ret = ERR_PTR(-EINVAL); goto bail; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index d48d2c0a2e3c..53bd6a2d9cdb 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -466,6 +466,9 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, ucontext = to_uucontext(pd->uobject->context); us_ibdev = to_usdev(pd->device); + if (init_attr->create_flags) + return ERR_PTR(-EINVAL); + err = ib_copy_from_udata(&cmd, udata, sizeof(cmd)); if (err) { usnic_err("%s: cannot copy udata for create_qp\n", diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c index d135ad90d914..3a4288e0fbac 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + #include <linux/init.h> #include <linux/list.h> #include <linux/slab.h> diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile new file mode 100644 index 000000000000..f3c7dcf03098 --- /dev/null +++ b/drivers/infiniband/ulp/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_INFINIBAND_IPOIB) += ipoib/ +obj-$(CONFIG_INFINIBAND_SRP) += srp/ +obj-$(CONFIG_INFINIBAND_SRPT) += srpt/ +obj-$(CONFIG_INFINIBAND_ISER) += iser/ +obj-$(CONFIG_INFINIBAND_ISERT) += isert/ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 1377f85911c2..933efcea0d03 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1030,10 +1030,20 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ .cap.max_send_sge = 1, .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_RC, - .qp_context = tx + .qp_context = tx, + .create_flags = IB_QP_CREATE_USE_GFP_NOIO }; - return ib_create_qp(priv->pd, &attr); + struct ib_qp *tx_qp; + + tx_qp = ib_create_qp(priv->pd, &attr); + if (PTR_ERR(tx_qp) == -EINVAL) { + ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n", + priv->ca->name); + attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; + tx_qp = ib_create_qp(priv->pd, &attr); + } + return tx_qp; } static int ipoib_cm_send_req(struct net_device *dev, @@ -1104,12 +1114,14 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, struct ipoib_dev_priv *priv = netdev_priv(p->dev); int ret; - p->tx_ring = vzalloc(ipoib_sendq_size * sizeof *p->tx_ring); + p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring, + GFP_NOIO, PAGE_KERNEL); if (!p->tx_ring) { ipoib_warn(priv, "failed to allocate tx ring\n"); ret = -ENOMEM; goto err_tx; } + memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); p->qp = ipoib_cm_create_tx_qp(p->dev, p); if (IS_ERR(p->qp)) { diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 25f195ef44b0..eb7973957a6e 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -99,6 +99,7 @@ MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)"); module_param_named(pi_guard, iser_pi_guard, int, 0644); MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)"); +static struct workqueue_struct *release_wq; struct iser_global ig; void @@ -337,24 +338,6 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) return cls_conn; } -static void -iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn) -{ - struct iscsi_conn *conn = cls_conn->dd_data; - struct iser_conn *ib_conn = conn->dd_data; - - iscsi_conn_teardown(cls_conn); - /* - * Userspace will normally call the stop callback and - * already have freed the ib_conn, but if it goofed up then - * we free it here. - */ - if (ib_conn) { - ib_conn->iscsi_conn = NULL; - iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */ - } -} - static int iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, struct iscsi_cls_conn *cls_conn, uint64_t transport_eph, @@ -392,29 +375,39 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, conn->dd_data = ib_conn; ib_conn->iscsi_conn = conn; - iser_conn_get(ib_conn); /* ref iscsi/ib conn binding */ return 0; } +static int +iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) +{ + struct iscsi_conn *iscsi_conn; + struct iser_conn *ib_conn; + + iscsi_conn = cls_conn->dd_data; + ib_conn = iscsi_conn->dd_data; + reinit_completion(&ib_conn->stop_completion); + + return iscsi_conn_start(cls_conn); +} + static void iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) { struct iscsi_conn *conn = cls_conn->dd_data; struct iser_conn *ib_conn = conn->dd_data; + iser_dbg("stopping iscsi_conn: %p, ib_conn: %p\n", conn, ib_conn); + iscsi_conn_stop(cls_conn, flag); + /* * Userspace may have goofed up and not bound the connection or * might have only partially setup the connection. */ if (ib_conn) { - iscsi_conn_stop(cls_conn, flag); - /* - * There is no unbind event so the stop callback - * must release the ref from the bind. - */ - iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */ + conn->dd_data = NULL; + complete(&ib_conn->stop_completion); } - conn->dd_data = NULL; } static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) @@ -515,28 +508,28 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn, case ISCSI_PARAM_HDRDGST_EN: sscanf(buf, "%d", &value); if (value) { - iser_err("DataDigest wasn't negotiated to None"); + iser_err("DataDigest wasn't negotiated to None\n"); return -EPROTO; } break; case ISCSI_PARAM_DATADGST_EN: sscanf(buf, "%d", &value); if (value) { - iser_err("DataDigest wasn't negotiated to None"); + iser_err("DataDigest wasn't negotiated to None\n"); return -EPROTO; } break; case ISCSI_PARAM_IFMARKER_EN: sscanf(buf, "%d", &value); if (value) { - iser_err("IFMarker wasn't negotiated to No"); + iser_err("IFMarker wasn't negotiated to No\n"); return -EPROTO; } break; case ISCSI_PARAM_OFMARKER_EN: sscanf(buf, "%d", &value); if (value) { - iser_err("OFMarker wasn't negotiated to No"); + iser_err("OFMarker wasn't negotiated to No\n"); return -EPROTO; } break; @@ -652,19 +645,20 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) struct iser_conn *ib_conn; ib_conn = ep->dd_data; - if (ib_conn->iscsi_conn) - /* - * Must suspend xmit path if the ep is bound to the - * iscsi_conn, so we know we are not accessing the ib_conn - * when we free it. - * - * This may not be bound if the ep poll failed. - */ - iscsi_suspend_tx(ib_conn->iscsi_conn); - - - iser_info("ib conn %p state %d\n", ib_conn, ib_conn->state); + iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state); iser_conn_terminate(ib_conn); + + /* + * if iser_conn and iscsi_conn are bound, we must wait iscsi_conn_stop + * call and ISER_CONN_DOWN state before freeing the iser resources. + * otherwise we are safe to free resources immediately. + */ + if (ib_conn->iscsi_conn) { + INIT_WORK(&ib_conn->release_work, iser_release_work); + queue_work(release_wq, &ib_conn->release_work); + } else { + iser_conn_release(ib_conn); + } } static umode_t iser_attr_is_visible(int param_type, int param) @@ -748,13 +742,13 @@ static struct iscsi_transport iscsi_iser_transport = { /* connection management */ .create_conn = iscsi_iser_conn_create, .bind_conn = iscsi_iser_conn_bind, - .destroy_conn = iscsi_iser_conn_destroy, + .destroy_conn = iscsi_conn_teardown, .attr_is_visible = iser_attr_is_visible, .set_param = iscsi_iser_set_param, .get_conn_param = iscsi_conn_get_param, .get_ep_param = iscsi_iser_get_ep_param, .get_session_param = iscsi_session_get_param, - .start_conn = iscsi_conn_start, + .start_conn = iscsi_iser_conn_start, .stop_conn = iscsi_iser_conn_stop, /* iscsi host params */ .get_host_param = iscsi_host_get_param, @@ -801,6 +795,12 @@ static int __init iser_init(void) mutex_init(&ig.connlist_mutex); INIT_LIST_HEAD(&ig.connlist); + release_wq = alloc_workqueue("release workqueue", 0, 0); + if (!release_wq) { + iser_err("failed to allocate release workqueue\n"); + return -ENOMEM; + } + iscsi_iser_scsi_transport = iscsi_register_transport( &iscsi_iser_transport); if (!iscsi_iser_scsi_transport) { @@ -819,7 +819,24 @@ register_transport_failure: static void __exit iser_exit(void) { + struct iser_conn *ib_conn, *n; + int connlist_empty; + iser_dbg("Removing iSER datamover...\n"); + destroy_workqueue(release_wq); + + mutex_lock(&ig.connlist_mutex); + connlist_empty = list_empty(&ig.connlist); + mutex_unlock(&ig.connlist_mutex); + + if (!connlist_empty) { + iser_err("Error cleanup stage completed but we still have iser " + "connections, destroying them anyway.\n"); + list_for_each_entry_safe(ib_conn, n, &ig.connlist, conn_list) { + iser_conn_release(ib_conn); + } + } + iscsi_unregister_transport(&iscsi_iser_transport); kmem_cache_destroy(ig.desc_cache); } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 324129f80d40..97cd385bf7f7 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -69,7 +69,7 @@ #define DRV_NAME "iser" #define PFX DRV_NAME ": " -#define DRV_VER "1.3" +#define DRV_VER "1.4" #define iser_dbg(fmt, arg...) \ do { \ @@ -333,6 +333,8 @@ struct iser_conn { int post_recv_buf_count; /* posted rx count */ atomic_t post_send_buf_count; /* posted tx count */ char name[ISER_OBJECT_NAME_SIZE]; + struct work_struct release_work; + struct completion stop_completion; struct list_head conn_list; /* entry in ig conn list */ char *login_buf; @@ -417,12 +419,12 @@ void iscsi_iser_recv(struct iscsi_conn *conn, void iser_conn_init(struct iser_conn *ib_conn); -void iser_conn_get(struct iser_conn *ib_conn); - -int iser_conn_put(struct iser_conn *ib_conn, int destroy_cma_id_allowed); +void iser_conn_release(struct iser_conn *ib_conn); void iser_conn_terminate(struct iser_conn *ib_conn); +void iser_release_work(struct work_struct *work); + void iser_rcv_completion(struct iser_rx_desc *desc, unsigned long dto_xfer_len, struct iser_conn *ib_conn); diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 32849f2becde..ea01075f9f9b 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -581,14 +581,30 @@ static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, return ret; } +void iser_release_work(struct work_struct *work) +{ + struct iser_conn *ib_conn; + + ib_conn = container_of(work, struct iser_conn, release_work); + + /* wait for .conn_stop callback */ + wait_for_completion(&ib_conn->stop_completion); + + /* wait for the qp`s post send and post receive buffers to empty */ + wait_event_interruptible(ib_conn->wait, + ib_conn->state == ISER_CONN_DOWN); + + iser_conn_release(ib_conn); +} + /** * Frees all conn objects and deallocs conn descriptor */ -static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id) +void iser_conn_release(struct iser_conn *ib_conn) { struct iser_device *device = ib_conn->device; - BUG_ON(ib_conn->state != ISER_CONN_DOWN); + BUG_ON(ib_conn->state == ISER_CONN_UP); mutex_lock(&ig.connlist_mutex); list_del(&ib_conn->conn_list); @@ -600,27 +616,13 @@ static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id) if (device != NULL) iser_device_try_release(device); /* if cma handler context, the caller actually destroy the id */ - if (ib_conn->cma_id != NULL && can_destroy_id) { + if (ib_conn->cma_id != NULL) { rdma_destroy_id(ib_conn->cma_id); ib_conn->cma_id = NULL; } iscsi_destroy_endpoint(ib_conn->ep); } -void iser_conn_get(struct iser_conn *ib_conn) -{ - atomic_inc(&ib_conn->refcount); -} - -int iser_conn_put(struct iser_conn *ib_conn, int can_destroy_id) -{ - if (atomic_dec_and_test(&ib_conn->refcount)) { - iser_conn_release(ib_conn, can_destroy_id); - return 1; - } - return 0; -} - /** * triggers start of the disconnect procedures and wait for them to be done */ @@ -638,24 +640,19 @@ void iser_conn_terminate(struct iser_conn *ib_conn) if (err) iser_err("Failed to disconnect, conn: 0x%p err %d\n", ib_conn,err); - - wait_event_interruptible(ib_conn->wait, - ib_conn->state == ISER_CONN_DOWN); - - iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */ } -static int iser_connect_error(struct rdma_cm_id *cma_id) +static void iser_connect_error(struct rdma_cm_id *cma_id) { struct iser_conn *ib_conn; + ib_conn = (struct iser_conn *)cma_id->context; ib_conn->state = ISER_CONN_DOWN; wake_up_interruptible(&ib_conn->wait); - return iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */ } -static int iser_addr_handler(struct rdma_cm_id *cma_id) +static void iser_addr_handler(struct rdma_cm_id *cma_id) { struct iser_device *device; struct iser_conn *ib_conn; @@ -664,7 +661,8 @@ static int iser_addr_handler(struct rdma_cm_id *cma_id) device = iser_device_find_by_ib_device(cma_id); if (!device) { iser_err("device lookup/creation failed\n"); - return iser_connect_error(cma_id); + iser_connect_error(cma_id); + return; } ib_conn = (struct iser_conn *)cma_id->context; @@ -686,13 +684,12 @@ static int iser_addr_handler(struct rdma_cm_id *cma_id) ret = rdma_resolve_route(cma_id, 1000); if (ret) { iser_err("resolve route failed: %d\n", ret); - return iser_connect_error(cma_id); + iser_connect_error(cma_id); + return; } - - return 0; } -static int iser_route_handler(struct rdma_cm_id *cma_id) +static void iser_route_handler(struct rdma_cm_id *cma_id) { struct rdma_conn_param conn_param; int ret; @@ -720,9 +717,9 @@ static int iser_route_handler(struct rdma_cm_id *cma_id) goto failure; } - return 0; + return; failure: - return iser_connect_error(cma_id); + iser_connect_error(cma_id); } static void iser_connected_handler(struct rdma_cm_id *cma_id) @@ -735,14 +732,13 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id) iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num); ib_conn = (struct iser_conn *)cma_id->context; - ib_conn->state = ISER_CONN_UP; - wake_up_interruptible(&ib_conn->wait); + if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP)) + wake_up_interruptible(&ib_conn->wait); } -static int iser_disconnected_handler(struct rdma_cm_id *cma_id) +static void iser_disconnected_handler(struct rdma_cm_id *cma_id) { struct iser_conn *ib_conn; - int ret; ib_conn = (struct iser_conn *)cma_id->context; @@ -762,24 +758,19 @@ static int iser_disconnected_handler(struct rdma_cm_id *cma_id) ib_conn->state = ISER_CONN_DOWN; wake_up_interruptible(&ib_conn->wait); } - - ret = iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */ - return ret; } static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { - int ret = 0; - iser_info("event %d status %d conn %p id %p\n", event->event, event->status, cma_id->context, cma_id); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: - ret = iser_addr_handler(cma_id); + iser_addr_handler(cma_id); break; case RDMA_CM_EVENT_ROUTE_RESOLVED: - ret = iser_route_handler(cma_id); + iser_route_handler(cma_id); break; case RDMA_CM_EVENT_ESTABLISHED: iser_connected_handler(cma_id); @@ -789,18 +780,18 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_REJECTED: - ret = iser_connect_error(cma_id); + iser_connect_error(cma_id); break; case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_ADDR_CHANGE: - ret = iser_disconnected_handler(cma_id); + iser_disconnected_handler(cma_id); break; default: iser_err("Unexpected RDMA CM event (%d)\n", event->event); break; } - return ret; + return 0; } void iser_conn_init(struct iser_conn *ib_conn) @@ -809,7 +800,7 @@ void iser_conn_init(struct iser_conn *ib_conn) init_waitqueue_head(&ib_conn->wait); ib_conn->post_recv_buf_count = 0; atomic_set(&ib_conn->post_send_buf_count, 0); - atomic_set(&ib_conn->refcount, 1); /* ref ib conn allocation */ + init_completion(&ib_conn->stop_completion); INIT_LIST_HEAD(&ib_conn->conn_list); spin_lock_init(&ib_conn->lock); } @@ -837,7 +828,6 @@ int iser_connect(struct iser_conn *ib_conn, ib_conn->state = ISER_CONN_PENDING; - iser_conn_get(ib_conn); /* ref ib conn's cma id */ ib_conn->cma_id = rdma_create_id(iser_cma_handler, (void *)ib_conn, RDMA_PS_TCP, IB_QPT_RC); @@ -874,9 +864,8 @@ id_failure: ib_conn->cma_id = NULL; addr_failure: ib_conn->state = ISER_CONN_DOWN; - iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */ connect_failure: - iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */ + iser_conn_release(ib_conn); return err; } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 66a908bf3fb9..e3c2c5b4297f 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -30,7 +30,7 @@ * SOFTWARE. */ -#define pr_fmt(fmt) PFX fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/init.h> @@ -66,6 +66,8 @@ static unsigned int srp_sg_tablesize; static unsigned int cmd_sg_entries; static unsigned int indirect_sg_entries; static bool allow_ext_sg; +static bool prefer_fr; +static bool register_always; static int topspin_workarounds = 1; module_param(srp_sg_tablesize, uint, 0444); @@ -87,6 +89,14 @@ module_param(topspin_workarounds, int, 0444); MODULE_PARM_DESC(topspin_workarounds, "Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); +module_param(prefer_fr, bool, 0444); +MODULE_PARM_DESC(prefer_fr, +"Whether to use fast registration if both FMR and fast registration are supported"); + +module_param(register_always, bool, 0444); +MODULE_PARM_DESC(register_always, + "Use memory registration even for contiguous memory regions"); + static struct kernel_param_ops srp_tmo_ops; static int srp_reconnect_delay = 10; @@ -288,28 +298,174 @@ static int srp_new_cm_id(struct srp_target_port *target) return 0; } +static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target) +{ + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_fmr_pool_param fmr_param; + + memset(&fmr_param, 0, sizeof(fmr_param)); + fmr_param.pool_size = target->scsi_host->can_queue; + fmr_param.dirty_watermark = fmr_param.pool_size / 4; + fmr_param.cache = 1; + fmr_param.max_pages_per_fmr = dev->max_pages_per_mr; + fmr_param.page_shift = ilog2(dev->mr_page_size); + fmr_param.access = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ); + + return ib_create_fmr_pool(dev->pd, &fmr_param); +} + +/** + * srp_destroy_fr_pool() - free the resources owned by a pool + * @pool: Fast registration pool to be destroyed. + */ +static void srp_destroy_fr_pool(struct srp_fr_pool *pool) +{ + int i; + struct srp_fr_desc *d; + + if (!pool) + return; + + for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { + if (d->frpl) + ib_free_fast_reg_page_list(d->frpl); + if (d->mr) + ib_dereg_mr(d->mr); + } + kfree(pool); +} + +/** + * srp_create_fr_pool() - allocate and initialize a pool for fast registration + * @device: IB device to allocate fast registration descriptors for. + * @pd: Protection domain associated with the FR descriptors. + * @pool_size: Number of descriptors to allocate. + * @max_page_list_len: Maximum fast registration work request page list length. + */ +static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, + struct ib_pd *pd, int pool_size, + int max_page_list_len) +{ + struct srp_fr_pool *pool; + struct srp_fr_desc *d; + struct ib_mr *mr; + struct ib_fast_reg_page_list *frpl; + int i, ret = -EINVAL; + + if (pool_size <= 0) + goto err; + ret = -ENOMEM; + pool = kzalloc(sizeof(struct srp_fr_pool) + + pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL); + if (!pool) + goto err; + pool->size = pool_size; + pool->max_page_list_len = max_page_list_len; + spin_lock_init(&pool->lock); + INIT_LIST_HEAD(&pool->free_list); + + for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { + mr = ib_alloc_fast_reg_mr(pd, max_page_list_len); + if (IS_ERR(mr)) { + ret = PTR_ERR(mr); + goto destroy_pool; + } + d->mr = mr; + frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len); + if (IS_ERR(frpl)) { + ret = PTR_ERR(frpl); + goto destroy_pool; + } + d->frpl = frpl; + list_add_tail(&d->entry, &pool->free_list); + } + +out: + return pool; + +destroy_pool: + srp_destroy_fr_pool(pool); + +err: + pool = ERR_PTR(ret); + goto out; +} + +/** + * srp_fr_pool_get() - obtain a descriptor suitable for fast registration + * @pool: Pool to obtain descriptor from. + */ +static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool) +{ + struct srp_fr_desc *d = NULL; + unsigned long flags; + + spin_lock_irqsave(&pool->lock, flags); + if (!list_empty(&pool->free_list)) { + d = list_first_entry(&pool->free_list, typeof(*d), entry); + list_del(&d->entry); + } + spin_unlock_irqrestore(&pool->lock, flags); + + return d; +} + +/** + * srp_fr_pool_put() - put an FR descriptor back in the free list + * @pool: Pool the descriptor was allocated from. + * @desc: Pointer to an array of fast registration descriptor pointers. + * @n: Number of descriptors to put back. + * + * Note: The caller must already have queued an invalidation request for + * desc->mr->rkey before calling this function. + */ +static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc, + int n) +{ + unsigned long flags; + int i; + + spin_lock_irqsave(&pool->lock, flags); + for (i = 0; i < n; i++) + list_add(&desc[i]->entry, &pool->free_list); + spin_unlock_irqrestore(&pool->lock, flags); +} + +static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) +{ + struct srp_device *dev = target->srp_host->srp_dev; + + return srp_create_fr_pool(dev->dev, dev->pd, + target->scsi_host->can_queue, + dev->max_pages_per_mr); +} + static int srp_create_target_ib(struct srp_target_port *target) { + struct srp_device *dev = target->srp_host->srp_dev; struct ib_qp_init_attr *init_attr; struct ib_cq *recv_cq, *send_cq; struct ib_qp *qp; + struct ib_fmr_pool *fmr_pool = NULL; + struct srp_fr_pool *fr_pool = NULL; + const int m = 1 + dev->use_fast_reg; int ret; init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); if (!init_attr) return -ENOMEM; - recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, - srp_recv_completion, NULL, target, + recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, target, target->queue_size, target->comp_vector); if (IS_ERR(recv_cq)) { ret = PTR_ERR(recv_cq); goto err; } - send_cq = ib_create_cq(target->srp_host->srp_dev->dev, - srp_send_completion, NULL, target, - target->queue_size, target->comp_vector); + send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, target, + m * target->queue_size, target->comp_vector); if (IS_ERR(send_cq)) { ret = PTR_ERR(send_cq); goto err_recv_cq; @@ -318,16 +474,16 @@ static int srp_create_target_ib(struct srp_target_port *target) ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP); init_attr->event_handler = srp_qp_event; - init_attr->cap.max_send_wr = target->queue_size; + init_attr->cap.max_send_wr = m * target->queue_size; init_attr->cap.max_recv_wr = target->queue_size; init_attr->cap.max_recv_sge = 1; init_attr->cap.max_send_sge = 1; - init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; init_attr->qp_type = IB_QPT_RC; init_attr->send_cq = send_cq; init_attr->recv_cq = recv_cq; - qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr); + qp = ib_create_qp(dev->pd, init_attr); if (IS_ERR(qp)) { ret = PTR_ERR(qp); goto err_send_cq; @@ -337,6 +493,30 @@ static int srp_create_target_ib(struct srp_target_port *target) if (ret) goto err_qp; + if (dev->use_fast_reg && dev->has_fr) { + fr_pool = srp_alloc_fr_pool(target); + if (IS_ERR(fr_pool)) { + ret = PTR_ERR(fr_pool); + shost_printk(KERN_WARNING, target->scsi_host, PFX + "FR pool allocation failed (%d)\n", ret); + goto err_qp; + } + if (target->fr_pool) + srp_destroy_fr_pool(target->fr_pool); + target->fr_pool = fr_pool; + } else if (!dev->use_fast_reg && dev->has_fmr) { + fmr_pool = srp_alloc_fmr_pool(target); + if (IS_ERR(fmr_pool)) { + ret = PTR_ERR(fmr_pool); + shost_printk(KERN_WARNING, target->scsi_host, PFX + "FMR pool allocation failed (%d)\n", ret); + goto err_qp; + } + if (target->fmr_pool) + ib_destroy_fmr_pool(target->fmr_pool); + target->fmr_pool = fmr_pool; + } + if (target->qp) ib_destroy_qp(target->qp); if (target->recv_cq) @@ -371,8 +551,16 @@ err: */ static void srp_free_target_ib(struct srp_target_port *target) { + struct srp_device *dev = target->srp_host->srp_dev; int i; + if (dev->use_fast_reg) { + if (target->fr_pool) + srp_destroy_fr_pool(target->fr_pool); + } else { + if (target->fmr_pool) + ib_destroy_fmr_pool(target->fmr_pool); + } ib_destroy_qp(target->qp); ib_destroy_cq(target->send_cq); ib_destroy_cq(target->recv_cq); @@ -577,7 +765,8 @@ static void srp_disconnect_target(struct srp_target_port *target) static void srp_free_req_data(struct srp_target_port *target) { - struct ib_device *ibdev = target->srp_host->srp_dev->dev; + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_device *ibdev = dev->dev; struct srp_request *req; int i; @@ -586,7 +775,10 @@ static void srp_free_req_data(struct srp_target_port *target) for (i = 0; i < target->req_ring_size; ++i) { req = &target->req_ring[i]; - kfree(req->fmr_list); + if (dev->use_fast_reg) + kfree(req->fr_list); + else + kfree(req->fmr_list); kfree(req->map_page); if (req->indirect_dma_addr) { ib_dma_unmap_single(ibdev, req->indirect_dma_addr, @@ -605,6 +797,7 @@ static int srp_alloc_req_data(struct srp_target_port *target) struct srp_device *srp_dev = target->srp_host->srp_dev; struct ib_device *ibdev = srp_dev->dev; struct srp_request *req; + void *mr_list; dma_addr_t dma_addr; int i, ret = -ENOMEM; @@ -617,12 +810,20 @@ static int srp_alloc_req_data(struct srp_target_port *target) for (i = 0; i < target->req_ring_size; ++i) { req = &target->req_ring[i]; - req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *), - GFP_KERNEL); - req->map_page = kmalloc(SRP_FMR_SIZE * sizeof(void *), - GFP_KERNEL); + mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *), + GFP_KERNEL); + if (!mr_list) + goto out; + if (srp_dev->use_fast_reg) + req->fr_list = mr_list; + else + req->fmr_list = mr_list; + req->map_page = kmalloc(srp_dev->max_pages_per_mr * + sizeof(void *), GFP_KERNEL); + if (!req->map_page) + goto out; req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); - if (!req->fmr_list || !req->map_page || !req->indirect_desc) + if (!req->indirect_desc) goto out; dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, @@ -759,21 +960,56 @@ static int srp_connect_target(struct srp_target_port *target) } } +static int srp_inv_rkey(struct srp_target_port *target, u32 rkey) +{ + struct ib_send_wr *bad_wr; + struct ib_send_wr wr = { + .opcode = IB_WR_LOCAL_INV, + .wr_id = LOCAL_INV_WR_ID_MASK, + .next = NULL, + .num_sge = 0, + .send_flags = 0, + .ex.invalidate_rkey = rkey, + }; + + return ib_post_send(target->qp, &wr, &bad_wr); +} + static void srp_unmap_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, struct srp_request *req) { - struct ib_device *ibdev = target->srp_host->srp_dev->dev; - struct ib_pool_fmr **pfmr; + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_device *ibdev = dev->dev; + int i, res; if (!scsi_sglist(scmnd) || (scmnd->sc_data_direction != DMA_TO_DEVICE && scmnd->sc_data_direction != DMA_FROM_DEVICE)) return; - pfmr = req->fmr_list; - while (req->nfmr--) - ib_fmr_pool_unmap(*pfmr++); + if (dev->use_fast_reg) { + struct srp_fr_desc **pfr; + + for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) { + res = srp_inv_rkey(target, (*pfr)->mr->rkey); + if (res < 0) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "Queueing INV WR for rkey %#x failed (%d)\n", + (*pfr)->mr->rkey, res); + queue_work(system_long_wq, + &target->tl_err_work); + } + } + if (req->nmdesc) + srp_fr_pool_put(target->fr_pool, req->fr_list, + req->nmdesc); + } else { + struct ib_pool_fmr **pfmr; + + for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++) + ib_fmr_pool_unmap(*pfmr); + } ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd), scmnd->sc_data_direction); @@ -813,6 +1049,10 @@ static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target, /** * srp_free_req() - Unmap data and add request to the free request list. + * @target: SRP target port. + * @req: Request to be freed. + * @scmnd: SCSI command associated with @req. + * @req_lim_delta: Amount to be added to @target->req_lim. */ static void srp_free_req(struct srp_target_port *target, struct srp_request *req, struct scsi_cmnd *scmnd, @@ -882,21 +1122,19 @@ static int srp_rport_reconnect(struct srp_rport *rport) * callbacks will have finished before a new QP is allocated. */ ret = srp_new_cm_id(target); - /* - * Whether or not creating a new CM ID succeeded, create a new - * QP. This guarantees that all completion callback function - * invocations have finished before request resetting starts. - */ - if (ret == 0) - ret = srp_create_target_ib(target); - else - srp_create_target_ib(target); for (i = 0; i < target->req_ring_size; ++i) { struct srp_request *req = &target->req_ring[i]; srp_finish_req(target, req, NULL, DID_RESET << 16); } + /* + * Whether or not creating a new CM ID succeeded, create a new + * QP. This guarantees that all callback functions for the old QP have + * finished before any send requests are posted on the new QP. + */ + ret += srp_create_target_ib(target); + INIT_LIST_HEAD(&target->free_tx); for (i = 0; i < target->queue_size; ++i) list_add(&target->tx_ring[i]->list, &target->free_tx); @@ -928,33 +1166,87 @@ static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr, static int srp_map_finish_fmr(struct srp_map_state *state, struct srp_target_port *target) { - struct srp_device *dev = target->srp_host->srp_dev; struct ib_pool_fmr *fmr; u64 io_addr = 0; - if (!state->npages) - return 0; - - if (state->npages == 1) { - srp_map_desc(state, state->base_dma_addr, state->fmr_len, - target->rkey); - state->npages = state->fmr_len = 0; - return 0; - } - - fmr = ib_fmr_pool_map_phys(dev->fmr_pool, state->pages, + fmr = ib_fmr_pool_map_phys(target->fmr_pool, state->pages, state->npages, io_addr); if (IS_ERR(fmr)) return PTR_ERR(fmr); *state->next_fmr++ = fmr; - state->nfmr++; + state->nmdesc++; + + srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey); - srp_map_desc(state, 0, state->fmr_len, fmr->fmr->rkey); - state->npages = state->fmr_len = 0; return 0; } +static int srp_map_finish_fr(struct srp_map_state *state, + struct srp_target_port *target) +{ + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_send_wr *bad_wr; + struct ib_send_wr wr; + struct srp_fr_desc *desc; + u32 rkey; + + desc = srp_fr_pool_get(target->fr_pool); + if (!desc) + return -ENOMEM; + + rkey = ib_inc_rkey(desc->mr->rkey); + ib_update_fast_reg_key(desc->mr, rkey); + + memcpy(desc->frpl->page_list, state->pages, + sizeof(state->pages[0]) * state->npages); + + memset(&wr, 0, sizeof(wr)); + wr.opcode = IB_WR_FAST_REG_MR; + wr.wr_id = FAST_REG_WR_ID_MASK; + wr.wr.fast_reg.iova_start = state->base_dma_addr; + wr.wr.fast_reg.page_list = desc->frpl; + wr.wr.fast_reg.page_list_len = state->npages; + wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size); + wr.wr.fast_reg.length = state->dma_len; + wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE); + wr.wr.fast_reg.rkey = desc->mr->lkey; + + *state->next_fr++ = desc; + state->nmdesc++; + + srp_map_desc(state, state->base_dma_addr, state->dma_len, + desc->mr->rkey); + + return ib_post_send(target->qp, &wr, &bad_wr); +} + +static int srp_finish_mapping(struct srp_map_state *state, + struct srp_target_port *target) +{ + int ret = 0; + + if (state->npages == 0) + return 0; + + if (state->npages == 1 && !register_always) + srp_map_desc(state, state->base_dma_addr, state->dma_len, + target->rkey); + else + ret = target->srp_host->srp_dev->use_fast_reg ? + srp_map_finish_fr(state, target) : + srp_map_finish_fmr(state, target); + + if (ret == 0) { + state->npages = 0; + state->dma_len = 0; + } + + return ret; +} + static void srp_map_update_start(struct srp_map_state *state, struct scatterlist *sg, int sg_index, dma_addr_t dma_addr) @@ -967,7 +1259,7 @@ static void srp_map_update_start(struct srp_map_state *state, static int srp_map_sg_entry(struct srp_map_state *state, struct srp_target_port *target, struct scatterlist *sg, int sg_index, - int use_fmr) + bool use_mr) { struct srp_device *dev = target->srp_host->srp_dev; struct ib_device *ibdev = dev->dev; @@ -979,23 +1271,25 @@ static int srp_map_sg_entry(struct srp_map_state *state, if (!dma_len) return 0; - if (use_fmr == SRP_MAP_NO_FMR) { - /* Once we're in direct map mode for a request, we don't - * go back to FMR mode, so no need to update anything + if (!use_mr) { + /* + * Once we're in direct map mode for a request, we don't + * go back to FMR or FR mode, so no need to update anything * other than the descriptor. */ srp_map_desc(state, dma_addr, dma_len, target->rkey); return 0; } - /* If we start at an offset into the FMR page, don't merge into - * the current FMR. Finish it out, and use the kernel's MR for this - * sg entry. This is to avoid potential bugs on some SRP targets - * that were never quite defined, but went away when the initiator - * avoided using FMR on such page fragments. + /* + * Since not all RDMA HW drivers support non-zero page offsets for + * FMR, if we start at an offset into a page, don't merge into the + * current FMR mapping. Finish it out, and use the kernel's MR for + * this sg entry. */ - if (dma_addr & ~dev->fmr_page_mask || dma_len > dev->fmr_max_size) { - ret = srp_map_finish_fmr(state, target); + if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) || + dma_len > dev->mr_max_size) { + ret = srp_finish_mapping(state, target); if (ret) return ret; @@ -1004,52 +1298,106 @@ static int srp_map_sg_entry(struct srp_map_state *state, return 0; } - /* If this is the first sg to go into the FMR, save our position. - * We need to know the first unmapped entry, its index, and the - * first unmapped address within that entry to be able to restart - * mapping after an error. + /* + * If this is the first sg that will be mapped via FMR or via FR, save + * our position. We need to know the first unmapped entry, its index, + * and the first unmapped address within that entry to be able to + * restart mapping after an error. */ if (!state->unmapped_sg) srp_map_update_start(state, sg, sg_index, dma_addr); while (dma_len) { - if (state->npages == SRP_FMR_SIZE) { - ret = srp_map_finish_fmr(state, target); + unsigned offset = dma_addr & ~dev->mr_page_mask; + if (state->npages == dev->max_pages_per_mr || offset != 0) { + ret = srp_finish_mapping(state, target); if (ret) return ret; srp_map_update_start(state, sg, sg_index, dma_addr); } - len = min_t(unsigned int, dma_len, dev->fmr_page_size); + len = min_t(unsigned int, dma_len, dev->mr_page_size - offset); if (!state->npages) state->base_dma_addr = dma_addr; - state->pages[state->npages++] = dma_addr; - state->fmr_len += len; + state->pages[state->npages++] = dma_addr & dev->mr_page_mask; + state->dma_len += len; dma_addr += len; dma_len -= len; } - /* If the last entry of the FMR wasn't a full page, then we need to + /* + * If the last entry of the MR wasn't a full page, then we need to * close it out and start a new one -- we can only merge at page * boundries. */ ret = 0; - if (len != dev->fmr_page_size) { - ret = srp_map_finish_fmr(state, target); + if (len != dev->mr_page_size) { + ret = srp_finish_mapping(state, target); if (!ret) srp_map_update_start(state, NULL, 0, 0); } return ret; } +static int srp_map_sg(struct srp_map_state *state, + struct srp_target_port *target, struct srp_request *req, + struct scatterlist *scat, int count) +{ + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_device *ibdev = dev->dev; + struct scatterlist *sg; + int i; + bool use_mr; + + state->desc = req->indirect_desc; + state->pages = req->map_page; + if (dev->use_fast_reg) { + state->next_fr = req->fr_list; + use_mr = !!target->fr_pool; + } else { + state->next_fmr = req->fmr_list; + use_mr = !!target->fmr_pool; + } + + for_each_sg(scat, sg, count, i) { + if (srp_map_sg_entry(state, target, sg, i, use_mr)) { + /* + * Memory registration failed, so backtrack to the + * first unmapped entry and continue on without using + * memory registration. + */ + dma_addr_t dma_addr; + unsigned int dma_len; + +backtrack: + sg = state->unmapped_sg; + i = state->unmapped_index; + + dma_addr = ib_sg_dma_address(ibdev, sg); + dma_len = ib_sg_dma_len(ibdev, sg); + dma_len -= (state->unmapped_addr - dma_addr); + dma_addr = state->unmapped_addr; + use_mr = false; + srp_map_desc(state, dma_addr, dma_len, target->rkey); + } + } + + if (use_mr && srp_finish_mapping(state, target)) + goto backtrack; + + req->nmdesc = state->nmdesc; + + return 0; +} + static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, struct srp_request *req) { - struct scatterlist *scat, *sg; + struct scatterlist *scat; struct srp_cmd *cmd = req->cmd->buf; - int i, len, nents, count, use_fmr; + int len, nents, count; struct srp_device *dev; struct ib_device *ibdev; struct srp_map_state state; @@ -1081,7 +1429,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, fmt = SRP_DATA_DESC_DIRECT; len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); - if (count == 1) { + if (count == 1 && !register_always) { /* * The midlayer only generated a single gather/scatter * entry, or DMA mapping coalesced everything to a @@ -1094,13 +1442,13 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, buf->key = cpu_to_be32(target->rkey); buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); - req->nfmr = 0; + req->nmdesc = 0; goto map_complete; } - /* We have more than one scatter/gather entry, so build our indirect - * descriptor table, trying to merge as many entries with FMR as we - * can. + /* + * We have more than one scatter/gather entry, so build our indirect + * descriptor table, trying to merge as many entries as we can. */ indirect_hdr = (void *) cmd->add_data; @@ -1108,35 +1456,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, target->indirect_size, DMA_TO_DEVICE); memset(&state, 0, sizeof(state)); - state.desc = req->indirect_desc; - state.pages = req->map_page; - state.next_fmr = req->fmr_list; - - use_fmr = dev->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR; - - for_each_sg(scat, sg, count, i) { - if (srp_map_sg_entry(&state, target, sg, i, use_fmr)) { - /* FMR mapping failed, so backtrack to the first - * unmapped entry and continue on without using FMR. - */ - dma_addr_t dma_addr; - unsigned int dma_len; - -backtrack: - sg = state.unmapped_sg; - i = state.unmapped_index; - - dma_addr = ib_sg_dma_address(ibdev, sg); - dma_len = ib_sg_dma_len(ibdev, sg); - dma_len -= (state.unmapped_addr - dma_addr); - dma_addr = state.unmapped_addr; - use_fmr = SRP_MAP_NO_FMR; - srp_map_desc(&state, dma_addr, dma_len, target->rkey); - } - } - - if (use_fmr == SRP_MAP_ALLOW_FMR && srp_map_finish_fmr(&state, target)) - goto backtrack; + srp_map_sg(&state, target, req, scat, count); /* We've mapped the request, now pull as much of the indirect * descriptor table as we can into the command buffer. If this @@ -1144,9 +1464,9 @@ backtrack: * guaranteed to fit into the command, as the SCSI layer won't * give us more S/G entries than we allow. */ - req->nfmr = state.nfmr; if (state.ndesc == 1) { - /* FMR mapping was able to collapse this to one entry, + /* + * Memory registration collapsed the sg-list into one entry, * so use a direct descriptor. */ struct srp_direct_buf *buf = (void *) cmd->add_data; @@ -1455,6 +1775,7 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) /** * srp_tl_err_work() - handle a transport layer error + * @work: Work structure embedded in an SRP target port. * * Note: This function may get invoked before the rport has been created, * hence the target->rport test. @@ -1468,14 +1789,24 @@ static void srp_tl_err_work(struct work_struct *work) srp_start_tl_fail_timers(target->rport); } -static void srp_handle_qp_err(enum ib_wc_status wc_status, bool send_err, - struct srp_target_port *target) +static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, + bool send_err, struct srp_target_port *target) { if (target->connected && !target->qp_in_error) { - shost_printk(KERN_ERR, target->scsi_host, - PFX "failed %s status %d\n", - send_err ? "send" : "receive", - wc_status); + if (wr_id & LOCAL_INV_WR_ID_MASK) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "LOCAL_INV failed with status %d\n", + wc_status); + } else if (wr_id & FAST_REG_WR_ID_MASK) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "FAST_REG_MR failed status %d\n", + wc_status); + } else { + shost_printk(KERN_ERR, target->scsi_host, + PFX "failed %s status %d for iu %p\n", + send_err ? "send" : "receive", + wc_status, (void *)(uintptr_t)wr_id); + } queue_work(system_long_wq, &target->tl_err_work); } target->qp_in_error = true; @@ -1491,7 +1822,7 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr) if (likely(wc.status == IB_WC_SUCCESS)) { srp_handle_recv(target, &wc); } else { - srp_handle_qp_err(wc.status, false, target); + srp_handle_qp_err(wc.wr_id, wc.status, false, target); } } } @@ -1507,7 +1838,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr) iu = (struct srp_iu *) (uintptr_t) wc.wr_id; list_add(&iu->list, &target->free_tx); } else { - srp_handle_qp_err(wc.status, true, target); + srp_handle_qp_err(wc.wr_id, wc.status, true, target); } } } @@ -1521,7 +1852,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) struct srp_cmd *cmd; struct ib_device *dev; unsigned long flags; - int len, result; + int len, ret; const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler; /* @@ -1533,12 +1864,9 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) if (in_scsi_eh) mutex_lock(&rport->mutex); - result = srp_chkready(target->rport); - if (unlikely(result)) { - scmnd->result = result; - scmnd->scsi_done(scmnd); - goto unlock_rport; - } + scmnd->result = srp_chkready(target->rport); + if (unlikely(scmnd->result)) + goto err; spin_lock_irqsave(&target->lock, flags); iu = __srp_get_tx_iu(target, SRP_IU_CMD); @@ -1553,7 +1881,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len, DMA_TO_DEVICE); - scmnd->result = 0; scmnd->host_scribble = (void *) req; cmd = iu->buf; @@ -1570,7 +1897,15 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) len = srp_map_data(scmnd, target, req); if (len < 0) { shost_printk(KERN_ERR, target->scsi_host, - PFX "Failed to map data\n"); + PFX "Failed to map data (%d)\n", len); + /* + * If we ran out of memory descriptors (-ENOMEM) because an + * application is queuing many requests with more than + * max_pages_per_mr sg-list elements, tell the SCSI mid-layer + * to reduce queue depth temporarily. + */ + scmnd->result = len == -ENOMEM ? + DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16; goto err_iu; } @@ -1582,11 +1917,13 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) goto err_unmap; } + ret = 0; + unlock_rport: if (in_scsi_eh) mutex_unlock(&rport->mutex); - return 0; + return ret; err_unmap: srp_unmap_data(scmnd, target, req); @@ -1594,16 +1931,27 @@ err_unmap: err_iu: srp_put_tx_iu(target, iu, SRP_IU_CMD); + /* + * Avoid that the loops that iterate over the request ring can + * encounter a dangling SCSI command pointer. + */ + req->scmnd = NULL; + spin_lock_irqsave(&target->lock, flags); list_add(&req->list, &target->free_reqs); err_unlock: spin_unlock_irqrestore(&target->lock, flags); - if (in_scsi_eh) - mutex_unlock(&rport->mutex); +err: + if (scmnd->result) { + scmnd->scsi_done(scmnd); + ret = 0; + } else { + ret = SCSI_MLQUEUE_HOST_BUSY; + } - return SCSI_MLQUEUE_HOST_BUSY; + goto unlock_rport; } /* @@ -2310,6 +2658,8 @@ static struct class srp_class = { /** * srp_conn_unique() - check whether the connection to a target is unique + * @host: SRP host. + * @target: SRP target port. */ static bool srp_conn_unique(struct srp_host *host, struct srp_target_port *target) @@ -2605,7 +2955,8 @@ static ssize_t srp_create_target(struct device *dev, container_of(dev, struct srp_host, dev); struct Scsi_Host *target_host; struct srp_target_port *target; - struct ib_device *ibdev = host->srp_dev->dev; + struct srp_device *srp_dev = host->srp_dev; + struct ib_device *ibdev = srp_dev->dev; int ret; target_host = scsi_host_alloc(&srp_template, @@ -2650,9 +3001,9 @@ static ssize_t srp_create_target(struct device *dev, goto err; } - if (!host->srp_dev->fmr_pool && !target->allow_ext_sg && - target->cmd_sg_cnt < target->sg_tablesize) { - pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); + if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && + target->cmd_sg_cnt < target->sg_tablesize) { + pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); target->sg_tablesize = target->cmd_sg_cnt; } @@ -2790,9 +3141,9 @@ static void srp_add_one(struct ib_device *device) { struct srp_device *srp_dev; struct ib_device_attr *dev_attr; - struct ib_fmr_pool_param fmr_param; struct srp_host *host; - int max_pages_per_fmr, fmr_page_shift, s, e, p; + int mr_page_shift, s, e, p; + u64 max_pages_per_mr; dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL); if (!dev_attr) @@ -2807,15 +3158,39 @@ static void srp_add_one(struct ib_device *device) if (!srp_dev) goto free_attr; + srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && + device->map_phys_fmr && device->unmap_fmr); + srp_dev->has_fr = (dev_attr->device_cap_flags & + IB_DEVICE_MEM_MGT_EXTENSIONS); + if (!srp_dev->has_fmr && !srp_dev->has_fr) + dev_warn(&device->dev, "neither FMR nor FR is supported\n"); + + srp_dev->use_fast_reg = (srp_dev->has_fr && + (!srp_dev->has_fmr || prefer_fr)); + /* * Use the smallest page size supported by the HCA, down to a * minimum of 4096 bytes. We're unlikely to build large sglists * out of smaller entries. */ - fmr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1); - srp_dev->fmr_page_size = 1 << fmr_page_shift; - srp_dev->fmr_page_mask = ~((u64) srp_dev->fmr_page_size - 1); - srp_dev->fmr_max_size = srp_dev->fmr_page_size * SRP_FMR_SIZE; + mr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1); + srp_dev->mr_page_size = 1 << mr_page_shift; + srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1); + max_pages_per_mr = dev_attr->max_mr_size; + do_div(max_pages_per_mr, srp_dev->mr_page_size); + srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, + max_pages_per_mr); + if (srp_dev->use_fast_reg) { + srp_dev->max_pages_per_mr = + min_t(u32, srp_dev->max_pages_per_mr, + dev_attr->max_fast_reg_page_list_len); + } + srp_dev->mr_max_size = srp_dev->mr_page_size * + srp_dev->max_pages_per_mr; + pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n", + device->name, mr_page_shift, dev_attr->max_mr_size, + dev_attr->max_fast_reg_page_list_len, + srp_dev->max_pages_per_mr, srp_dev->mr_max_size); INIT_LIST_HEAD(&srp_dev->dev_list); @@ -2831,27 +3206,6 @@ static void srp_add_one(struct ib_device *device) if (IS_ERR(srp_dev->mr)) goto err_pd; - for (max_pages_per_fmr = SRP_FMR_SIZE; - max_pages_per_fmr >= SRP_FMR_MIN_SIZE; - max_pages_per_fmr /= 2, srp_dev->fmr_max_size /= 2) { - memset(&fmr_param, 0, sizeof fmr_param); - fmr_param.pool_size = SRP_FMR_POOL_SIZE; - fmr_param.dirty_watermark = SRP_FMR_DIRTY_SIZE; - fmr_param.cache = 1; - fmr_param.max_pages_per_fmr = max_pages_per_fmr; - fmr_param.page_shift = fmr_page_shift; - fmr_param.access = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ); - - srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param); - if (!IS_ERR(srp_dev->fmr_pool)) - break; - } - - if (IS_ERR(srp_dev->fmr_pool)) - srp_dev->fmr_pool = NULL; - if (device->node_type == RDMA_NODE_IB_SWITCH) { s = 0; e = 0; @@ -2914,8 +3268,6 @@ static void srp_remove_one(struct ib_device *device) kfree(host); } - if (srp_dev->fmr_pool) - ib_destroy_fmr_pool(srp_dev->fmr_pool); ib_dereg_mr(srp_dev->mr); ib_dealloc_pd(srp_dev->pd); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index aad27b7b4a46..e46ecb15aa0d 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -66,13 +66,10 @@ enum { SRP_TAG_NO_REQ = ~0U, SRP_TAG_TSK_MGMT = 1U << 31, - SRP_FMR_SIZE = 512, - SRP_FMR_MIN_SIZE = 128, - SRP_FMR_POOL_SIZE = 1024, - SRP_FMR_DIRTY_SIZE = SRP_FMR_POOL_SIZE / 4, + SRP_MAX_PAGES_PER_MR = 512, - SRP_MAP_ALLOW_FMR = 0, - SRP_MAP_NO_FMR = 1, + LOCAL_INV_WR_ID_MASK = 1, + FAST_REG_WR_ID_MASK = 2, }; enum srp_target_state { @@ -86,15 +83,24 @@ enum srp_iu_type { SRP_IU_RSP, }; +/* + * @mr_page_mask: HCA memory registration page mask. + * @mr_page_size: HCA memory registration page size. + * @mr_max_size: Maximum size in bytes of a single FMR / FR registration + * request. + */ struct srp_device { struct list_head dev_list; struct ib_device *dev; struct ib_pd *pd; struct ib_mr *mr; - struct ib_fmr_pool *fmr_pool; - u64 fmr_page_mask; - int fmr_page_size; - int fmr_max_size; + u64 mr_page_mask; + int mr_page_size; + int mr_max_size; + int max_pages_per_mr; + bool has_fmr; + bool has_fr; + bool use_fast_reg; }; struct srp_host { @@ -112,11 +118,14 @@ struct srp_request { struct list_head list; struct scsi_cmnd *scmnd; struct srp_iu *cmd; - struct ib_pool_fmr **fmr_list; + union { + struct ib_pool_fmr **fmr_list; + struct srp_fr_desc **fr_list; + }; u64 *map_page; struct srp_direct_buf *indirect_desc; dma_addr_t indirect_dma_addr; - short nfmr; + short nmdesc; short index; }; @@ -131,6 +140,10 @@ struct srp_target_port { struct ib_cq *send_cq ____cacheline_aligned_in_smp; struct ib_cq *recv_cq; struct ib_qp *qp; + union { + struct ib_fmr_pool *fmr_pool; + struct srp_fr_pool *fr_pool; + }; u32 lkey; u32 rkey; enum srp_target_state state; @@ -197,15 +210,66 @@ struct srp_iu { enum dma_data_direction direction; }; +/** + * struct srp_fr_desc - fast registration work request arguments + * @entry: Entry in srp_fr_pool.free_list. + * @mr: Memory region. + * @frpl: Fast registration page list. + */ +struct srp_fr_desc { + struct list_head entry; + struct ib_mr *mr; + struct ib_fast_reg_page_list *frpl; +}; + +/** + * struct srp_fr_pool - pool of fast registration descriptors + * + * An entry is available for allocation if and only if it occurs in @free_list. + * + * @size: Number of descriptors in this pool. + * @max_page_list_len: Maximum fast registration work request page list length. + * @lock: Protects free_list. + * @free_list: List of free descriptors. + * @desc: Fast registration descriptor pool. + */ +struct srp_fr_pool { + int size; + int max_page_list_len; + spinlock_t lock; + struct list_head free_list; + struct srp_fr_desc desc[0]; +}; + +/** + * struct srp_map_state - per-request DMA memory mapping state + * @desc: Pointer to the element of the SRP buffer descriptor array + * that is being filled in. + * @pages: Array with DMA addresses of pages being considered for + * memory registration. + * @base_dma_addr: DMA address of the first page that has not yet been mapped. + * @dma_len: Number of bytes that will be registered with the next + * FMR or FR memory registration call. + * @total_len: Total number of bytes in the sg-list being mapped. + * @npages: Number of page addresses in the pages[] array. + * @nmdesc: Number of FMR or FR memory descriptors used for mapping. + * @ndesc: Number of SRP buffer descriptors that have been filled in. + * @unmapped_sg: First element of the sg-list that is mapped via FMR or FR. + * @unmapped_index: Index of the first element mapped via FMR or FR. + * @unmapped_addr: DMA address of the first element mapped via FMR or FR. + */ struct srp_map_state { - struct ib_pool_fmr **next_fmr; + union { + struct ib_pool_fmr **next_fmr; + struct srp_fr_desc **next_fr; + }; struct srp_direct_buf *desc; u64 *pages; dma_addr_t base_dma_addr; - u32 fmr_len; + u32 dma_len; u32 total_len; unsigned int npages; - unsigned int nfmr; + unsigned int nmdesc; unsigned int ndesc; struct scatterlist *unmapped_sg; int unmapped_index; diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index c3ad464d0627..b0297da50304 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -171,7 +171,7 @@ void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap) */ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, - struct mlx4_buf *buf) + struct mlx4_buf *buf, gfp_t gfp) { dma_addr_t t; @@ -180,7 +180,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, buf->npages = 1; buf->page_shift = get_order(size) + PAGE_SHIFT; buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, - size, &t, GFP_KERNEL); + size, &t, gfp); if (!buf->direct.buf) return -ENOMEM; @@ -200,14 +200,14 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, buf->npages = buf->nbufs; buf->page_shift = PAGE_SHIFT; buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), - GFP_KERNEL); + gfp); if (!buf->page_list) return -ENOMEM; for (i = 0; i < buf->nbufs; ++i) { buf->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, - &t, GFP_KERNEL); + &t, gfp); if (!buf->page_list[i].buf) goto err_free; @@ -218,7 +218,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, if (BITS_PER_LONG == 64) { struct page **pages; - pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL); + pages = kmalloc(sizeof *pages * buf->nbufs, gfp); if (!pages) goto err_free; for (i = 0; i < buf->nbufs; ++i) @@ -260,11 +260,12 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) } EXPORT_SYMBOL_GPL(mlx4_buf_free); -static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device) +static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device, + gfp_t gfp) { struct mlx4_db_pgdir *pgdir; - pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL); + pgdir = kzalloc(sizeof *pgdir, gfp); if (!pgdir) return NULL; @@ -272,7 +273,7 @@ static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device) pgdir->bits[0] = pgdir->order0; pgdir->bits[1] = pgdir->order1; pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE, - &pgdir->db_dma, GFP_KERNEL); + &pgdir->db_dma, gfp); if (!pgdir->db_page) { kfree(pgdir); return NULL; @@ -312,7 +313,7 @@ found: return 0; } -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order) +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_db_pgdir *pgdir; @@ -324,7 +325,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order) if (!mlx4_alloc_db_from_pgdir(pgdir, db, order)) goto out; - pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev)); + pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev), gfp); if (!pgdir) { ret = -ENOMEM; goto out; @@ -376,13 +377,13 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, { int err; - err = mlx4_db_alloc(dev, &wqres->db, 1); + err = mlx4_db_alloc(dev, &wqres->db, 1, GFP_KERNEL); if (err) return err; *wqres->db.db = 0; - err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf); + err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf, GFP_KERNEL); if (err) goto err_db; @@ -391,7 +392,7 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf); + err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf, GFP_KERNEL); if (err) goto err_mtt; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 78099eab7673..3370ecb8c3d2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -705,20 +705,28 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, struct ib_smp *smp = inbox->buf; u32 index; u8 port; + u8 opcode_modifier; u16 *table; int err; int vidx, pidx; + int network_view; struct mlx4_priv *priv = mlx4_priv(dev); struct ib_smp *outsmp = outbox->buf; __be16 *outtab = (__be16 *)(outsmp->data); __be32 slave_cap_mask; __be64 slave_node_guid; + port = vhcr->in_modifier; + /* network-view bit is for driver use only, and should not be passed to FW */ + opcode_modifier = vhcr->op_modifier & ~0x8; /* clear netw view bit */ + network_view = !!(vhcr->op_modifier & 0x8); + if (smp->base_version == 1 && smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && smp->class_version == 1) { - if (smp->method == IB_MGMT_METHOD_GET) { + /* host view is paravirtualized */ + if (!network_view && smp->method == IB_MGMT_METHOD_GET) { if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) { index = be32_to_cpu(smp->attr_mod); if (port < 1 || port > dev->caps.num_ports) @@ -743,7 +751,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, /*get the slave specific caps:*/ /*do the command */ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, vhcr->op_modifier, + vhcr->in_modifier, opcode_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); /* modify the response for slaves */ if (!err && slave != mlx4_master_func_num(dev)) { @@ -760,7 +768,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, smp->attr_mod = cpu_to_be32(slave / 8); /* execute cmd */ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, vhcr->op_modifier, + vhcr->in_modifier, opcode_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (!err) { /* if needed, move slave gid to index 0 */ @@ -774,7 +782,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, } if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) { err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, vhcr->op_modifier, + vhcr->in_modifier, opcode_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (!err) { slave_node_guid = mlx4_get_slave_node_guid(dev, slave); @@ -784,19 +792,24 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, } } } + + /* Non-privileged VFs are only allowed "host" view LID-routed 'Get' MADs. + * These are the MADs used by ib verbs (such as ib_query_gids). + */ if (slave != mlx4_master_func_num(dev) && - ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) || - (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && - smp->method == IB_MGMT_METHOD_SET))) { - mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, " - "class 0x%x, method 0x%x for attr 0x%x. Rejecting\n", - slave, smp->method, smp->mgmt_class, - be16_to_cpu(smp->attr_id)); - return -EPERM; + !mlx4_vf_smi_enabled(dev, slave, port)) { + if (!(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && + smp->method == IB_MGMT_METHOD_GET) || network_view) { + mlx4_err(dev, "Unprivileged slave %d is trying to execute a Subnet MGMT MAD, class 0x%x, method 0x%x, view=%s for attr 0x%x. Rejecting\n", + slave, smp->method, smp->mgmt_class, + network_view ? "Network" : "Host", + be16_to_cpu(smp->attr_id)); + return -EPERM; + } } - /*default:*/ + return mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, vhcr->op_modifier, + vhcr->in_modifier, opcode_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); } @@ -1653,6 +1666,8 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) for (port = min_port; port <= max_port; port++) { if (!test_bit(port - 1, actv_ports.ports)) continue; + priv->mfunc.master.vf_oper[slave].smi_enabled[port] = + priv->mfunc.master.vf_admin[slave].enable_smi[port]; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; vp_oper->state = *vp_admin; @@ -1704,6 +1719,8 @@ static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave for (port = min_port; port <= max_port; port++) { if (!test_bit(port - 1, actv_ports.ports)) continue; + priv->mfunc.master.vf_oper[slave].smi_enabled[port] = + MLX4_VF_SMI_DISABLED; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (NO_INDX != vp_oper->vlan_idx) { __mlx4_unregister_vlan(&priv->dev, @@ -2537,3 +2554,50 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state); + +int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (slave < 1 || slave >= dev->num_slaves || + port < 1 || port > MLX4_MAX_PORTS) + return 0; + + return priv->mfunc.master.vf_oper[slave].smi_enabled[port] == + MLX4_VF_SMI_ENABLED; +} +EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled); + +int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (slave == mlx4_master_func_num(dev)) + return 1; + + if (slave < 1 || slave >= dev->num_slaves || + port < 1 || port > MLX4_MAX_PORTS) + return 0; + + return priv->mfunc.master.vf_admin[slave].enable_smi[port] == + MLX4_VF_SMI_ENABLED; +} +EXPORT_SYMBOL_GPL(mlx4_vf_get_enable_smi_admin); + +int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, + int enabled) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (slave == mlx4_master_func_num(dev)) + return 0; + + if (slave < 1 || slave >= dev->num_slaves || + port < 1 || port > MLX4_MAX_PORTS || + enabled < 0 || enabled > 1) + return -EINVAL; + + priv->mfunc.master.vf_admin[slave].enable_smi[port] = enabled; + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_vf_set_enable_smi_admin); diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 0487121e4a0f..c90cde5b4aee 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -173,11 +173,11 @@ int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn) if (*cqn == -1) return -ENOMEM; - err = mlx4_table_get(dev, &cq_table->table, *cqn); + err = mlx4_table_get(dev, &cq_table->table, *cqn, GFP_KERNEL); if (err) goto err_out; - err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn); + err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn, GFP_KERNEL); if (err) goto err_put; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index ba049ae88749..87857a6463eb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -972,7 +972,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, if (!context) return -ENOMEM; - err = mlx4_qp_alloc(mdev->dev, qpn, qp); + err = mlx4_qp_alloc(mdev->dev, qpn, qp, GFP_KERNEL); if (err) { en_err(priv, "Failed to allocate qp #%x\n", qpn); goto out; @@ -1012,7 +1012,7 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) en_err(priv, "Failed reserving drop qpn\n"); return err; } - err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp); + err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp, GFP_KERNEL); if (err) { en_err(priv, "Failed allocating drop qp\n"); mlx4_qp_release_range(priv->mdev->dev, qpn, 1); @@ -1071,7 +1071,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) } /* Configure RSS indirection qp */ - err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp); + err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp, GFP_KERNEL); if (err) { en_err(priv, "Failed to allocate RSS indirection QP\n"); goto rss_err; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index dd1f6d346459..bc0cc1eb214d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -113,7 +113,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map); ring->qpn = qpn; - err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp); + err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL); if (err) { en_err(priv, "Failed allocating qp %d\n", ring->qpn); goto err_map; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index d16a4d118903..01e6dd61ee3c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -178,8 +178,8 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); - u8 field; - u32 size; + u8 field, port; + u32 size, proxy_qp, qkey; int err = 0; #define QUERY_FUNC_CAP_FLAGS_OFFSET 0x0 @@ -209,6 +209,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, /* when opcode modifier = 1 */ #define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3 +#define QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET 0x4 #define QUERY_FUNC_CAP_FLAGS0_OFFSET 0x8 #define QUERY_FUNC_CAP_FLAGS1_OFFSET 0xc @@ -221,6 +222,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_FLAGS1_FORCE_MAC 0x40 #define QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN 0x80 #define QUERY_FUNC_CAP_FLAGS1_NIC_INFO 0x10 +#define QUERY_FUNC_CAP_VF_ENABLE_QP0 0x08 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 @@ -234,28 +236,35 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, return -EINVAL; vhcr->in_modifier = converted_port; - /* Set nic_info bit to mark new fields support */ - field = QUERY_FUNC_CAP_FLAGS1_NIC_INFO; - MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET); - /* phys-port = logical-port */ field = vhcr->in_modifier - find_first_bit(actv_ports.ports, dev->caps.num_ports); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); - field = vhcr->in_modifier; + port = vhcr->in_modifier; + proxy_qp = dev->phys_caps.base_proxy_sqpn + 8 * slave + port - 1; + + /* Set nic_info bit to mark new fields support */ + field = QUERY_FUNC_CAP_FLAGS1_NIC_INFO; + + if (mlx4_vf_smi_enabled(dev, slave, port) && + !mlx4_get_parav_qkey(dev, proxy_qp, &qkey)) { + field |= QUERY_FUNC_CAP_VF_ENABLE_QP0; + MLX4_PUT(outbox->buf, qkey, + QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET); + } + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET); + /* size is now the QP number */ - size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1; + size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + port - 1; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL); size += 2; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL); - size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY); - - size += 2; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY); + MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP0_PROXY); + proxy_qp += 2; + MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP1_PROXY); MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier], QUERY_FUNC_CAP_PHYS_PORT_ID); @@ -326,7 +335,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u8 field, op_modifier; - u32 size; + u32 size, qkey; int err = 0, quotas = 0; op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ @@ -414,7 +423,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, MLX4_GET(func_cap->flags1, outbox, QUERY_FUNC_CAP_FLAGS1_OFFSET); if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) { - if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_OFFSET) { + if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN) { mlx4_err(dev, "VLAN is enforced on this port\n"); err = -EPROTONOSUPPORT; goto out; @@ -442,6 +451,13 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, goto out; } + if (func_cap->flags1 & QUERY_FUNC_CAP_VF_ENABLE_QP0) { + MLX4_GET(qkey, outbox, QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET); + func_cap->qp0_qkey = qkey; + } else { + func_cap->qp0_qkey = 0; + } + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL); func_cap->qp0_tunnel_qpn = size & 0xFFFFFF; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 6811ee00ba7c..1fce03ebe5c4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -134,6 +134,7 @@ struct mlx4_func_cap { int max_eq; int reserved_eq; int mcg_quota; + u32 qp0_qkey; u32 qp0_tunnel_qpn; u32 qp0_proxy_qpn; u32 qp1_tunnel_qpn; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 5fbf4924c272..97c9b1db1d27 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -245,7 +245,8 @@ int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev) MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); } -int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj) +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj, + gfp_t gfp) { u32 i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size); @@ -259,7 +260,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj) } table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT, - (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) | + (table->lowmem ? gfp : GFP_HIGHUSER) | __GFP_NOWARN, table->coherent); if (!table->icm[i]) { ret = -ENOMEM; @@ -356,7 +357,7 @@ int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 i; for (i = start; i <= end; i += inc) { - err = mlx4_table_get(dev, table, i); + err = mlx4_table_get(dev, table, i, GFP_KERNEL); if (err) goto fail; } diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h index dee67fa39107..0c7364550150 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.h +++ b/drivers/net/ethernet/mellanox/mlx4/icm.h @@ -71,7 +71,8 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, gfp_t gfp_mask, int coherent); void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent); -int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj); +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj, + gfp_t gfp); void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj); int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 start, u32 end); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 7cf9dadcb471..908326876ab5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -666,13 +666,15 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENODEV; } + dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL); dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || - !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { + !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy || + !dev->caps.qp0_qkey) { err = -ENOMEM; goto err_mem; } @@ -684,6 +686,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) " port %d, aborting (%d).\n", i, err); goto err_mem; } + dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey; dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; @@ -729,12 +732,16 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return 0; err_mem: + kfree(dev->caps.qp0_qkey); kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); - dev->caps.qp0_tunnel = dev->caps.qp0_proxy = - dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; + dev->caps.qp0_qkey = NULL; + dev->caps.qp0_tunnel = NULL; + dev->caps.qp0_proxy = NULL; + dev->caps.qp1_tunnel = NULL; + dev->caps.qp1_proxy = NULL; return err; } @@ -1696,6 +1703,14 @@ unmap_bf: unmap_internal_clock(dev); unmap_bf_area(dev); + if (mlx4_is_slave(dev)) { + kfree(dev->caps.qp0_qkey); + kfree(dev->caps.qp0_tunnel); + kfree(dev->caps.qp0_proxy); + kfree(dev->caps.qp1_tunnel); + kfree(dev->caps.qp1_proxy); + } + err_close: if (mlx4_is_slave(dev)) mlx4_slave_exit(dev); @@ -2565,6 +2580,14 @@ err_master_mfunc: if (mlx4_is_master(dev)) mlx4_multi_func_cleanup(dev); + if (mlx4_is_slave(dev)) { + kfree(dev->caps.qp0_qkey); + kfree(dev->caps.qp0_tunnel); + kfree(dev->caps.qp0_proxy); + kfree(dev->caps.qp1_tunnel); + kfree(dev->caps.qp1_proxy); + } + err_close: if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); @@ -2688,6 +2711,7 @@ static void __mlx4_remove_one(struct pci_dev *pdev) if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); + kfree(dev->caps.qp0_qkey); kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index f9c465101963..7d39cb30c883 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -133,6 +133,11 @@ enum { MLX4_COMM_CMD_FLR = 254 }; +enum { + MLX4_VF_SMI_DISABLED, + MLX4_VF_SMI_ENABLED +}; + /*The flag indicates that the slave should delay the RESET cmd*/ #define MLX4_DELAY_RESET_SLAVE 0xbbbbbbb /*indicates how many retries will be done if we are in the middle of FLR*/ @@ -488,6 +493,7 @@ struct mlx4_vport_state { struct mlx4_vf_admin_state { struct mlx4_vport_state vport[MLX4_MAX_PORTS + 1]; + u8 enable_smi[MLX4_MAX_PORTS + 1]; }; struct mlx4_vport_oper_state { @@ -495,8 +501,10 @@ struct mlx4_vport_oper_state { int mac_idx; int vlan_idx; }; + struct mlx4_vf_oper_state { struct mlx4_vport_oper_state vport[MLX4_MAX_PORTS + 1]; + u8 smi_enabled[MLX4_MAX_PORTS + 1]; }; struct slave_list { @@ -888,7 +896,7 @@ void mlx4_cleanup_cq_table(struct mlx4_dev *dev); void mlx4_cleanup_qp_table(struct mlx4_dev *dev); void mlx4_cleanup_srq_table(struct mlx4_dev *dev); void mlx4_cleanup_mcg_table(struct mlx4_dev *dev); -int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn); +int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp); void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn); int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn); void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn); @@ -896,7 +904,7 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn); void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn); int __mlx4_mpt_reserve(struct mlx4_dev *dev); void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index); -int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index); +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp); void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index); u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order); void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order); diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 24835853b753..4c71dafad217 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -364,14 +364,14 @@ static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index) __mlx4_mpt_release(dev, index); } -int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp) { struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; - return mlx4_table_get(dev, &mr_table->dmpt_table, index); + return mlx4_table_get(dev, &mr_table->dmpt_table, index, gfp); } -static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) +static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp) { u64 param = 0; @@ -382,7 +382,7 @@ static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } - return __mlx4_mpt_alloc_icm(dev, index); + return __mlx4_mpt_alloc_icm(dev, index, gfp); } void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index) @@ -469,7 +469,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) struct mlx4_mpt_entry *mpt_entry; int err; - err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key)); + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key), GFP_KERNEL); if (err) return err; @@ -627,13 +627,14 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, EXPORT_SYMBOL_GPL(mlx4_write_mtt); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, - struct mlx4_buf *buf) + struct mlx4_buf *buf, gfp_t gfp) { u64 *page_list; int err; int i; - page_list = kmalloc(buf->npages * sizeof *page_list, GFP_KERNEL); + page_list = kmalloc(buf->npages * sizeof *page_list, + gfp); if (!page_list) return -ENOMEM; @@ -680,7 +681,7 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw) struct mlx4_mpt_entry *mpt_entry; int err; - err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key)); + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key), GFP_KERNEL); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 61d64ebffd56..07198cacbb20 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -272,29 +272,29 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) } EXPORT_SYMBOL_GPL(mlx4_qp_release_range); -int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) +int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; int err; - err = mlx4_table_get(dev, &qp_table->qp_table, qpn); + err = mlx4_table_get(dev, &qp_table->qp_table, qpn, gfp); if (err) goto err_out; - err = mlx4_table_get(dev, &qp_table->auxc_table, qpn); + err = mlx4_table_get(dev, &qp_table->auxc_table, qpn, gfp); if (err) goto err_put_qp; - err = mlx4_table_get(dev, &qp_table->altc_table, qpn); + err = mlx4_table_get(dev, &qp_table->altc_table, qpn, gfp); if (err) goto err_put_auxc; - err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn); + err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn, gfp); if (err) goto err_put_altc; - err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn); + err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn, gfp); if (err) goto err_put_rdmarc; @@ -316,7 +316,7 @@ err_out: return err; } -static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) +static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp) { u64 param = 0; @@ -326,7 +326,7 @@ static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } - return __mlx4_qp_alloc_icm(dev, qpn); + return __mlx4_qp_alloc_icm(dev, qpn, gfp); } void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) @@ -355,7 +355,7 @@ static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) __mlx4_qp_free_icm(dev, qpn); } -int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp) +int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; @@ -366,7 +366,7 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp) qp->qpn = qpn; - err = mlx4_qp_alloc_icm(dev, qpn); + err = mlx4_qp_alloc_icm(dev, qpn, gfp); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 1c3fdd4a1f7d..abdb000bba30 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1532,7 +1532,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, return err; if (!fw_reserved(dev, qpn)) { - err = __mlx4_qp_alloc_icm(dev, qpn); + err = __mlx4_qp_alloc_icm(dev, qpn, GFP_KERNEL); if (err) { res_abort_move(dev, slave, RES_QP, qpn); return err; @@ -1619,7 +1619,7 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - err = __mlx4_mpt_alloc_icm(dev, mpt->key); + err = __mlx4_mpt_alloc_icm(dev, mpt->key, GFP_KERNEL); if (err) { res_abort_move(dev, slave, RES_MPT, id); return err; @@ -2827,10 +2827,12 @@ static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start, } static int verify_qp_parameters(struct mlx4_dev *dev, + struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, enum qp_transition transition, u8 slave) { u32 qp_type; + u32 qpn; struct mlx4_qp_context *qp_ctx; enum mlx4_qp_optpar optpar; int port; @@ -2873,8 +2875,22 @@ static int verify_qp_parameters(struct mlx4_dev *dev, default: break; } + break; + case MLX4_QP_ST_MLX: + qpn = vhcr->in_modifier & 0x7fffff; + port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; + if (transition == QP_TRANS_INIT2RTR && + slave != mlx4_master_func_num(dev) && + mlx4_is_qp_reserved(dev, qpn) && + !mlx4_vf_smi_enabled(dev, slave, port)) { + /* only enabled VFs may create MLX proxy QPs */ + mlx4_err(dev, "%s: unprivileged slave %d attempting to create an MLX proxy special QP on port %d\n", + __func__, slave, port); + return -EPERM; + } break; + default: break; } @@ -3454,7 +3470,7 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, err = adjust_qp_sched_queue(dev, slave, qpc, inbox); if (err) return err; - err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave); + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_INIT2RTR, slave); if (err) return err; @@ -3508,7 +3524,7 @@ int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, err = adjust_qp_sched_queue(dev, slave, context, inbox); if (err) return err; - err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave); + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTR2RTS, slave); if (err) return err; @@ -3530,7 +3546,7 @@ int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, err = adjust_qp_sched_queue(dev, slave, context, inbox); if (err) return err; - err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave); + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTS2RTS, slave); if (err) return err; @@ -3567,7 +3583,7 @@ int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, err = adjust_qp_sched_queue(dev, slave, context, inbox); if (err) return err; - err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave); + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2SQD, slave); if (err) return err; @@ -3589,7 +3605,7 @@ int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, err = adjust_qp_sched_queue(dev, slave, context, inbox); if (err) return err; - err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave); + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2RTS, slave); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index 98faf870b0b0..67146624eb58 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -103,11 +103,11 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn) if (*srqn == -1) return -ENOMEM; - err = mlx4_table_get(dev, &srq_table->table, *srqn); + err = mlx4_table_get(dev, &srq_table->table, *srqn, GFP_KERNEL); if (err) goto err_out; - err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn); + err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn, GFP_KERNEL); if (err) goto err_put; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 4cc927649404..ac52a0fe2d3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -82,7 +82,11 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, return mlx5_cmd_status_to_err(&lout.hdr); } + mr->iova = be64_to_cpu(in->seg.start_addr); + mr->size = be64_to_cpu(in->seg.len); mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; + mr->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff; + mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", be32_to_cpu(lout.mkey), key, mr->key); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ba87bd21295a..3447bead9620 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -401,6 +401,7 @@ struct mlx4_caps { int max_rq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; + u32 *qp0_qkey; u32 *qp0_proxy; u32 *qp1_proxy; u32 *qp0_tunnel; @@ -837,7 +838,7 @@ static inline int mlx4_is_slave(struct mlx4_dev *dev) } int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, - struct mlx4_buf *buf); + struct mlx4_buf *buf, gfp_t gfp); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) { @@ -874,9 +875,10 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw); int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, - struct mlx4_buf *buf); + struct mlx4_buf *buf, gfp_t gfp); -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order); +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, + gfp_t gfp); void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db); int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, @@ -892,7 +894,8 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base); void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); -int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp); +int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, + gfp_t gfp); void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp); int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn, @@ -1234,4 +1237,8 @@ int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port); int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port); int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port); +int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); +int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); +int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, + int enable); #endif /* MLX4_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 93cef6313e72..2bce4aad2570 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -427,7 +427,6 @@ struct mlx5_core_mr { u64 size; u32 key; u32 pd; - u32 access; }; struct mlx5_core_srq { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index acd825182977..7ccef342f724 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -80,8 +80,8 @@ enum rdma_transport_type { RDMA_TRANSPORT_USNIC_UDP }; -enum rdma_transport_type -rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__; +__attribute_const__ enum rdma_transport_type +rdma_node_get_transport(enum rdma_node_type node_type); enum rdma_link_layer { IB_LINK_LAYER_UNSPECIFIED, @@ -466,14 +466,14 @@ enum ib_rate { * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec. * @rate: rate to convert. */ -int ib_rate_to_mult(enum ib_rate rate) __attribute_const__; +__attribute_const__ int ib_rate_to_mult(enum ib_rate rate); /** * ib_rate_to_mbps - Convert the IB rate enum to Mbps. * For example, IB_RATE_2_5_GBPS will be converted to 2500. * @rate: rate to convert. */ -int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__; +__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate); enum ib_mr_create_flags { IB_MR_SIGNATURE_EN = 1, @@ -604,7 +604,7 @@ struct ib_mr_status { * enum. * @mult: multiple to convert. */ -enum ib_rate mult_to_ib_rate(int mult) __attribute_const__; +__attribute_const__ enum ib_rate mult_to_ib_rate(int mult); struct ib_ah_attr { struct ib_global_route grh; @@ -783,6 +783,7 @@ enum ib_qp_create_flags { IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, IB_QP_CREATE_NETIF_QP = 1 << 5, IB_QP_CREATE_SIGNATURE_EN = 1 << 6, + IB_QP_CREATE_USE_GFP_NOIO = 1 << 7, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, |