From 5e6a7d1eb834be23243c48a964ced67ab0b6e9ce Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 9 Mar 2022 14:29:00 +0100 Subject: nvme-multipath: use vmalloc for ANA log buffer The ANA log buffer can get really large, as it depends on the controller configuration. So to avoid an out-of-memory issue during scanning use kvmalloc() instead of the kmalloc(). Signed-off-by: Hannes Reinecke Tested-by: Daniel Wagner Signed-off-by: Daniel Wagner Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index f8bf6606eb2f..3ddc1ba66f0d 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -5,6 +5,7 @@ #include #include +#include #include #include "nvme.h" @@ -898,7 +899,7 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) if (ana_log_size > ctrl->ana_log_size) { nvme_mpath_stop(ctrl); nvme_mpath_uninit(ctrl); - ctrl->ana_log_buf = kmalloc(ana_log_size, GFP_KERNEL); + ctrl->ana_log_buf = kvmalloc(ana_log_size, GFP_KERNEL); if (!ctrl->ana_log_buf) return -ENOMEM; } @@ -915,7 +916,7 @@ out_uninit: void nvme_mpath_uninit(struct nvme_ctrl *ctrl) { - kfree(ctrl->ana_log_buf); + kvfree(ctrl->ana_log_buf); ctrl->ana_log_buf = NULL; ctrl->ana_log_size = 0; } -- cgit v1.2.3 From 8f31dded50d927974a5a696196052ed53f1baf66 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Wed, 9 Mar 2022 14:02:28 +0800 Subject: nvme-multipath: call bio_io_error in nvme_ns_head_submit_bio Use bio_io_error() here since bio_io_error does the same thing. 
Signed-off-by: Guoqing Jiang Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 3ddc1ba66f0d..d13b81cd6225 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -387,8 +387,7 @@ static void nvme_ns_head_submit_bio(struct bio *bio) } else { dev_warn_ratelimited(dev, "no available path - failing I/O\n"); - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); + bio_io_error(bio); } srcu_read_unlock(&head->srcu, srcu_idx); -- cgit v1.2.3 From 462b8b2d84975758e5b74fe832bfe8145eef403f Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 22 Feb 2022 19:36:56 -0800 Subject: nvme-tcp: don't initialize ret variable No point in initializing ret variable to 0 in nvme_tcp_start_io_queue() since it gets overwritten by a call to nvme_tcp_start_queue(). Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 6cbcc8b4daaf..bb03ae5ef11f 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1674,7 +1674,7 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl) { - int i, ret = 0; + int i, ret; for (i = 1; i < ctrl->queue_count; i++) { ret = nvme_tcp_start_queue(ctrl, i); -- cgit v1.2.3 From a387935c241d2517c22546f1206a77a856a40541 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 22 Feb 2022 19:36:57 -0800 Subject: nvme-tcp: don't fold the line The call to nvme_tcp_alloc_queue() fits perfectly in one line without exceeding 80 char limit for the line. 
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index bb03ae5ef11f..babbc14a4b76 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1714,8 +1714,7 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) int i, ret; for (i = 1; i < ctrl->queue_count; i++) { - ret = nvme_tcp_alloc_queue(ctrl, i, - ctrl->sqsize + 1); + ret = nvme_tcp_alloc_queue(ctrl, i, ctrl->sqsize + 1); if (ret) goto out_free_queues; } -- cgit v1.2.3 From 841aee4d75f18fdfb53935080b03de0c65e9b92c Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 15 Feb 2022 18:22:49 -0800 Subject: nvme-tcp: lockdep: annotate in-kernel sockets Put NVMe/TCP sockets in their own class to avoid some lockdep warnings. Sockets created by nvme-tcp are not exposed to user-space, and will not trigger certain code paths that the general socket API exposes. Lockdep complains about a circular dependency between the socket and filesystem locks, because setsockopt can trigger a page fault with a socket lock held, but nvme-tcp sends requests on the socket while file system locks are held. ====================================================== WARNING: possible circular locking dependency detected 5.15.0-rc3 #1 Not tainted ------------------------------------------------------ fio/1496 is trying to acquire lock: (sk_lock-AF_INET){+.+.}-{0:0}, at: tcp_sendpage+0x23/0x80 but task is already holding lock: (&xfs_dir_ilock_class/5){+.+.}-{3:3}, at: xfs_ilock+0xcf/0x290 [xfs] which lock already depends on the new lock. 
other info that might help us debug this: chain exists of: sk_lock-AF_INET --> sb_internal --> &xfs_dir_ilock_class/5 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&xfs_dir_ilock_class/5); lock(sb_internal); lock(&xfs_dir_ilock_class/5); lock(sk_lock-AF_INET); *** DEADLOCK *** 6 locks held by fio/1496: #0: (sb_writers#13){.+.+}-{0:0}, at: path_openat+0x9fc/0xa20 #1: (&inode->i_sb->s_type->i_mutex_dir_key){++++}-{3:3}, at: path_openat+0x296/0xa20 #2: (sb_internal){.+.+}-{0:0}, at: xfs_trans_alloc_icreate+0x41/0xd0 [xfs] #3: (&xfs_dir_ilock_class/5){+.+.}-{3:3}, at: xfs_ilock+0xcf/0x290 [xfs] #4: (hctx->srcu){....}-{0:0}, at: hctx_lock+0x51/0xd0 #5: (&queue->send_mutex){+.+.}-{3:3}, at: nvme_tcp_queue_rq+0x33e/0x380 [nvme_tcp] This annotation lets lockdep analyze nvme-tcp controlled sockets independently of what the user-space sockets API does. Link: https://lore.kernel.org/linux-nvme/CAHj4cs9MDYLJ+q+2_GXUK9HxFizv2pxUryUR0toX974M040z7g@mail.gmail.com/ Signed-off-by: Chris Leech Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index babbc14a4b76..10fc45d95b86 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -30,6 +30,44 @@ static int so_priority; module_param(so_priority, int, 0644); MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority"); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/* lockdep can detect a circular dependency of the form + * sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock + * because dependencies are tracked for both nvme-tcp and user contexts. Using + * a separate class prevents lockdep from conflating nvme-tcp socket use with + * user-space socket API use. 
+ */ +static struct lock_class_key nvme_tcp_sk_key[2]; +static struct lock_class_key nvme_tcp_slock_key[2]; + +static void nvme_tcp_reclassify_socket(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) + return; + + switch (sk->sk_family) { + case AF_INET: + sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME", + &nvme_tcp_slock_key[0], + "sk_lock-AF_INET-NVME", + &nvme_tcp_sk_key[0]); + break; + case AF_INET6: + sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME", + &nvme_tcp_slock_key[1], + "sk_lock-AF_INET6-NVME", + &nvme_tcp_sk_key[1]); + break; + default: + WARN_ON_ONCE(1); + } +} +#else +static void nvme_tcp_reclassify_socket(struct socket *sock) { } +#endif + enum nvme_tcp_send_state { NVME_TCP_SEND_CMD_PDU = 0, NVME_TCP_SEND_H2C_PDU, @@ -1427,6 +1465,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, goto err_destroy_mutex; } + nvme_tcp_reclassify_socket(queue->sock); + /* Single syn retry */ tcp_sock_set_syncnt(queue->sock->sk, 1); -- cgit v1.2.3 From b2fb99e42598ba5d1ac045fb39a2b38fd80dd8d2 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 8 Mar 2022 21:59:25 -0800 Subject: nvmet-fc: fix kernel-doc warning for nvmet_fc_register_targetport This fixes following kernel-doc warning :- drivers/nvme/target/fc.c:1365: warning: expecting prototype for nvme_fc_register_targetport(). 
Prototype was for nvmet_fc_register_targetport() instead Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index cb013516784c..56c2bdfd8e3b 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1341,7 +1341,7 @@ nvmet_fc_portentry_rebind_tgt(struct nvmet_fc_tgtport *tgtport) } /** - * nvme_fc_register_targetport - transport entry point called by an + * nvmet_fc_register_targetport - transport entry point called by an * LLDD to register the existence of a local * NVME subystem FC port. * @pinfo: pointer to information about the port to be registered -- cgit v1.2.3 From 0acb8231ebae5175260a7da853f525f2730cc095 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 8 Mar 2022 21:59:26 -0800 Subject: nvmet-fc: fix kernel-doc warning for nvmet_fc_unregister_targetport This fixes following kernel-doc warning:- drivers/nvme/target/fc.c:1619: warning: expecting prototype for nvme_fc_unregister_targetport(). Prototype was for nvmet_fc_unregister_targetport() instead Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 56c2bdfd8e3b..de90001fc5c4 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1604,7 +1604,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) } /** - * nvme_fc_unregister_targetport - transport entry point called by an + * nvmet_fc_unregister_targetport - transport entry point called by an * LLDD to deregister/remove a previously * registered a local NVME subsystem FC port. 
* @target_port: pointer to the (registered) target port that is to be -- cgit v1.2.3 From a8adf0cddc230be98da0e68bea846e28686dcb60 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 8 Mar 2022 21:59:27 -0800 Subject: nvmet-rdma: fix kernel-doc warning for nvmet_rdma_device_removal This fixes following kernel-doc warning:- drivers/nvme/target/rdma.c:1722: warning: expecting prototype for nvme_rdma_device_removal(). Prototype was for nvmet_rdma_device_removal() instead Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 00656754e331..2446d0918a41 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1703,7 +1703,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, } /** - * nvme_rdma_device_removal() - Handle RDMA device removal + * nvmet_rdma_device_removal() - Handle RDMA device removal * @cm_id: rdma_cm id, used for nvmet port * @queue: nvmet rdma queue (cm id qp_context) * -- cgit v1.2.3 From 73d77c53ff342bfa23daedf4475cdd06618e447b Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 8 Mar 2022 14:56:57 -0800 Subject: nvmet: don't fold lines Don't fold line that can fit into 80 char limit. No functional change in this patch. 
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 091a0ca16361..0f5c680c851a 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -93,10 +93,9 @@ CONFIGFS_ATTR(nvmet_, addr_adrfam); static ssize_t nvmet_addr_portid_show(struct config_item *item, char *page) { - struct nvmet_port *port = to_nvmet_port(item); + __le16 portid = to_nvmet_port(item)->disc_addr.portid; - return snprintf(page, PAGE_SIZE, "%d\n", - le16_to_cpu(port->disc_addr.portid)); + return snprintf(page, PAGE_SIZE, "%d\n", le16_to_cpu(portid)); } static ssize_t nvmet_addr_portid_store(struct config_item *item, @@ -124,8 +123,7 @@ static ssize_t nvmet_addr_traddr_show(struct config_item *item, { struct nvmet_port *port = to_nvmet_port(item); - return snprintf(page, PAGE_SIZE, "%s\n", - port->disc_addr.traddr); + return snprintf(page, PAGE_SIZE, "%s\n", port->disc_addr.traddr); } static ssize_t nvmet_addr_traddr_store(struct config_item *item, @@ -199,8 +197,7 @@ static ssize_t nvmet_addr_trsvcid_show(struct config_item *item, { struct nvmet_port *port = to_nvmet_port(item); - return snprintf(page, PAGE_SIZE, "%s\n", - port->disc_addr.trsvcid); + return snprintf(page, PAGE_SIZE, "%s\n", port->disc_addr.trsvcid); } static ssize_t nvmet_addr_trsvcid_store(struct config_item *item, @@ -1236,8 +1233,7 @@ CONFIGFS_ATTR(nvmet_subsys_, attr_model); static ssize_t nvmet_subsys_attr_discovery_nqn_show(struct config_item *item, char *page) { - return snprintf(page, PAGE_SIZE, "%s\n", - nvmet_disc_subsys->subsysnqn); + return snprintf(page, PAGE_SIZE, "%s\n", nvmet_disc_subsys->subsysnqn); } static ssize_t nvmet_subsys_attr_discovery_nqn_store(struct config_item *item, -- cgit v1.2.3 From 98152eb70fffad910ee7c511b110afe98f162fc5 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: 
Tue, 8 Mar 2022 14:56:58 -0800 Subject: nvmet: use snprintf() with PAGE_SIZE in configfs Instead of using sprintf, use snprintf with buffer size limited to PAGE_SIZE just like what we have for the rest of the file. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 0f5c680c851a..ac801fa1641b 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -60,10 +60,11 @@ static ssize_t nvmet_addr_adrfam_show(struct config_item *item, char *page) for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) { if (nvmet_addr_family[i].type == adrfam) - return sprintf(page, "%s\n", nvmet_addr_family[i].name); + return snprintf(page, PAGE_SIZE, "%s\n", + nvmet_addr_family[i].name); } - return sprintf(page, "\n"); + return snprintf(page, PAGE_SIZE, "\n"); } static ssize_t nvmet_addr_adrfam_store(struct config_item *item, @@ -160,10 +161,11 @@ static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page) for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) { if (treq == nvmet_addr_treq[i].type) - return sprintf(page, "%s\n", nvmet_addr_treq[i].name); + return snprintf(page, PAGE_SIZE, "%s\n", + nvmet_addr_treq[i].name); } - return sprintf(page, "\n"); + return snprintf(page, PAGE_SIZE, "\n"); } static ssize_t nvmet_addr_treq_store(struct config_item *item, @@ -281,7 +283,8 @@ static ssize_t nvmet_addr_trtype_show(struct config_item *item, for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) { if (port->disc_addr.trtype == nvmet_transport[i].type) - return sprintf(page, "%s\n", nvmet_transport[i].name); + return snprintf(page, PAGE_SIZE, + "%s\n", nvmet_transport[i].name); } return sprintf(page, "\n"); -- cgit v1.2.3 From da7837339641601f202f27515771dc0646083938 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Mar 2022 08:13:04 +0100 Subject: 
nvmet: move the call to nvmet_ns_changed out of nvmet_ns_revalidate nvmet_ns_changed states via lockdep that the ns->subsys->lock must be held. The only caller of nvmet_ns_changed which does not acquire that lock is nvmet_ns_revalidate. nvmet_ns_revalidate has 3 callers, of which 2 do not acquire that lock: nvmet_execute_identify_cns_cs_ns and nvmet_execute_identify_ns. The other caller nvmet_ns_revalidate_size_store does acquire the lock. Move the call to nvmet_ns_changed from nvmet_ns_revalidate to the callers so that they can perform the correct locking as needed. This issue was found using a static type-based analyser and manually verified. Reported-by: Niels Dossche Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/target/admin-cmd.c | 6 +++++- drivers/nvme/target/configfs.c | 3 ++- drivers/nvme/target/core.c | 5 ++--- drivers/nvme/target/nvmet.h | 2 +- drivers/nvme/target/zns.c | 6 +++++- 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 6fb24746de06..46d0dab686dd 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -511,7 +511,11 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) goto done; } - nvmet_ns_revalidate(req->ns); + if (nvmet_ns_revalidate(req->ns)) { + mutex_lock(&req->ns->subsys->lock); + nvmet_ns_changed(req->ns->subsys, req->ns->nsid); + mutex_unlock(&req->ns->subsys->lock); + } /* * nuse = ncap = nsze isn't always true, but we have no way to find diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index ac801fa1641b..b650312dd4ae 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -586,7 +586,8 @@ static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item, mutex_unlock(&ns->subsys->lock); return -EINVAL; } - nvmet_ns_revalidate(ns); + if (nvmet_ns_revalidate(ns)) + 
nvmet_ns_changed(ns->subsys, ns->nsid); mutex_unlock(&ns->subsys->lock); return count; } diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 724a6d373340..92af37d33ec3 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -531,7 +531,7 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl, ns->nsid); } -void nvmet_ns_revalidate(struct nvmet_ns *ns) +bool nvmet_ns_revalidate(struct nvmet_ns *ns) { loff_t oldsize = ns->size; @@ -540,8 +540,7 @@ void nvmet_ns_revalidate(struct nvmet_ns *ns) else nvmet_file_ns_revalidate(ns); - if (oldsize != ns->size) - nvmet_ns_changed(ns->subsys, ns->nsid); + return oldsize != ns->size; } int nvmet_ns_enable(struct nvmet_ns *ns) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 69637bf8f8e1..d910c6aad4b6 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -542,7 +542,7 @@ u16 nvmet_file_flush(struct nvmet_req *req); void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid); void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns); void nvmet_file_ns_revalidate(struct nvmet_ns *ns); -void nvmet_ns_revalidate(struct nvmet_ns *ns); +bool nvmet_ns_revalidate(struct nvmet_ns *ns); u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts); bool nvmet_bdev_zns_enable(struct nvmet_ns *ns); diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index 3e421217a7ad..e34718b09550 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -123,7 +123,11 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req) goto done; } - nvmet_ns_revalidate(req->ns); + if (nvmet_ns_revalidate(req->ns)) { + mutex_lock(&req->ns->subsys->lock); + nvmet_ns_changed(req->ns->subsys, req->ns->nsid); + mutex_unlock(&req->ns->subsys->lock); + } zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >> req->ns->blksize_shift; id_zns->lbafe[0].zsze = cpu_to_le64(zsze); -- cgit v1.2.3 From b739e137052069c996ab8b3bc9c25be501ecc63b Mon 
Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Mar 2022 12:58:06 +0100 Subject: nvme: cleanup how disk->disk_name is assigned The way the disk name is assigned, and the comments explaining why it is done that way, are split over core.c and multipath.c, which is rather confusing. Now that ns_head->disk always exists we can do all the work in core.c and have a single big comment explaining the issues. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 22 ++++++++++++++++++---- drivers/nvme/host/multipath.c | 24 +----------------------- drivers/nvme/host/nvme.h | 8 ++------ 3 files changed, 21 insertions(+), 33 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f8084ded69e5..fd9878671d73 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3967,13 +3967,27 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, goto out_cleanup_disk; /* - * Without the multipath code enabled, multiple controller per - * subsystems are visible as devices and thus we cannot use the - * subsystem instance. + * If multipathing is enabled, the device name for all disks and not + * just those that represent shared namespaces needs to be based on the + * subsystem instance. Using the controller instance for private + * namespaces could lead to naming collisions between shared and private + * namespaces if they don't use a common numbering scheme. + * + * If multipathing is not enabled, disk names must use the controller + * instance as shared namespaces will show up as multiple block + * devices.
*/ - if (!nvme_mpath_set_disk_name(ns, disk->disk_name, &disk->flags)) + if (ns->head->disk) { + sprintf(disk->disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, + ctrl->instance, ns->head->instance); + disk->flags |= GENHD_FL_HIDDEN; + } else if (multipath) { + sprintf(disk->disk_name, "nvme%dn%d", ctrl->subsys->instance, + ns->head->instance); + } else { sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); + } if (nvme_update_ns_info(ns, id)) goto out_unlink_ns; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index d13b81cd6225..c97d7f843977 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -9,7 +9,7 @@ #include #include "nvme.h" -static bool multipath = true; +bool multipath = true; module_param(multipath, bool, 0444); MODULE_PARM_DESC(multipath, "turn on native support for multiple controllers per subsystem"); @@ -80,28 +80,6 @@ void nvme_mpath_start_freeze(struct nvme_subsystem *subsys) blk_freeze_queue_start(h->disk->queue); } -/* - * If multipathing is enabled we need to always use the subsystem instance - * number for numbering our devices to avoid conflicts between subsystems that - * have multiple controllers and thus use the multipath-aware subsystem node - * and those that have a single controller and use the controller node - * directly. 
- */ -bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags) -{ - if (!multipath) - return false; - if (!ns->head->disk) { - sprintf(disk_name, "nvme%dn%d", ns->ctrl->subsys->instance, - ns->head->instance); - return true; - } - sprintf(disk_name, "nvme%dc%dn%d", ns->ctrl->subsys->instance, - ns->ctrl->instance, ns->head->instance); - *flags = GENHD_FL_HIDDEN; - return true; -} - void nvme_failover_req(struct request *req) { struct nvme_ns *ns = req->q->queuedata; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 587d92df118b..47badb76c654 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -770,7 +770,6 @@ void nvme_mpath_unfreeze(struct nvme_subsystem *subsys); void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys); void nvme_mpath_start_freeze(struct nvme_subsystem *subsys); void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys); -bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags); void nvme_failover_req(struct request *req); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); @@ -793,20 +792,17 @@ static inline void nvme_trace_bio_complete(struct request *req) trace_block_bio_complete(ns->head->disk->queue, req->bio); } +extern bool multipath; extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; extern struct device_attribute subsys_attr_iopolicy; #else +#define multipath false static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) { return false; } -static inline bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, - int *flags) -{ - return false; -} static inline void nvme_failover_req(struct request *req) { } -- cgit v1.2.3 From e559398f47e090394bbbd9006349c858e1ba80da Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Mar 2022 15:53:59 +0100 Subject: nvme: remove nvme_alloc_request and 
nvme_alloc_request_qid Just open code the allocation + initialization in the callers. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 46 ++++++++++-------------------------------- drivers/nvme/host/ioctl.c | 3 ++- drivers/nvme/host/nvme.h | 8 ++++++-- drivers/nvme/host/pci.c | 17 +++++++++------- drivers/nvme/target/passthru.c | 3 ++- 5 files changed, 31 insertions(+), 46 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fd9878671d73..c868015c1c96 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -639,13 +639,8 @@ static inline void nvme_clear_nvme_request(struct request *req) req->rq_flags |= RQF_DONTPREP; } -static inline unsigned int nvme_req_op(struct nvme_command *cmd) -{ - return nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN; -} - -static inline void nvme_init_request(struct request *req, - struct nvme_command *cmd) +/* initialize a passthrough request */ +void nvme_init_request(struct request *req, struct nvme_command *cmd) { if (req->q->queuedata) req->timeout = NVME_IO_TIMEOUT; @@ -661,30 +656,7 @@ static inline void nvme_init_request(struct request *req, nvme_clear_nvme_request(req); memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd)); } - -struct request *nvme_alloc_request(struct request_queue *q, - struct nvme_command *cmd, blk_mq_req_flags_t flags) -{ - struct request *req; - - req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags); - if (!IS_ERR(req)) - nvme_init_request(req, cmd); - return req; -} -EXPORT_SYMBOL_GPL(nvme_alloc_request); - -static struct request *nvme_alloc_request_qid(struct request_queue *q, - struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid) -{ - struct request *req; - - req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags, - qid ? 
qid - 1 : 0); - if (!IS_ERR(req)) - nvme_init_request(req, cmd); - return req; -} +EXPORT_SYMBOL_GPL(nvme_init_request); /* * For something we're not in a state to send to the device the default action @@ -1110,11 +1082,14 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, int ret; if (qid == NVME_QID_ANY) - req = nvme_alloc_request(q, cmd, flags); + req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags); else - req = nvme_alloc_request_qid(q, cmd, flags, qid); + req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags, + qid ? qid - 1 : 0); + if (IS_ERR(req)) return PTR_ERR(req); + nvme_init_request(req, cmd); if (timeout) req->timeout = timeout; @@ -1304,14 +1279,15 @@ static void nvme_keep_alive_work(struct work_struct *work) return; } - rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, - BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + rq = blk_mq_alloc_request(ctrl->admin_q, nvme_req_op(&ctrl->ka_cmd), + BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); if (IS_ERR(rq)) { /* allocation failure, reset the controller */ dev_err(ctrl->device, "keep-alive failed: %ld\n", PTR_ERR(rq)); nvme_reset_ctrl(ctrl); return; } + nvme_init_request(rq, &ctrl->ka_cmd); rq->timeout = ctrl->kato * HZ; rq->end_io_data = ctrl; diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index aaf3dfad2657..554566371ffa 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -66,9 +66,10 @@ static int nvme_submit_user_cmd(struct request_queue *q, void *meta = NULL; int ret; - req = nvme_alloc_request(q, cmd, 0); + req = blk_mq_alloc_request(q, nvme_req_op(cmd), 0); if (IS_ERR(req)) return PTR_ERR(req); + nvme_init_request(req, cmd); if (timeout) req->timeout = timeout; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 47badb76c654..1ea908d43e17 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -698,9 +698,13 @@ void nvme_wait_freeze(struct nvme_ctrl *ctrl); int nvme_wait_freeze_timeout(struct 
nvme_ctrl *ctrl, long timeout); void nvme_start_freeze(struct nvme_ctrl *ctrl); +static inline unsigned int nvme_req_op(struct nvme_command *cmd) +{ + return nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN; +} + #define NVME_QID_ANY -1 -struct request *nvme_alloc_request(struct request_queue *q, - struct nvme_command *cmd, blk_mq_req_flags_t flags); +void nvme_init_request(struct request *req, struct nvme_command *cmd); void nvme_cleanup_cmd(struct request *req); blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req); blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6a99ed680915..9f4f3884fefe 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -424,8 +424,9 @@ static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, return 0; } -static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req, - unsigned int hctx_idx, unsigned int numa_node) +static int nvme_pci_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) { struct nvme_dev *dev = set->driver_data; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); @@ -1428,12 +1429,13 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) "I/O %d QID %d timeout, aborting\n", req->tag, nvmeq->qid); - abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd, - BLK_MQ_REQ_NOWAIT); + abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd), + BLK_MQ_REQ_NOWAIT); if (IS_ERR(abort_req)) { atomic_inc(&dev->ctrl.abort_limit); return BLK_EH_RESET_TIMER; } + nvme_init_request(abort_req, &cmd); abort_req->end_io_data = NULL; blk_execute_rq_nowait(abort_req, false, abort_endio); @@ -1722,7 +1724,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, .complete = nvme_pci_complete_rq, .init_hctx = nvme_admin_init_hctx, - .init_request = nvme_init_request, + .init_request = 
nvme_pci_init_request, .timeout = nvme_timeout, }; @@ -1732,7 +1734,7 @@ static const struct blk_mq_ops nvme_mq_ops = { .complete = nvme_pci_complete_rq, .commit_rqs = nvme_commit_rqs, .init_hctx = nvme_init_hctx, - .init_request = nvme_init_request, + .init_request = nvme_pci_init_request, .map_queues = nvme_pci_map_queues, .timeout = nvme_timeout, .poll = nvme_poll, @@ -2475,9 +2477,10 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) cmd.delete_queue.opcode = opcode; cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid); - req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT); + req = blk_mq_alloc_request(q, nvme_req_op(&cmd), BLK_MQ_REQ_NOWAIT); if (IS_ERR(req)) return PTR_ERR(req); + nvme_init_request(req, &cmd); req->end_io_data = nvmeq; diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index a810bf569fff..a4de1e0d518b 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -254,11 +254,12 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) timeout = nvmet_req_subsys(req)->admin_timeout; } - rq = nvme_alloc_request(q, req->cmd, 0); + rq = blk_mq_alloc_request(q, nvme_req_op(req->cmd), 0); if (IS_ERR(rq)) { status = NVME_SC_INTERNAL; goto out_put_ns; } + nvme_init_request(rq, req->cmd); if (timeout) rq->timeout = timeout; -- cgit v1.2.3 From ce8d78616a6b637d1b763eb18e32045687a84305 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Mar 2022 13:27:07 +0100 Subject: nvme: warn about shared namespaces without CONFIG_NVME_MULTIPATH Start warning about exposing a namespace as multiple block devices, and set a fixed deprecation release. 
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/block/loop.c | 1 + drivers/nvme/host/core.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 8fb89d0624fc..e733c48de2e9 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2092,6 +2092,7 @@ static void loop_remove(struct loop_device *lo) del_gendisk(lo->lo_disk); blk_cleanup_disk(lo->lo_disk); blk_mq_free_tag_set(&lo->tag_set); + mutex_lock(&loop_ctl_mutex); idr_remove(&loop_index_idr, lo->lo_number); mutex_unlock(&loop_ctl_mutex); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c868015c1c96..3c0461129bca 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3855,6 +3855,14 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, nsid); goto out_put_ns_head; } + + if (!multipath && !list_empty(&head->list)) { + dev_warn(ctrl->device, + "Found shared namespace %d, but multipathing not supported.\n", + nsid); + dev_warn_once(ctrl->device, + "Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0.\n"); + } } list_add_tail_rcu(&ns->siblings, &head->list); -- cgit v1.2.3