diff options
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r-- | drivers/nvme/host/core.c | 76 | ||||
-rw-r--r-- | drivers/nvme/host/ioctl.c | 3 | ||||
-rw-r--r-- | drivers/nvme/host/multipath.c | 32 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 16 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 17 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 45 |
6 files changed, 104 insertions, 85 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f8084ded69e5..3c0461129bca 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -639,13 +639,8 @@ static inline void nvme_clear_nvme_request(struct request *req) req->rq_flags |= RQF_DONTPREP; } -static inline unsigned int nvme_req_op(struct nvme_command *cmd) -{ - return nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN; -} - -static inline void nvme_init_request(struct request *req, - struct nvme_command *cmd) +/* initialize a passthrough request */ +void nvme_init_request(struct request *req, struct nvme_command *cmd) { if (req->q->queuedata) req->timeout = NVME_IO_TIMEOUT; @@ -661,30 +656,7 @@ static inline void nvme_init_request(struct request *req, nvme_clear_nvme_request(req); memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd)); } - -struct request *nvme_alloc_request(struct request_queue *q, - struct nvme_command *cmd, blk_mq_req_flags_t flags) -{ - struct request *req; - - req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags); - if (!IS_ERR(req)) - nvme_init_request(req, cmd); - return req; -} -EXPORT_SYMBOL_GPL(nvme_alloc_request); - -static struct request *nvme_alloc_request_qid(struct request_queue *q, - struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid) -{ - struct request *req; - - req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags, - qid ? qid - 1 : 0); - if (!IS_ERR(req)) - nvme_init_request(req, cmd); - return req; -} +EXPORT_SYMBOL_GPL(nvme_init_request); /* * For something we're not in a state to send to the device the default action @@ -1110,11 +1082,14 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, int ret; if (qid == NVME_QID_ANY) - req = nvme_alloc_request(q, cmd, flags); + req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags); else - req = nvme_alloc_request_qid(q, cmd, flags, qid); + req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags, + qid ? 
qid - 1 : 0); + if (IS_ERR(req)) return PTR_ERR(req); + nvme_init_request(req, cmd); if (timeout) req->timeout = timeout; @@ -1304,14 +1279,15 @@ static void nvme_keep_alive_work(struct work_struct *work) return; } - rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, - BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + rq = blk_mq_alloc_request(ctrl->admin_q, nvme_req_op(&ctrl->ka_cmd), + BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); if (IS_ERR(rq)) { /* allocation failure, reset the controller */ dev_err(ctrl->device, "keep-alive failed: %ld\n", PTR_ERR(rq)); nvme_reset_ctrl(ctrl); return; } + nvme_init_request(rq, &ctrl->ka_cmd); rq->timeout = ctrl->kato * HZ; rq->end_io_data = ctrl; @@ -3879,6 +3855,14 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, nsid); goto out_put_ns_head; } + + if (!multipath && !list_empty(&head->list)) { + dev_warn(ctrl->device, + "Found shared namespace %d, but multipathing not supported.\n", + nsid); + dev_warn_once(ctrl->device, + "Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0.\n"); + } } list_add_tail_rcu(&ns->siblings, &head->list); @@ -3967,13 +3951,27 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, goto out_cleanup_disk; /* - * Without the multipath code enabled, multiple controller per - * subsystems are visible as devices and thus we cannot use the - * subsystem instance. + * If multipathing is enabled, the device name for all disks and not + * just those that represent shared namespaces needs to be based on the + * subsystem instance. Using the controller instance for private + * namespaces could lead to naming collisions between shared and private + * namespaces if they don't use a common numbering scheme. + * + * If multipathing is not enabled, disk names must use the controller + * instance as shared namespaces will show up as multiple block + * devices. 
*/ - if (!nvme_mpath_set_disk_name(ns, disk->disk_name, &disk->flags)) + if (ns->head->disk) { + sprintf(disk->disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, + ctrl->instance, ns->head->instance); + disk->flags |= GENHD_FL_HIDDEN; + } else if (multipath) { + sprintf(disk->disk_name, "nvme%dn%d", ctrl->subsys->instance, + ns->head->instance); + } else { sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); + } if (nvme_update_ns_info(ns, id)) goto out_unlink_ns; diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index aaf3dfad2657..554566371ffa 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -66,9 +66,10 @@ static int nvme_submit_user_cmd(struct request_queue *q, void *meta = NULL; int ret; - req = nvme_alloc_request(q, cmd, 0); + req = blk_mq_alloc_request(q, nvme_req_op(cmd), 0); if (IS_ERR(req)) return PTR_ERR(req); + nvme_init_request(req, cmd); if (timeout) req->timeout = timeout; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index f8bf6606eb2f..c97d7f843977 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -5,10 +5,11 @@ #include <linux/backing-dev.h> #include <linux/moduleparam.h> +#include <linux/vmalloc.h> #include <trace/events/block.h> #include "nvme.h" -static bool multipath = true; +bool multipath = true; module_param(multipath, bool, 0444); MODULE_PARM_DESC(multipath, "turn on native support for multiple controllers per subsystem"); @@ -79,28 +80,6 @@ void nvme_mpath_start_freeze(struct nvme_subsystem *subsys) blk_freeze_queue_start(h->disk->queue); } -/* - * If multipathing is enabled we need to always use the subsystem instance - * number for numbering our devices to avoid conflicts between subsystems that - * have multiple controllers and thus use the multipath-aware subsystem node - * and those that have a single controller and use the controller node - * directly. 
- */ -bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags) -{ - if (!multipath) - return false; - if (!ns->head->disk) { - sprintf(disk_name, "nvme%dn%d", ns->ctrl->subsys->instance, - ns->head->instance); - return true; - } - sprintf(disk_name, "nvme%dc%dn%d", ns->ctrl->subsys->instance, - ns->ctrl->instance, ns->head->instance); - *flags = GENHD_FL_HIDDEN; - return true; -} - void nvme_failover_req(struct request *req) { struct nvme_ns *ns = req->q->queuedata; @@ -386,8 +365,7 @@ static void nvme_ns_head_submit_bio(struct bio *bio) } else { dev_warn_ratelimited(dev, "no available path - failing I/O\n"); - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); + bio_io_error(bio); } srcu_read_unlock(&head->srcu, srcu_idx); @@ -898,7 +876,7 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) if (ana_log_size > ctrl->ana_log_size) { nvme_mpath_stop(ctrl); nvme_mpath_uninit(ctrl); - ctrl->ana_log_buf = kmalloc(ana_log_size, GFP_KERNEL); + ctrl->ana_log_buf = kvmalloc(ana_log_size, GFP_KERNEL); if (!ctrl->ana_log_buf) return -ENOMEM; } @@ -915,7 +893,7 @@ out_uninit: void nvme_mpath_uninit(struct nvme_ctrl *ctrl) { - kfree(ctrl->ana_log_buf); + kvfree(ctrl->ana_log_buf); ctrl->ana_log_buf = NULL; ctrl->ana_log_size = 0; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 587d92df118b..1ea908d43e17 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -698,9 +698,13 @@ void nvme_wait_freeze(struct nvme_ctrl *ctrl); int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); void nvme_start_freeze(struct nvme_ctrl *ctrl); +static inline unsigned int nvme_req_op(struct nvme_command *cmd) +{ + return nvme_is_write(cmd) ? 
REQ_OP_DRV_OUT : REQ_OP_DRV_IN; +} + #define NVME_QID_ANY -1 -struct request *nvme_alloc_request(struct request_queue *q, - struct nvme_command *cmd, blk_mq_req_flags_t flags); +void nvme_init_request(struct request *req, struct nvme_command *cmd); void nvme_cleanup_cmd(struct request *req); blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req); blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, @@ -770,7 +774,6 @@ void nvme_mpath_unfreeze(struct nvme_subsystem *subsys); void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys); void nvme_mpath_start_freeze(struct nvme_subsystem *subsys); void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys); -bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags); void nvme_failover_req(struct request *req); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); @@ -793,20 +796,17 @@ static inline void nvme_trace_bio_complete(struct request *req) trace_block_bio_complete(ns->head->disk->queue, req->bio); } +extern bool multipath; extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; extern struct device_attribute subsys_attr_iopolicy; #else +#define multipath false static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) { return false; } -static inline bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, - int *flags) -{ - return false; -} static inline void nvme_failover_req(struct request *req) { } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6a99ed680915..9f4f3884fefe 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -424,8 +424,9 @@ static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, return 0; } -static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req, - unsigned int hctx_idx, unsigned int numa_node) +static int nvme_pci_init_request(struct 
blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) { struct nvme_dev *dev = set->driver_data; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); @@ -1428,12 +1429,13 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) "I/O %d QID %d timeout, aborting\n", req->tag, nvmeq->qid); - abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd, - BLK_MQ_REQ_NOWAIT); + abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd), + BLK_MQ_REQ_NOWAIT); if (IS_ERR(abort_req)) { atomic_inc(&dev->ctrl.abort_limit); return BLK_EH_RESET_TIMER; } + nvme_init_request(abort_req, &cmd); abort_req->end_io_data = NULL; blk_execute_rq_nowait(abort_req, false, abort_endio); @@ -1722,7 +1724,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, .complete = nvme_pci_complete_rq, .init_hctx = nvme_admin_init_hctx, - .init_request = nvme_init_request, + .init_request = nvme_pci_init_request, .timeout = nvme_timeout, }; @@ -1732,7 +1734,7 @@ static const struct blk_mq_ops nvme_mq_ops = { .complete = nvme_pci_complete_rq, .commit_rqs = nvme_commit_rqs, .init_hctx = nvme_init_hctx, - .init_request = nvme_init_request, + .init_request = nvme_pci_init_request, .map_queues = nvme_pci_map_queues, .timeout = nvme_timeout, .poll = nvme_poll, @@ -2475,9 +2477,10 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) cmd.delete_queue.opcode = opcode; cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid); - req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT); + req = blk_mq_alloc_request(q, nvme_req_op(&cmd), BLK_MQ_REQ_NOWAIT); if (IS_ERR(req)) return PTR_ERR(req); + nvme_init_request(req, &cmd); req->end_io_data = nvmeq; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 6cbcc8b4daaf..10fc45d95b86 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -30,6 +30,44 @@ static int so_priority; module_param(so_priority, int, 0644); MODULE_PARM_DESC(so_priority, 
"nvme tcp socket optimize priority"); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/* lockdep can detect a circular dependency of the form + * sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock + * because dependencies are tracked for both nvme-tcp and user contexts. Using + * a separate class prevents lockdep from conflating nvme-tcp socket use with + * user-space socket API use. + */ +static struct lock_class_key nvme_tcp_sk_key[2]; +static struct lock_class_key nvme_tcp_slock_key[2]; + +static void nvme_tcp_reclassify_socket(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) + return; + + switch (sk->sk_family) { + case AF_INET: + sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME", + &nvme_tcp_slock_key[0], + "sk_lock-AF_INET-NVME", + &nvme_tcp_sk_key[0]); + break; + case AF_INET6: + sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME", + &nvme_tcp_slock_key[1], + "sk_lock-AF_INET6-NVME", + &nvme_tcp_sk_key[1]); + break; + default: + WARN_ON_ONCE(1); + } +} +#else +static void nvme_tcp_reclassify_socket(struct socket *sock) { } +#endif + enum nvme_tcp_send_state { NVME_TCP_SEND_CMD_PDU = 0, NVME_TCP_SEND_H2C_PDU, @@ -1427,6 +1465,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, goto err_destroy_mutex; } + nvme_tcp_reclassify_socket(queue->sock); + /* Single syn retry */ tcp_sock_set_syncnt(queue->sock->sk, 1); @@ -1674,7 +1714,7 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl) { - int i, ret = 0; + int i, ret; for (i = 1; i < ctrl->queue_count; i++) { ret = nvme_tcp_start_queue(ctrl, i); @@ -1714,8 +1754,7 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) int i, ret; for (i = 1; i < ctrl->queue_count; i++) { - ret = nvme_tcp_alloc_queue(ctrl, i, - ctrl->sqsize + 1); + ret = nvme_tcp_alloc_queue(ctrl, i, ctrl->sqsize + 1); if (ret) goto out_free_queues; } |