diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-07 09:19:14 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-07 09:19:14 -0700 |
commit | 513389809e138ae903b6ef43c1d5d2ffaf4dca17 (patch) | |
tree | c71e478fab1568da4706868b14eb67a75c148a8b /drivers/nvme | |
parent | 0a78a376ef3c2f3d397df48909f00cd75f92137a (diff) | |
parent | 30514bd2dd4e86a3ecfd6a93a3eadf7b9ea164a0 (diff) |
Merge tag 'for-6.1/block-2022-10-03' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- NVMe pull requests via Christoph:
- handle number of queue changes in the TCP and RDMA drivers
(Daniel Wagner)
- allow changing the number of queues in nvmet (Daniel Wagner)
- also consider host_iface when checking ip options (Daniel
Wagner)
- don't map pages which can't come from HIGHMEM (Fabio M. De
Francesco)
- avoid unnecessary flush bios in nvmet (Guixin Liu)
- shrink and better pack the nvme_iod structure (Keith Busch)
- add comment for unaligned "fake" nqn (Linjun Bao)
- print actual source IP address through sysfs "address" attr
(Martin Belanger)
- various cleanups (Jackie Liu, Wolfram Sang, Genjian Zhang)
- handle effects after freeing the request (Keith Busch)
- copy firmware_rev on each init (Keith Busch)
- restrict management ioctls to admin (Keith Busch)
- ensure subsystem reset is single threaded (Keith Busch)
- report the actual number of tagset maps in nvme-pci (Keith
Busch)
- small fabrics authentication fixups (Christoph Hellwig)
- add common code for tagset allocation and freeing (Christoph
Hellwig)
- stop using the request_queue in nvmet (Christoph Hellwig)
- set min_align_mask before calculating max_hw_sectors (Rishabh
Bhatnagar)
- send a rediscover uevent when a persistent discovery controller
reconnects (Sagi Grimberg)
- misc nvmet-tcp fixes (Varun Prakash, zhenwei pi)
- MD pull request via Song:
- Various raid5 fixes and cleanups, by Logan Gunthorpe and David
Sloan.
- Raid10 performance optimization, by Yu Kuai.
- sbitmap wakeup hang fixes (Hugh, Keith, Jan, Yu)
- IO scheduler switching quiesce fix (Keith)
- s390/dasd block driver updates (Stefan)
- support for recovery for the ublk driver (ZiyangZhang)
- rnbd drivers fixes and updates (Guoqing, Santosh, ye, Christoph)
- blk-mq and null_blk map fixes (Bart)
- various bcache fixes (Coly, Jilin, Jules)
- nbd signal hang fix (Shigeru)
- block writeback throttling fix (Yu)
- optimize the passthrough mapping handling (me)
- prepare block cgroups to being gendisk based (Christoph)
- get rid of an old PSI hack in the block layer, moving it to the
callers instead where it belongs (Christoph)
- blk-throttle fixes and cleanups (Yu)
- misc fixes and cleanups (Liu Shixin, Liu Song, Miaohe, Pankaj,
Ping-Xiang, Wolfram, Saurabh, Li Jinlin, Li Lei, Lin, Li zeming,
Miaohe, Bart, Coly, Gaosheng)
* tag 'for-6.1/block-2022-10-03' of git://git.kernel.dk/linux: (162 commits)
sbitmap: fix lockup while swapping
block: add rationale for not using blk_mq_plug() when applicable
block: adapt blk_mq_plug() to not plug for writes that require a zone lock
s390/dasd: use blk_mq_alloc_disk
blk-cgroup: don't update the blkg lookup hint in blkg_conf_prep
nvmet: don't look at the request_queue in nvmet_bdev_set_limits
nvmet: don't look at the request_queue in nvmet_bdev_zone_mgmt_emulate_all
blk-mq: use quiesced elevator switch when reinitializing queues
block: replace blk_queue_nowait with bdev_nowait
nvme: remove nvme_ctrl_init_connect_q
nvme-loop: use the tagset alloc/free helpers
nvme-loop: store the generic nvme_ctrl in set->driver_data
nvme-loop: initialize sqsize later
nvme-fc: use the tagset alloc/free helpers
nvme-fc: store the generic nvme_ctrl in set->driver_data
nvme-fc: keep ctrl->sqsize in sync with opts->queue_size
nvme-rdma: use the tagset alloc/free helpers
nvme-rdma: store the generic nvme_ctrl in set->driver_data
nvme-tcp: use the tagset alloc/free helpers
nvme-tcp: store the generic nvme_ctrl in set->driver_data
...
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/core.c | 140 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 25 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 124 | ||||
-rw-r--r-- | drivers/nvme/host/ioctl.c | 15 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 44 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 78 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 171 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 169 | ||||
-rw-r--r-- | drivers/nvme/target/admin-cmd.c | 2 | ||||
-rw-r--r-- | drivers/nvme/target/configfs.c | 29 | ||||
-rw-r--r-- | drivers/nvme/target/core.c | 1 | ||||
-rw-r--r-- | drivers/nvme/target/discovery.c | 2 | ||||
-rw-r--r-- | drivers/nvme/target/fabrics-cmd-auth.c | 23 | ||||
-rw-r--r-- | drivers/nvme/target/fabrics-cmd.c | 19 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd-bdev.c | 19 | ||||
-rw-r--r-- | drivers/nvme/target/loop.c | 91 | ||||
-rw-r--r-- | drivers/nvme/target/nvmet.h | 7 | ||||
-rw-r--r-- | drivers/nvme/target/passthru.c | 7 | ||||
-rw-r--r-- | drivers/nvme/target/tcp.c | 91 | ||||
-rw-r--r-- | drivers/nvme/target/zns.c | 3 |
20 files changed, 525 insertions, 535 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 45ef8e8ddc84..64f599a64a7f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1111,8 +1111,8 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return effects; } -static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, - struct nvme_command *cmd, int status) +void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, + struct nvme_command *cmd, int status) { if (effects & NVME_CMD_EFFECTS_CSE_MASK) { nvme_unfreeze(ctrl); @@ -1148,21 +1148,16 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, break; } } +EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, NVME_TARGET_PASSTHRU); -int nvme_execute_passthru_rq(struct request *rq) +int nvme_execute_passthru_rq(struct request *rq, u32 *effects) { struct nvme_command *cmd = nvme_req(rq)->cmd; struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl; struct nvme_ns *ns = rq->q->queuedata; - u32 effects; - int ret; - effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); - ret = nvme_execute_rq(rq, false); - if (effects) /* nothing to be done for zero cmd effects */ - nvme_passthru_end(ctrl, effects, cmd, ret); - - return ret; + *effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); + return nvme_execute_rq(rq, false); } EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU); @@ -2696,7 +2691,7 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct if(!(ctrl->quirks & NVME_QUIRK_IGNORE_DEV_SUBNQN)) { nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE); if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) { - strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); + strscpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); return; } @@ -2704,7 +2699,11 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n"); } - /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */ + /* + * 
Generate a "fake" NQN similar to the one in Section 4.5 of the NVMe + * Base Specification 2.0. It is slightly different from the format + * specified there due to historic reasons, and we can't change it now. + */ off = snprintf(subsys->subnqn, NVMF_NQN_SIZE, "nqn.2014.08.org.nvmexpress:%04x%04x", le16_to_cpu(id->vid), le16_to_cpu(id->ssvid)); @@ -2894,7 +2893,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) nvme_init_subnqn(subsys, ctrl, id); memcpy(subsys->serial, id->sn, sizeof(subsys->serial)); memcpy(subsys->model, id->mn, sizeof(subsys->model)); - memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev)); subsys->vendor_id = le16_to_cpu(id->vid); subsys->cmic = id->cmic; @@ -3113,6 +3111,8 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->quirks |= core_quirks[i].quirks; } } + memcpy(ctrl->subsys->firmware_rev, id->fr, + sizeof(ctrl->subsys->firmware_rev)); if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) { dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n"); @@ -4805,6 +4805,108 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, } EXPORT_SYMBOL_GPL(nvme_complete_async_event); +int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, unsigned int flags, + unsigned int cmd_size) +{ + int ret; + + memset(set, 0, sizeof(*set)); + set->ops = ops; + set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; + if (ctrl->ops->flags & NVME_F_FABRICS) + set->reserved_tags = NVMF_RESERVED_TAGS; + set->numa_node = ctrl->numa_node; + set->flags = flags; + set->cmd_size = cmd_size; + set->driver_data = ctrl; + set->nr_hw_queues = 1; + set->timeout = NVME_ADMIN_TIMEOUT; + ret = blk_mq_alloc_tag_set(set); + if (ret) + return ret; + + ctrl->admin_q = blk_mq_init_queue(set); + if (IS_ERR(ctrl->admin_q)) { + ret = PTR_ERR(ctrl->admin_q); + goto out_free_tagset; + } + + if 
(ctrl->ops->flags & NVME_F_FABRICS) { + ctrl->fabrics_q = blk_mq_init_queue(set); + if (IS_ERR(ctrl->fabrics_q)) { + ret = PTR_ERR(ctrl->fabrics_q); + goto out_cleanup_admin_q; + } + } + + ctrl->admin_tagset = set; + return 0; + +out_cleanup_admin_q: + blk_mq_destroy_queue(ctrl->fabrics_q); +out_free_tagset: + blk_mq_free_tag_set(ctrl->admin_tagset); + return ret; +} +EXPORT_SYMBOL_GPL(nvme_alloc_admin_tag_set); + +void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl) +{ + blk_mq_destroy_queue(ctrl->admin_q); + if (ctrl->ops->flags & NVME_F_FABRICS) + blk_mq_destroy_queue(ctrl->fabrics_q); + blk_mq_free_tag_set(ctrl->admin_tagset); +} +EXPORT_SYMBOL_GPL(nvme_remove_admin_tag_set); + +int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, unsigned int flags, + unsigned int cmd_size) +{ + int ret; + + memset(set, 0, sizeof(*set)); + set->ops = ops; + set->queue_depth = ctrl->sqsize + 1; + set->reserved_tags = NVMF_RESERVED_TAGS; + set->numa_node = ctrl->numa_node; + set->flags = flags; + set->cmd_size = cmd_size, + set->driver_data = ctrl; + set->nr_hw_queues = ctrl->queue_count - 1; + set->timeout = NVME_IO_TIMEOUT; + if (ops->map_queues) + set->nr_maps = ctrl->opts->nr_poll_queues ? 
HCTX_MAX_TYPES : 2; + ret = blk_mq_alloc_tag_set(set); + if (ret) + return ret; + + if (ctrl->ops->flags & NVME_F_FABRICS) { + ctrl->connect_q = blk_mq_init_queue(set); + if (IS_ERR(ctrl->connect_q)) { + ret = PTR_ERR(ctrl->connect_q); + goto out_free_tag_set; + } + } + + ctrl->tagset = set; + return 0; + +out_free_tag_set: + blk_mq_free_tag_set(set); + return ret; +} +EXPORT_SYMBOL_GPL(nvme_alloc_io_tag_set); + +void nvme_remove_io_tag_set(struct nvme_ctrl *ctrl) +{ + if (ctrl->ops->flags & NVME_F_FABRICS) + blk_mq_destroy_queue(ctrl->connect_q); + blk_mq_free_tag_set(ctrl->tagset); +} +EXPORT_SYMBOL_GPL(nvme_remove_io_tag_set); + void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { nvme_mpath_stop(ctrl); @@ -4824,6 +4926,16 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) nvme_enable_aen(ctrl); + /* + * persistent discovery controllers need to send indication to userspace + * to re-read the discovery log page to learn about possible changes + * that were missed. We identify persistent discovery controllers by + * checking that they started once before, hence are reconnecting back. + */ + if (test_and_set_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags) && + nvme_discovery_ctrl(ctrl)) + nvme_change_uevent(ctrl, "NVME_EVENT=rediscover"); + if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); nvme_start_queues(ctrl); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 10cc4a814602..ce27276f552d 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -49,7 +49,7 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn) goto out_unlock; kref_init(&host->ref); - strlcpy(host->nqn, hostnqn, NVMF_NQN_SIZE); + strscpy(host->nqn, hostnqn, NVMF_NQN_SIZE); list_add_tail(&host->list, &nvmf_hosts); out_unlock: @@ -971,13 +971,17 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, return false; /* - * Checking the local address is rough. In most cases, none is specified - * and the host port is selected by the stack. 
+ * Checking the local address or host interfaces is rough. + * + * In most cases, none is specified and the host port or + * host interface is selected by the stack. * * Assume no match if: - * - local address is specified and address is not the same - * - local address is not specified but remote is, or vice versa - * (admin using specific host_traddr when it matters). + * - local address or host interface is specified and address + * or host interface is not the same + * - local address or host interface is not specified but + * remote is, or vice versa (admin using specific + * host_traddr/host_iface when it matters). */ if ((opts->mask & NVMF_OPT_HOST_TRADDR) && (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) { @@ -988,6 +992,15 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, return false; } + if ((opts->mask & NVMF_OPT_HOST_IFACE) && + (ctrl->opts->mask & NVMF_OPT_HOST_IFACE)) { + if (strcmp(opts->host_iface, ctrl->opts->host_iface)) + return false; + } else if ((opts->mask & NVMF_OPT_HOST_IFACE) || + (ctrl->opts->mask & NVMF_OPT_HOST_IFACE)) { + return false; + } + return true; } EXPORT_SYMBOL_GPL(nvmf_ip_options_match); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 127abaf9ba5d..5d57a042dbca 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1829,7 +1829,7 @@ nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq, { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); - return __nvme_fc_exit_request(set->driver_data, op); + return __nvme_fc_exit_request(to_fc_ctrl(set->driver_data), op); } static int @@ -2135,7 +2135,7 @@ static int nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_fc_ctrl *ctrl = set->driver_data; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(set->driver_data); struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq); int queue_idx = (set == &ctrl->tag_set) ? 
hctx_idx + 1 : 0; struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; @@ -2206,36 +2206,28 @@ nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) } } -static inline void -__nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl, - unsigned int qidx) +static inline int +__nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int qidx) { + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(data); struct nvme_fc_queue *queue = &ctrl->queues[qidx]; hctx->driver_data = queue; queue->hctx = hctx; + return 0; } static int -nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, - unsigned int hctx_idx) +nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_fc_ctrl *ctrl = data; - - __nvme_fc_init_hctx(hctx, ctrl, hctx_idx + 1); - - return 0; + return __nvme_fc_init_hctx(hctx, data, hctx_idx + 1); } static int nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_fc_ctrl *ctrl = data; - - __nvme_fc_init_hctx(hctx, ctrl, hctx_idx); - - return 0; + return __nvme_fc_init_hctx(hctx, data, hctx_idx); } static void @@ -2391,10 +2383,8 @@ nvme_fc_ctrl_free(struct kref *ref) container_of(ref, struct nvme_fc_ctrl, ref); unsigned long flags; - if (ctrl->ctrl.tagset) { - blk_mq_destroy_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - } + if (ctrl->ctrl.tagset) + nvme_remove_io_tag_set(&ctrl->ctrl); /* remove from rport list */ spin_lock_irqsave(&ctrl->rport->lock, flags); @@ -2402,9 +2392,7 @@ nvme_fc_ctrl_free(struct kref *ref) spin_unlock_irqrestore(&ctrl->rport->lock, flags); nvme_start_admin_queue(&ctrl->ctrl); - blk_mq_destroy_queue(ctrl->ctrl.admin_q); - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); - blk_mq_free_tag_set(&ctrl->admin_tag_set); + nvme_remove_admin_tag_set(&ctrl->ctrl); kfree(ctrl->queues); @@ -2860,9 +2848,9 @@ nvme_fc_complete_rq(struct request *rq) nvme_fc_ctrl_put(ctrl); } -static int nvme_fc_map_queues(struct blk_mq_tag_set *set) 
+static void nvme_fc_map_queues(struct blk_mq_tag_set *set) { - struct nvme_fc_ctrl *ctrl = set->driver_data; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(set->driver_data); int i; for (i = 0; i < set->nr_maps; i++) { @@ -2880,7 +2868,6 @@ static int nvme_fc_map_queues(struct blk_mq_tag_set *set) else blk_mq_map_queues(map); } - return 0; } static const struct blk_mq_ops nvme_fc_mq_ops = { @@ -2915,32 +2902,16 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) nvme_fc_init_io_queues(ctrl); - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); - ctrl->tag_set.ops = &nvme_fc_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS; - ctrl->tag_set.numa_node = ctrl->ctrl.numa_node; - ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = - struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, - ctrl->lport->ops->fcprqst_priv_sz); - ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; - ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - - ret = blk_mq_alloc_tag_set(&ctrl->tag_set); + ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set, + &nvme_fc_mq_ops, BLK_MQ_F_SHOULD_MERGE, + struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, + ctrl->lport->ops->fcprqst_priv_sz)); if (ret) return ret; - ctrl->ctrl.tagset = &ctrl->tag_set; - - ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl)); - if (ret) - goto out_free_tag_set; - ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) - goto out_cleanup_blk_queue; + goto out_cleanup_tagset; ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) @@ -2952,10 +2923,8 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) out_delete_hw_queues: nvme_fc_delete_hw_io_queues(ctrl); -out_cleanup_blk_queue: - blk_mq_destroy_queue(ctrl->ctrl.connect_q); -out_free_tag_set: - blk_mq_free_tag_set(&ctrl->tag_set); +out_cleanup_tagset: + nvme_remove_io_tag_set(&ctrl->ctrl); nvme_fc_free_io_queues(ctrl); /* 
force put free routine to ignore io queues */ @@ -3166,15 +3135,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) "to maxcmd\n", opts->queue_size, ctrl->ctrl.maxcmd); opts->queue_size = ctrl->ctrl.maxcmd; - } - - if (opts->queue_size > ctrl->ctrl.sqsize + 1) { - /* warn if sqsize is lower than queue_size */ - dev_warn(ctrl->ctrl.device, - "queue_size %zu > ctrl sqsize %u, reducing " - "to sqsize\n", - opts->queue_size, ctrl->ctrl.sqsize + 1); - opts->queue_size = ctrl->ctrl.sqsize + 1; + ctrl->ctrl.sqsize = opts->queue_size - 1; } ret = nvme_fc_init_aen_ops(ctrl); @@ -3547,35 +3508,12 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, nvme_fc_init_queue(ctrl, 0); - memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); - ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; - ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS; - ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node; - ctrl->admin_tag_set.cmd_size = - struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, - ctrl->lport->ops->fcprqst_priv_sz); - ctrl->admin_tag_set.driver_data = ctrl; - ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT; - ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; - - ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); + ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set, + &nvme_fc_admin_mq_ops, BLK_MQ_F_NO_SCHED, + struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, + ctrl->lport->ops->fcprqst_priv_sz)); if (ret) goto out_free_queues; - ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; - - ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.fabrics_q)) { - ret = PTR_ERR(ctrl->ctrl.fabrics_q); - goto out_free_admin_tag_set; - } - - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - ret = PTR_ERR(ctrl->ctrl.admin_q); - goto out_cleanup_fabrics_q; - } /* * 
Would have been nice to init io queues tag set as well. @@ -3586,7 +3524,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); if (ret) - goto out_cleanup_admin_q; + goto out_cleanup_tagset; /* at this point, teardown path changes to ref counting on nvme ctrl */ @@ -3641,12 +3579,8 @@ fail_ctrl: return ERR_PTR(-EIO); -out_cleanup_admin_q: - blk_mq_destroy_queue(ctrl->ctrl.admin_q); -out_cleanup_fabrics_q: - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); -out_free_admin_tag_set: - blk_mq_free_tag_set(&ctrl->admin_tag_set); +out_cleanup_tagset: + nvme_remove_admin_tag_set(&ctrl->ctrl); out_free_queues: kfree(ctrl->queues); out_free_ida: diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 548aca8b5b9f..357791ff0623 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -136,9 +136,11 @@ static int nvme_submit_user_cmd(struct request_queue *q, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, u64 *result, unsigned timeout, bool vec) { + struct nvme_ctrl *ctrl; struct request *req; void *meta = NULL; struct bio *bio; + u32 effects; int ret; req = nvme_alloc_user_request(q, cmd, ubuffer, bufflen, meta_buffer, @@ -147,8 +149,9 @@ static int nvme_submit_user_cmd(struct request_queue *q, return PTR_ERR(req); bio = req->bio; + ctrl = nvme_req(req)->ctrl; - ret = nvme_execute_passthru_rq(req); + ret = nvme_execute_passthru_rq(req, &effects); if (result) *result = le64_to_cpu(nvme_req(req)->result.u64); @@ -158,6 +161,10 @@ static int nvme_submit_user_cmd(struct request_queue *q, if (bio) blk_rq_unmap_user(bio); blk_mq_free_request(req); + + if (effects) + nvme_passthru_end(ctrl, effects, cmd, ret); + return ret; } @@ -824,11 +831,17 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd, case NVME_IOCTL_IO_CMD: return nvme_dev_user_cmd(ctrl, argp); case NVME_IOCTL_RESET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; 
dev_warn(ctrl->device, "resetting controller\n"); return nvme_reset_ctrl_sync(ctrl); case NVME_IOCTL_SUBSYS_RESET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; return nvme_reset_subsystem(ctrl); case NVME_IOCTL_RESCAN: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; nvme_queue_scan(ctrl); return 0; default: diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 216acbe953b3..a29877217ee6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -233,6 +233,12 @@ struct nvme_fault_inject { #endif }; +enum nvme_ctrl_flags { + NVME_CTRL_FAILFAST_EXPIRED = 0, + NVME_CTRL_ADMIN_Q_STOPPED = 1, + NVME_CTRL_STARTED_ONCE = 2, +}; + struct nvme_ctrl { bool comp_seen; enum nvme_ctrl_state state; @@ -354,8 +360,6 @@ struct nvme_ctrl { u16 maxcmd; int nr_reconnects; unsigned long flags; -#define NVME_CTRL_FAILFAST_EXPIRED 0 -#define NVME_CTRL_ADMIN_Q_STOPPED 1 struct nvmf_ctrl_options *opts; struct page *discard_page; @@ -602,11 +606,23 @@ static inline void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inj) static inline void nvme_should_fail(struct request *req) {} #endif +bool nvme_wait_reset(struct nvme_ctrl *ctrl); +int nvme_try_sched_reset(struct nvme_ctrl *ctrl); + static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) { + int ret; + if (!ctrl->subsystem) return -ENOTTY; - return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); + if (!nvme_wait_reset(ctrl)) + return -EBUSY; + + ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); + if (ret) + return ret; + + return nvme_try_sched_reset(ctrl); } /* @@ -712,7 +728,6 @@ void nvme_cancel_tagset(struct nvme_ctrl *ctrl); void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); -bool nvme_wait_reset(struct nvme_ctrl *ctrl); int nvme_disable_ctrl(struct nvme_ctrl *ctrl); int nvme_enable_ctrl(struct nvme_ctrl *ctrl); int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl); @@ -722,6 
+737,14 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl); void nvme_start_ctrl(struct nvme_ctrl *ctrl); void nvme_stop_ctrl(struct nvme_ctrl *ctrl); int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl); +int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, unsigned int flags, + unsigned int cmd_size); +void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl); +int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, unsigned int flags, + unsigned int cmd_size); +void nvme_remove_io_tag_set(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); @@ -802,7 +825,6 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); -int nvme_try_sched_reset(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); void nvme_queue_scan(struct nvme_ctrl *ctrl); int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, @@ -972,14 +994,6 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) } #endif -static inline int nvme_ctrl_init_connect_q(struct nvme_ctrl *ctrl) -{ - ctrl->connect_q = blk_mq_init_queue(ctrl->tagset); - if (IS_ERR(ctrl->connect_q)) - return PTR_ERR(ctrl->connect_q); - return 0; -} - static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) { return dev_to_disk(dev)->private_data; @@ -1027,7 +1041,9 @@ static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {}; u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); -int nvme_execute_passthru_rq(struct request *rq); +int nvme_execute_passthru_rq(struct request *rq, u32 *effects); +void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, + struct nvme_command *cmd, int status); struct nvme_ctrl *nvme_ctrl_from_file(struct file *file); 
struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid); void nvme_put_ns(struct nvme_ns *ns); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 67d3335e9cc8..9aafc1ed6439 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -226,12 +226,12 @@ struct nvme_queue { struct nvme_iod { struct nvme_request req; struct nvme_command cmd; - struct nvme_queue *nvmeq; bool use_sgl; - int aborted; - int npages; /* In the PRP list. 0 means small pool in use */ - dma_addr_t first_dma; + bool aborted; + s8 nr_allocations; /* PRP list pool allocations. 0 means small + pool in use */ unsigned int dma_len; /* length of single DMA segment mapping */ + dma_addr_t first_dma; dma_addr_t meta_dma; struct sg_table sgt; }; @@ -430,11 +430,6 @@ static int nvme_pci_init_request(struct blk_mq_tag_set *set, { struct nvme_dev *dev = set->driver_data; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0; - struct nvme_queue *nvmeq = &dev->queues[queue_idx]; - - BUG_ON(!nvmeq); - iod->nvmeq = nvmeq; nvme_req(req)->ctrl = &dev->ctrl; nvme_req(req)->cmd = &iod->cmd; @@ -450,7 +445,7 @@ static int queue_irq_offset(struct nvme_dev *dev) return 0; } -static int nvme_pci_map_queues(struct blk_mq_tag_set *set) +static void nvme_pci_map_queues(struct blk_mq_tag_set *set) { struct nvme_dev *dev = set->driver_data; int i, qoff, offset; @@ -477,8 +472,6 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) qoff += map->nr_queues; offset += map->nr_queues; } - - return 0; } /* @@ -528,7 +521,7 @@ static void **nvme_pci_iod_list(struct request *req) static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) { - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; int nseg = blk_rq_nr_phys_segments(req); unsigned int avg_seg_size; @@ -536,7 +529,7 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) if 
(!nvme_ctrl_sgl_supported(&dev->ctrl)) return false; - if (!iod->nvmeq->qid) + if (!nvmeq->qid) return false; if (!sgl_threshold || avg_seg_size < sgl_threshold) return false; @@ -550,7 +543,7 @@ static void nvme_free_prps(struct nvme_dev *dev, struct request *req) dma_addr_t dma_addr = iod->first_dma; int i; - for (i = 0; i < iod->npages; i++) { + for (i = 0; i < iod->nr_allocations; i++) { __le64 *prp_list = nvme_pci_iod_list(req)[i]; dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]); @@ -566,7 +559,7 @@ static void nvme_free_sgls(struct nvme_dev *dev, struct request *req) dma_addr_t dma_addr = iod->first_dma; int i; - for (i = 0; i < iod->npages; i++) { + for (i = 0; i < iod->nr_allocations; i++) { struct nvme_sgl_desc *sg_list = nvme_pci_iod_list(req)[i]; dma_addr_t next_dma_addr = le64_to_cpu((sg_list[last_sg]).addr); @@ -589,7 +582,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0); - if (iod->npages == 0) + if (iod->nr_allocations == 0) dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], iod->first_dma); else if (iod->use_sgl) @@ -651,15 +644,15 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE); if (nprps <= (256 / 8)) { pool = dev->prp_small_pool; - iod->npages = 0; + iod->nr_allocations = 0; } else { pool = dev->prp_page_pool; - iod->npages = 1; + iod->nr_allocations = 1; } prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); if (!prp_list) { - iod->npages = -1; + iod->nr_allocations = -1; return BLK_STS_RESOURCE; } list[0] = prp_list; @@ -671,7 +664,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); if (!prp_list) goto free_prps; - list[iod->npages++] = prp_list; + list[iod->nr_allocations++] = prp_list; prp_list[0] = old_prp_list[i - 1]; old_prp_list[i - 1] = cpu_to_le64(prp_dma); i = 1; @@ -746,15 +739,15 @@ static 
blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, if (entries <= (256 / sizeof(struct nvme_sgl_desc))) { pool = dev->prp_small_pool; - iod->npages = 0; + iod->nr_allocations = 0; } else { pool = dev->prp_page_pool; - iod->npages = 1; + iod->nr_allocations = 1; } sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma); if (!sg_list) { - iod->npages = -1; + iod->nr_allocations = -1; return BLK_STS_RESOURCE; } @@ -773,7 +766,7 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, goto free_sgls; i = 0; - nvme_pci_iod_list(req)[iod->npages++] = sg_list; + nvme_pci_iod_list(req)[iod->nr_allocations++] = sg_list; sg_list[i++] = *link; nvme_pci_sgl_set_seg(link, sgl_dma, entries); } @@ -833,6 +826,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, int rc; if (blk_rq_nr_phys_segments(req) == 1) { + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; struct bio_vec bv = req_bvec(req); if (!is_pci_p2pdma_page(bv.bv_page)) { @@ -840,7 +834,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, return nvme_setup_prp_simple(dev, req, &cmnd->rw, &bv); - if (iod->nvmeq->qid && sgl_threshold && + if (nvmeq->qid && sgl_threshold && nvme_ctrl_sgl_supported(&dev->ctrl)) return nvme_setup_sgl_simple(dev, req, &cmnd->rw, &bv); @@ -898,8 +892,8 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req) struct nvme_iod *iod = blk_mq_rq_to_pdu(req); blk_status_t ret; - iod->aborted = 0; - iod->npages = -1; + iod->aborted = false; + iod->nr_allocations = -1; iod->sgt.nents = 0; ret = nvme_setup_cmd(req->q->queuedata, req); @@ -1019,12 +1013,16 @@ static void nvme_queue_rqs(struct request **rqlist) static __always_inline void nvme_pci_unmap_rq(struct request *req) { - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_dev *dev = iod->nvmeq->dev; + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; + struct nvme_dev *dev = nvmeq->dev; + + if (blk_integrity_rq(req)) { + struct nvme_iod 
*iod = blk_mq_rq_to_pdu(req); - if (blk_integrity_rq(req)) dma_unmap_page(dev->dev, iod->meta_dma, rq_integrity_vec(req)->bv_len, rq_data_dir(req)); + } + if (blk_rq_nr_phys_segments(req)) nvme_unmap_data(dev, req); } @@ -1272,8 +1270,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) static void abort_endio(struct request *req, blk_status_t error) { - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_queue *nvmeq = iod->nvmeq; + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", nvme_req(req)->status); @@ -1335,7 +1332,7 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts) static enum blk_eh_timer_return nvme_timeout(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_queue *nvmeq = iod->nvmeq; + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; struct nvme_dev *dev = nvmeq->dev; struct request *abort_req; struct nvme_command cmd = { }; @@ -1416,7 +1413,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) atomic_inc(&dev->ctrl.abort_limit); return BLK_EH_RESET_TIMER; } - iod->aborted = 1; + iod->aborted = true; cmd.abort.opcode = nvme_admin_abort_cmd; cmd.abort.cid = nvme_cid(req); @@ -2529,9 +2526,11 @@ static void nvme_pci_alloc_tag_set(struct nvme_dev *dev) set->ops = &nvme_mq_ops; set->nr_hw_queues = dev->online_queues - 1; - set->nr_maps = 2; /* default + read */ + set->nr_maps = 1; + if (dev->io_queues[HCTX_TYPE_READ]) + set->nr_maps = 2; if (dev->io_queues[HCTX_TYPE_POLL]) - set->nr_maps++; + set->nr_maps = 3; set->timeout = NVME_IO_TIMEOUT; set->numa_node = dev->ctrl.numa_node; set->queue_depth = min_t(unsigned, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; @@ -2834,6 +2833,8 @@ static void nvme_reset_work(struct work_struct *work) nvme_start_admin_queue(&dev->ctrl); } + dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1); + /* * Limit the max command size to prevent iod->sg allocations going * over a single page. 
@@ -2846,7 +2847,6 @@ static void nvme_reset_work(struct work_struct *work) * Don't limit the IOMMU merged segment size. */ dma_set_max_seg_size(dev->dev, 0xffffffff); - dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1); mutex_unlock(&dev->shutdown_lock); @@ -3569,6 +3569,8 @@ static int __init nvme_init(void) BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); + BUILD_BUG_ON(DIV_ROUND_UP(nvme_pci_npages_prp(), NVME_CTRL_PAGE_SIZE) > + S8_MAX); return pci_register_driver(&nvme_driver); } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3100643be299..5ad0ab2853a4 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -295,7 +295,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_rdma_ctrl *ctrl = set->driver_data; + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data); struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); int queue_idx = (set == &ctrl->tag_set) ? 
hctx_idx + 1 : 0; struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx]; @@ -320,7 +320,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_rdma_ctrl *ctrl = data; + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(data); struct nvme_rdma_queue *queue = &ctrl->queues[hctx_idx + 1]; BUG_ON(hctx_idx >= ctrl->ctrl.queue_count); @@ -332,7 +332,7 @@ static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, static int nvme_rdma_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_rdma_ctrl *ctrl = data; + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(data); struct nvme_rdma_queue *queue = &ctrl->queues[0]; BUG_ON(hctx_idx != 0); @@ -696,11 +696,12 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) return ret; } -static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl) +static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl, + int first, int last) { int i, ret = 0; - for (i = 1; i < ctrl->ctrl.queue_count; i++) { + for (i = first; i < last; i++) { ret = nvme_rdma_start_queue(ctrl, i); if (ret) goto out_stop_queues; @@ -709,7 +710,7 @@ static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl) return 0; out_stop_queues: - for (i--; i >= 1; i--) + for (i--; i >= first; i--) nvme_rdma_stop_queue(&ctrl->queues[i]); return ret; } @@ -787,64 +788,21 @@ out_free_queues: return ret; } -static int nvme_rdma_alloc_admin_tag_set(struct nvme_ctrl *nctrl) +static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *ctrl) { - struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - struct blk_mq_tag_set *set = &ctrl->admin_tag_set; - int ret; + unsigned int cmd_size = sizeof(struct nvme_rdma_request) + + NVME_RDMA_DATA_SGL_SIZE; - memset(set, 0, sizeof(*set)); - set->ops = &nvme_rdma_admin_mq_ops; - set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; - set->reserved_tags = 
NVMF_RESERVED_TAGS; - set->numa_node = nctrl->numa_node; - set->cmd_size = sizeof(struct nvme_rdma_request) + - NVME_RDMA_DATA_SGL_SIZE; - set->driver_data = ctrl; - set->nr_hw_queues = 1; - set->timeout = NVME_ADMIN_TIMEOUT; - set->flags = BLK_MQ_F_NO_SCHED; - ret = blk_mq_alloc_tag_set(set); - if (!ret) - ctrl->ctrl.admin_tagset = set; - return ret; -} - -static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *nctrl) -{ - struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - struct blk_mq_tag_set *set = &ctrl->tag_set; - int ret; + if (ctrl->max_integrity_segments) + cmd_size += sizeof(struct nvme_rdma_sgl) + + NVME_RDMA_METADATA_SGL_SIZE; - memset(set, 0, sizeof(*set)); - set->ops = &nvme_rdma_mq_ops; - set->queue_depth = nctrl->sqsize + 1; - set->reserved_tags = NVMF_RESERVED_TAGS; - set->numa_node = nctrl->numa_node; - set->flags = BLK_MQ_F_SHOULD_MERGE; - set->cmd_size = sizeof(struct nvme_rdma_request) + - NVME_RDMA_DATA_SGL_SIZE; - if (nctrl->max_integrity_segments) - set->cmd_size += sizeof(struct nvme_rdma_sgl) + - NVME_RDMA_METADATA_SGL_SIZE; - set->driver_data = ctrl; - set->nr_hw_queues = nctrl->queue_count - 1; - set->timeout = NVME_IO_TIMEOUT; - set->nr_maps = nctrl->opts->nr_poll_queues ? 
HCTX_MAX_TYPES : 2; - ret = blk_mq_alloc_tag_set(set); - if (!ret) - ctrl->ctrl.tagset = set; - return ret; + return nvme_alloc_io_tag_set(ctrl, &to_rdma_ctrl(ctrl)->tag_set, + &nvme_rdma_mq_ops, BLK_MQ_F_SHOULD_MERGE, cmd_size); } -static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, - bool remove) +static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl) { - if (remove) { - blk_mq_destroy_queue(ctrl->ctrl.admin_q); - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); - blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); - } if (ctrl->async_event_sqe.data) { cancel_work_sync(&ctrl->ctrl.async_event_work); nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, @@ -886,26 +844,19 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, goto out_free_queue; if (new) { - error = nvme_rdma_alloc_admin_tag_set(&ctrl->ctrl); + error = nvme_alloc_admin_tag_set(&ctrl->ctrl, + &ctrl->admin_tag_set, &nvme_rdma_admin_mq_ops, + BLK_MQ_F_NO_SCHED, + sizeof(struct nvme_rdma_request) + + NVME_RDMA_DATA_SGL_SIZE); if (error) goto out_free_async_qe; - ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.fabrics_q)) { - error = PTR_ERR(ctrl->ctrl.fabrics_q); - goto out_free_tagset; - } - - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_cleanup_fabrics_q; - } } error = nvme_rdma_start_queue(ctrl, 0); if (error) - goto out_cleanup_queue; + goto out_remove_admin_tag_set; error = nvme_enable_ctrl(&ctrl->ctrl); if (error) @@ -932,15 +883,9 @@ out_quiesce_queue: out_stop_queue: nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_cancel_admin_tagset(&ctrl->ctrl); -out_cleanup_queue: - if (new) - blk_mq_destroy_queue(ctrl->ctrl.admin_q); -out_cleanup_fabrics_q: +out_remove_admin_tag_set: if (new) - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); -out_free_tagset: - if (new) - blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); + 
nvme_remove_admin_tag_set(&ctrl->ctrl); out_free_async_qe: if (ctrl->async_event_sqe.data) { nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, @@ -952,19 +897,9 @@ out_free_queue: return error; } -static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, - bool remove) -{ - if (remove) { - blk_mq_destroy_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(ctrl->ctrl.tagset); - } - nvme_rdma_free_io_queues(ctrl); -} - static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) { - int ret; + int ret, nr_queues; ret = nvme_rdma_alloc_io_queues(ctrl); if (ret) @@ -974,15 +909,17 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) ret = nvme_rdma_alloc_tag_set(&ctrl->ctrl); if (ret) goto out_free_io_queues; - - ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl)); - if (ret) - goto out_free_tag_set; } - ret = nvme_rdma_start_io_queues(ctrl); + /* + * Only start IO queues for which we have allocated the tagset + * and limitted it to the available queues. On reconnects, the + * queue number might have changed. + */ + nr_queues = min(ctrl->tag_set.nr_hw_queues + 1, ctrl->ctrl.queue_count); + ret = nvme_rdma_start_io_queues(ctrl, 1, nr_queues); if (ret) - goto out_cleanup_connect_q; + goto out_cleanup_tagset; if (!new) { nvme_start_queues(&ctrl->ctrl); @@ -1000,19 +937,25 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) nvme_unfreeze(&ctrl->ctrl); } + /* + * If the number of queues has increased (reconnect case) + * start all new queues now. 
+ */ + ret = nvme_rdma_start_io_queues(ctrl, nr_queues, + ctrl->tag_set.nr_hw_queues + 1); + if (ret) + goto out_wait_freeze_timed_out; + return 0; out_wait_freeze_timed_out: nvme_stop_queues(&ctrl->ctrl); nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); -out_cleanup_connect_q: +out_cleanup_tagset: nvme_cancel_tagset(&ctrl->ctrl); if (new) - blk_mq_destroy_queue(ctrl->ctrl.connect_q); -out_free_tag_set: - if (new) - blk_mq_free_tag_set(ctrl->ctrl.tagset); + nvme_remove_io_tag_set(&ctrl->ctrl); out_free_io_queues: nvme_rdma_free_io_queues(ctrl); return ret; @@ -1025,9 +968,11 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_cancel_admin_tagset(&ctrl->ctrl); - if (remove) + if (remove) { nvme_start_admin_queue(&ctrl->ctrl); - nvme_rdma_destroy_admin_queue(ctrl, remove); + nvme_remove_admin_tag_set(&ctrl->ctrl); + } + nvme_rdma_destroy_admin_queue(ctrl); } static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, @@ -1039,9 +984,11 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); nvme_cancel_tagset(&ctrl->ctrl); - if (remove) + if (remove) { nvme_start_queues(&ctrl->ctrl); - nvme_rdma_destroy_io_queues(ctrl, remove); + nvme_remove_io_tag_set(&ctrl->ctrl); + } + nvme_rdma_free_io_queues(ctrl); } } @@ -1163,14 +1110,18 @@ destroy_io: nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); nvme_cancel_tagset(&ctrl->ctrl); - nvme_rdma_destroy_io_queues(ctrl, new); + if (new) + nvme_remove_io_tag_set(&ctrl->ctrl); + nvme_rdma_free_io_queues(ctrl); } destroy_admin: nvme_stop_admin_queue(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_cancel_admin_tagset(&ctrl->ctrl); - nvme_rdma_destroy_admin_queue(ctrl, new); + if (new) + nvme_remove_admin_tag_set(&ctrl->ctrl); + 
nvme_rdma_destroy_admin_queue(ctrl); return ret; } @@ -2188,9 +2139,9 @@ static void nvme_rdma_complete_rq(struct request *rq) nvme_complete_rq(rq); } -static int nvme_rdma_map_queues(struct blk_mq_tag_set *set) +static void nvme_rdma_map_queues(struct blk_mq_tag_set *set) { - struct nvme_rdma_ctrl *ctrl = set->driver_data; + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data); struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) { @@ -2231,8 +2182,6 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set) ctrl->io_queues[HCTX_TYPE_DEFAULT], ctrl->io_queues[HCTX_TYPE_READ], ctrl->io_queues[HCTX_TYPE_POLL]); - - return 0; } static const struct blk_mq_ops nvme_rdma_mq_ops = { diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index d5871fd6f769..93e2e313fa70 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -133,7 +133,6 @@ struct nvme_tcp_queue { /* send state */ struct nvme_tcp_request *request; - int queue_size; u32 maxh2cdata; size_t cmnd_capsule_len; struct nvme_tcp_ctrl *ctrl; @@ -463,7 +462,7 @@ static int nvme_tcp_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_tcp_ctrl *ctrl = set->driver_data; + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data); struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); struct nvme_tcp_cmd_pdu *pdu; int queue_idx = (set == &ctrl->tag_set) ? 
hctx_idx + 1 : 0; @@ -487,7 +486,7 @@ static int nvme_tcp_init_request(struct blk_mq_tag_set *set, static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_tcp_ctrl *ctrl = data; + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(data); struct nvme_tcp_queue *queue = &ctrl->queues[hctx_idx + 1]; hctx->driver_data = queue; @@ -497,7 +496,7 @@ static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, static int nvme_tcp_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_tcp_ctrl *ctrl = data; + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(data); struct nvme_tcp_queue *queue = &ctrl->queues[0]; hctx->driver_data = queue; @@ -1476,8 +1475,7 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue) queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); } -static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, - int qid, size_t queue_size) +static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; @@ -1489,7 +1487,6 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, INIT_LIST_HEAD(&queue->send_list); mutex_init(&queue->send_mutex); INIT_WORK(&queue->io_work, nvme_tcp_io_work); - queue->queue_size = queue_size; if (qid > 0) queue->cmnd_capsule_len = nctrl->ioccsz * 16; @@ -1687,51 +1684,6 @@ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) return ret; } -static int nvme_tcp_alloc_admin_tag_set(struct nvme_ctrl *nctrl) -{ - struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); - struct blk_mq_tag_set *set = &ctrl->admin_tag_set; - int ret; - - memset(set, 0, sizeof(*set)); - set->ops = &nvme_tcp_admin_mq_ops; - set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; - set->reserved_tags = NVMF_RESERVED_TAGS; - set->numa_node = nctrl->numa_node; - set->flags = BLK_MQ_F_BLOCKING; - set->cmd_size = sizeof(struct nvme_tcp_request); - 
set->driver_data = ctrl; - set->nr_hw_queues = 1; - set->timeout = NVME_ADMIN_TIMEOUT; - ret = blk_mq_alloc_tag_set(set); - if (!ret) - nctrl->admin_tagset = set; - return ret; -} - -static int nvme_tcp_alloc_tag_set(struct nvme_ctrl *nctrl) -{ - struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); - struct blk_mq_tag_set *set = &ctrl->tag_set; - int ret; - - memset(set, 0, sizeof(*set)); - set->ops = &nvme_tcp_mq_ops; - set->queue_depth = nctrl->sqsize + 1; - set->reserved_tags = NVMF_RESERVED_TAGS; - set->numa_node = nctrl->numa_node; - set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; - set->cmd_size = sizeof(struct nvme_tcp_request); - set->driver_data = ctrl; - set->nr_hw_queues = nctrl->queue_count - 1; - set->timeout = NVME_IO_TIMEOUT; - set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2; - ret = blk_mq_alloc_tag_set(set); - if (!ret) - nctrl->tagset = set; - return ret; -} - static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl) { if (to_tcp_ctrl(ctrl)->async_req.pdu) { @@ -1759,11 +1711,12 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) nvme_tcp_stop_queue(ctrl, i); } -static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl) +static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl, + int first, int last) { int i, ret; - for (i = 1; i < ctrl->queue_count; i++) { + for (i = first; i < last; i++) { ret = nvme_tcp_start_queue(ctrl, i); if (ret) goto out_stop_queues; @@ -1772,7 +1725,7 @@ static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl) return 0; out_stop_queues: - for (i--; i >= 1; i--) + for (i--; i >= first; i--) nvme_tcp_stop_queue(ctrl, i); return ret; } @@ -1781,7 +1734,7 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) { int ret; - ret = nvme_tcp_alloc_queue(ctrl, 0, NVME_AQ_DEPTH); + ret = nvme_tcp_alloc_queue(ctrl, 0); if (ret) return ret; @@ -1801,7 +1754,7 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) int i, ret; for (i = 1; i < ctrl->queue_count; i++) { - ret = 
nvme_tcp_alloc_queue(ctrl, i, ctrl->sqsize + 1); + ret = nvme_tcp_alloc_queue(ctrl, i); if (ret) goto out_free_queues; } @@ -1889,32 +1842,35 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove) { nvme_tcp_stop_io_queues(ctrl); - if (remove) { - blk_mq_destroy_queue(ctrl->connect_q); - blk_mq_free_tag_set(ctrl->tagset); - } + if (remove) + nvme_remove_io_tag_set(ctrl); nvme_tcp_free_io_queues(ctrl); } static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) { - int ret; + int ret, nr_queues; ret = nvme_tcp_alloc_io_queues(ctrl); if (ret) return ret; if (new) { - ret = nvme_tcp_alloc_tag_set(ctrl); + ret = nvme_alloc_io_tag_set(ctrl, &to_tcp_ctrl(ctrl)->tag_set, + &nvme_tcp_mq_ops, + BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING, + sizeof(struct nvme_tcp_request)); if (ret) goto out_free_io_queues; - - ret = nvme_ctrl_init_connect_q(ctrl); - if (ret) - goto out_free_tag_set; } - ret = nvme_tcp_start_io_queues(ctrl); + /* + * Only start IO queues for which we have allocated the tagset + * and limitted it to the available queues. On reconnects, the + * queue number might have changed. + */ + nr_queues = min(ctrl->tagset->nr_hw_queues + 1, ctrl->queue_count); + ret = nvme_tcp_start_io_queues(ctrl, 1, nr_queues); if (ret) goto out_cleanup_connect_q; @@ -1934,6 +1890,15 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) nvme_unfreeze(ctrl); } + /* + * If the number of queues has increased (reconnect case) + * start all new queues now. 
+ */ + ret = nvme_tcp_start_io_queues(ctrl, nr_queues, + ctrl->tagset->nr_hw_queues + 1); + if (ret) + goto out_wait_freeze_timed_out; + return 0; out_wait_freeze_timed_out: @@ -1943,10 +1908,7 @@ out_wait_freeze_timed_out: out_cleanup_connect_q: nvme_cancel_tagset(ctrl); if (new) - blk_mq_destroy_queue(ctrl->connect_q); -out_free_tag_set: - if (new) - blk_mq_free_tag_set(ctrl->tagset); + nvme_remove_io_tag_set(ctrl); out_free_io_queues: nvme_tcp_free_io_queues(ctrl); return ret; @@ -1955,11 +1917,8 @@ out_free_io_queues: static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove) { nvme_tcp_stop_queue(ctrl, 0); - if (remove) { - blk_mq_destroy_queue(ctrl->admin_q); - blk_mq_destroy_queue(ctrl->fabrics_q); - blk_mq_free_tag_set(ctrl->admin_tagset); - } + if (remove) + nvme_remove_admin_tag_set(ctrl); nvme_tcp_free_admin_queue(ctrl); } @@ -1972,26 +1931,17 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) return error; if (new) { - error = nvme_tcp_alloc_admin_tag_set(ctrl); + error = nvme_alloc_admin_tag_set(ctrl, + &to_tcp_ctrl(ctrl)->admin_tag_set, + &nvme_tcp_admin_mq_ops, BLK_MQ_F_BLOCKING, + sizeof(struct nvme_tcp_request)); if (error) goto out_free_queue; - - ctrl->fabrics_q = blk_mq_init_queue(ctrl->admin_tagset); - if (IS_ERR(ctrl->fabrics_q)) { - error = PTR_ERR(ctrl->fabrics_q); - goto out_free_tagset; - } - - ctrl->admin_q = blk_mq_init_queue(ctrl->admin_tagset); - if (IS_ERR(ctrl->admin_q)) { - error = PTR_ERR(ctrl->admin_q); - goto out_cleanup_fabrics_q; - } } error = nvme_tcp_start_queue(ctrl, 0); if (error) - goto out_cleanup_queue; + goto out_cleanup_tagset; error = nvme_enable_ctrl(ctrl); if (error) @@ -2011,15 +1961,9 @@ out_quiesce_queue: out_stop_queue: nvme_tcp_stop_queue(ctrl, 0); nvme_cancel_admin_tagset(ctrl); -out_cleanup_queue: - if (new) - blk_mq_destroy_queue(ctrl->admin_q); -out_cleanup_fabrics_q: +out_cleanup_tagset: if (new) - blk_mq_destroy_queue(ctrl->fabrics_q); -out_free_tagset: - if 
(new) - blk_mq_free_tag_set(ctrl->admin_tagset); + nvme_remove_admin_tag_set(ctrl); out_free_queue: nvme_tcp_free_admin_queue(ctrl); return error; @@ -2468,9 +2412,9 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } -static int nvme_tcp_map_queues(struct blk_mq_tag_set *set) +static void nvme_tcp_map_queues(struct blk_mq_tag_set *set) { - struct nvme_tcp_ctrl *ctrl = set->driver_data; + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data); struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) { @@ -2509,8 +2453,6 @@ static int nvme_tcp_map_queues(struct blk_mq_tag_set *set) ctrl->io_queues[HCTX_TYPE_DEFAULT], ctrl->io_queues[HCTX_TYPE_READ], ctrl->io_queues[HCTX_TYPE_POLL]); - - return 0; } static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) @@ -2529,6 +2471,25 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) return queue->nr_cqe; } +static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) +{ + struct nvme_tcp_queue *queue = &to_tcp_ctrl(ctrl)->queues[0]; + struct sockaddr_storage src_addr; + int ret, len; + + len = nvmf_get_address(ctrl, buf, size); + + ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr); + if (ret > 0) { + if (len > 0) + len--; /* strip trailing newline */ + len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n", + (len) ? 
"," : "", &src_addr); + } + + return len; +} + static const struct blk_mq_ops nvme_tcp_mq_ops = { .queue_rq = nvme_tcp_queue_rq, .commit_rqs = nvme_tcp_commit_rqs, @@ -2560,7 +2521,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = { .free_ctrl = nvme_tcp_free_ctrl, .submit_async_event = nvme_tcp_submit_async_event, .delete_ctrl = nvme_tcp_delete_ctrl, - .get_address = nvmf_get_address, + .get_address = nvme_tcp_get_address, .stop_ctrl = nvme_tcp_stop_ctrl, }; diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index fc8a957fad0a..c8a061ce3ee5 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -449,7 +449,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) if (req->port->inline_data_size) id->sgls |= cpu_to_le32(1 << 20); - strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); + strscpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); /* * Max command capsule size is sqe + in-capsule data size. 
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 2bcd60758919..e34a2896fedb 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1281,6 +1281,34 @@ static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item, CONFIGFS_ATTR(nvmet_subsys_, attr_pi_enable); #endif +static ssize_t nvmet_subsys_attr_qid_max_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->max_qid); +} + +static ssize_t nvmet_subsys_attr_qid_max_store(struct config_item *item, + const char *page, size_t cnt) +{ + struct nvmet_port *port = to_nvmet_port(item); + u16 qid_max; + + if (nvmet_is_port_enabled(port, __func__)) + return -EACCES; + + if (sscanf(page, "%hu\n", &qid_max) != 1) + return -EINVAL; + + if (qid_max < 1 || qid_max > NVMET_NR_QUEUES) + return -EINVAL; + + down_write(&nvmet_config_sem); + to_subsys(item)->max_qid = qid_max; + up_write(&nvmet_config_sem); + return cnt; +} +CONFIGFS_ATTR(nvmet_subsys_, attr_qid_max); + static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_allow_any_host, &nvmet_subsys_attr_attr_version, @@ -1288,6 +1316,7 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_cntlid_min, &nvmet_subsys_attr_attr_cntlid_max, &nvmet_subsys_attr_attr_model, + &nvmet_subsys_attr_attr_qid_max, #ifdef CONFIG_BLK_DEV_INTEGRITY &nvmet_subsys_attr_attr_pi_enable, #endif diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 7f4083cf953a..14677145bbba 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -832,6 +832,7 @@ int nvmet_sq_init(struct nvmet_sq *sq) } init_completion(&sq->free_done); init_completion(&sq->confirm_done); + nvmet_auth_sq_init(sq); return 0; } diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index c2162eef8ce1..668d257fa986 100644 --- a/drivers/nvme/target/discovery.c +++ 
b/drivers/nvme/target/discovery.c @@ -292,7 +292,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req) id->oaes = cpu_to_le32(NVMET_DISC_AEN_CFG_OPTIONAL); - strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); + strscpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index ebdf9aa81041..7970a7640e58 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -23,17 +23,12 @@ static void nvmet_auth_expired_work(struct work_struct *work) sq->dhchap_tid = -1; } -void nvmet_init_auth(struct nvmet_ctrl *ctrl, struct nvmet_req *req) +void nvmet_auth_sq_init(struct nvmet_sq *sq) { - u32 result = le32_to_cpu(req->cqe->result.u32); - /* Initialize in-band authentication */ - INIT_DELAYED_WORK(&req->sq->auth_expired_work, - nvmet_auth_expired_work); - req->sq->authenticated = false; - req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE; - result |= (u32)NVME_CONNECT_AUTHREQ_ATR << 16; - req->cqe->result.u32 = cpu_to_le32(result); + INIT_DELAYED_WORK(&sq->auth_expired_work, nvmet_auth_expired_work); + sq->authenticated = false; + sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE; } static u16 nvmet_auth_negotiate(struct nvmet_req *req, void *d) @@ -177,7 +172,7 @@ static u16 nvmet_auth_reply(struct nvmet_req *req, void *d) return 0; } -static u16 nvmet_auth_failure2(struct nvmet_req *req, void *d) +static u16 nvmet_auth_failure2(void *d) { struct nvmf_auth_dhchap_failure_data *data = d; @@ -229,10 +224,8 @@ void nvmet_execute_auth_send(struct nvmet_req *req) } status = nvmet_copy_from_sgl(req, 0, d, tl); - if (status) { - kfree(d); - goto done; - } + if (status) + goto done_kfree; data = d; pr_debug("%s: ctrl %d qid %d type %d id %d step %x\n", __func__, @@ -310,7 +303,7 @@ void nvmet_execute_auth_send(struct nvmet_req *req) goto done_kfree; 
break; case NVME_AUTH_DHCHAP_MESSAGE_FAILURE2: - status = nvmet_auth_failure2(req, d); + status = nvmet_auth_failure2(d); if (status) { pr_warn("ctrl %d qid %d: authentication failed (%d)\n", ctrl->cntlid, req->sq->qid, status); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index f91a56180d3d..43b5bd8bb6a5 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -198,6 +198,12 @@ err: return ret; } +static u32 nvmet_connect_result(struct nvmet_ctrl *ctrl) +{ + return (u32)ctrl->cntlid | + (nvmet_has_auth(ctrl) ? NVME_CONNECT_AUTHREQ_ATR : 0); +} + static void nvmet_execute_admin_connect(struct nvmet_req *req) { struct nvmf_connect_command *c = &req->cmd->connect; @@ -269,10 +275,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn, ctrl->pi_support ? " T10-PI is enabled" : "", nvmet_has_auth(ctrl) ? " with DH-HMAC-CHAP" : ""); - req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); - - if (nvmet_has_auth(ctrl)) - nvmet_init_auth(ctrl, req); + req->cqe->result.u32 = cpu_to_le32(nvmet_connect_result(ctrl)); out: kfree(d); complete: @@ -328,14 +331,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) if (status) goto out_ctrl_put; - /* pass back cntlid for successful completion */ - req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); - pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); - req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); - if (nvmet_has_auth(ctrl)) - nvmet_init_auth(ctrl, req); - + req->cqe->result.u32 = cpu_to_le32(nvmet_connect_result(ctrl)); out: kfree(d); complete: diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 2dc1c1035626..c2d6cea0236b 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -12,11 +12,9 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) { - const struct queue_limits *ql = 
&bdev_get_queue(bdev)->limits; - /* Number of logical blocks per physical block. */ - const u32 lpp = ql->physical_block_size / ql->logical_block_size; /* Logical blocks per physical block, 0's based. */ - const __le16 lpp0b = to0based(lpp); + const __le16 lpp0b = to0based(bdev_physical_block_size(bdev) / + bdev_logical_block_size(bdev)); /* * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN, @@ -42,11 +40,12 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) /* NPWA = Namespace Preferred Write Alignment. 0's based */ id->npwa = id->npwg; /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */ - id->npdg = to0based(ql->discard_granularity / ql->logical_block_size); + id->npdg = to0based(bdev_discard_granularity(bdev) / + bdev_logical_block_size(bdev)); /* NPDG = Namespace Preferred Deallocate Alignment */ id->npda = id->npdg; /* NOWS = Namespace Optimal Write Size */ - id->nows = to0based(ql->io_opt / ql->logical_block_size); + id->nows = to0based(bdev_io_opt(bdev) / bdev_logical_block_size(bdev)); } void nvmet_bdev_ns_disable(struct nvmet_ns *ns) @@ -334,6 +333,11 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req) { struct bio *bio = &req->b.inline_bio; + if (!bdev_write_cache(req->ns->bdev)) { + nvmet_req_complete(req, NVME_SC_SUCCESS); + return; + } + if (!nvmet_check_transfer_len(req, 0)) return; @@ -347,6 +351,9 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req) u16 nvmet_bdev_flush(struct nvmet_req *req) { + if (!bdev_write_cache(req->ns->bdev)) + return 0; + if (blkdev_issue_flush(req->ns->bdev)) return NVME_SC_INTERNAL | NVME_SC_DNR; return 0; diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 9750a7fca268..b45fe3adf015 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -204,7 +204,7 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set, struct request *req, unsigned int hctx_idx, unsigned int numa_node) { - struct 
nvme_loop_ctrl *ctrl = set->driver_data; + struct nvme_loop_ctrl *ctrl = to_loop_ctrl(set->driver_data); struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); nvme_req(req)->ctrl = &ctrl->ctrl; @@ -218,7 +218,7 @@ static struct lock_class_key loop_hctx_fq_lock_key; static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_loop_ctrl *ctrl = data; + struct nvme_loop_ctrl *ctrl = to_loop_ctrl(data); struct nvme_loop_queue *queue = &ctrl->queues[hctx_idx + 1]; BUG_ON(hctx_idx >= ctrl->ctrl.queue_count); @@ -238,7 +238,7 @@ static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_loop_ctrl *ctrl = data; + struct nvme_loop_ctrl *ctrl = to_loop_ctrl(data); struct nvme_loop_queue *queue = &ctrl->queues[0]; BUG_ON(hctx_idx != 0); @@ -266,9 +266,7 @@ static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) if (!test_and_clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags)) return; nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); - blk_mq_destroy_queue(ctrl->ctrl.admin_q); - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); - blk_mq_free_tag_set(&ctrl->admin_tag_set); + nvme_remove_admin_tag_set(&ctrl->ctrl); } static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) @@ -282,10 +280,8 @@ static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) list_del(&ctrl->list); mutex_unlock(&nvme_loop_ctrl_mutex); - if (nctrl->tagset) { - blk_mq_destroy_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - } + if (nctrl->tagset) + nvme_remove_io_tag_set(nctrl); kfree(ctrl->queues); nvmf_free_options(nctrl->opts); free_ctrl: @@ -350,52 +346,31 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) { int error; - memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); - ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = 
NVME_AQ_MQ_TAG_DEPTH; - ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS; - ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) + - NVME_INLINE_SG_CNT * sizeof(struct scatterlist); - ctrl->admin_tag_set.driver_data = ctrl; - ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT; - ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; - ctrl->queues[0].ctrl = ctrl; error = nvmet_sq_init(&ctrl->queues[0].nvme_sq); if (error) return error; ctrl->ctrl.queue_count = 1; - error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); + error = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set, + &nvme_loop_admin_mq_ops, BLK_MQ_F_NO_SCHED, + sizeof(struct nvme_loop_iod) + + NVME_INLINE_SG_CNT * sizeof(struct scatterlist)); if (error) goto out_free_sq; - ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; - ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.fabrics_q)) { - error = PTR_ERR(ctrl->ctrl.fabrics_q); - goto out_free_tagset; - } - - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_cleanup_fabrics_q; - } /* reset stopped state for the fresh admin queue */ clear_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->ctrl.flags); error = nvmf_connect_admin_queue(&ctrl->ctrl); if (error) - goto out_cleanup_queue; + goto out_cleanup_tagset; set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); error = nvme_enable_ctrl(&ctrl->ctrl); if (error) - goto out_cleanup_queue; + goto out_cleanup_tagset; ctrl->ctrl.max_hw_sectors = (NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9); @@ -404,17 +379,13 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) error = nvme_init_ctrl_finish(&ctrl->ctrl); if (error) - goto out_cleanup_queue; + goto out_cleanup_tagset; return 0; -out_cleanup_queue: +out_cleanup_tagset: clear_bit(NVME_LOOP_Q_LIVE, 
&ctrl->queues[0].flags); - blk_mq_destroy_queue(ctrl->ctrl.admin_q); -out_cleanup_fabrics_q: - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); -out_free_tagset: - blk_mq_free_tag_set(&ctrl->admin_tag_set); + nvme_remove_admin_tag_set(&ctrl->ctrl); out_free_sq: nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); return error; @@ -522,37 +493,21 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) if (ret) return ret; - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); - ctrl->tag_set.ops = &nvme_loop_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS; - ctrl->tag_set.numa_node = ctrl->ctrl.numa_node; - ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) + - NVME_INLINE_SG_CNT * sizeof(struct scatterlist); - ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; - ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - ctrl->ctrl.tagset = &ctrl->tag_set; - - ret = blk_mq_alloc_tag_set(&ctrl->tag_set); + ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set, + &nvme_loop_mq_ops, BLK_MQ_F_SHOULD_MERGE, + sizeof(struct nvme_loop_iod) + + NVME_INLINE_SG_CNT * sizeof(struct scatterlist)); if (ret) goto out_destroy_queues; - ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl)); - if (ret) - goto out_free_tagset; - ret = nvme_loop_connect_io_queues(ctrl); if (ret) - goto out_cleanup_connect_q; + goto out_cleanup_tagset; return 0; -out_cleanup_connect_q: - blk_mq_destroy_queue(ctrl->ctrl.connect_q); -out_free_tagset: - blk_mq_free_tag_set(&ctrl->tag_set); +out_cleanup_tagset: + nvme_remove_io_tag_set(&ctrl->ctrl); out_destroy_queues: nvme_loop_destroy_io_queues(ctrl); return ret; @@ -601,7 +556,6 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, ret = -ENOMEM; - ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; ctrl->port = nvme_loop_find_port(&ctrl->ctrl); @@ -621,6 +575,7 @@ static struct 
nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, opts->queue_size, ctrl->ctrl.maxcmd); opts->queue_size = ctrl->ctrl.maxcmd; } + ctrl->ctrl.sqsize = opts->queue_size - 1; if (opts->nr_io_queues) { ret = nvme_loop_create_io_queues(ctrl); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 6ffeeb0a1c49..dfe3894205aa 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -704,7 +704,7 @@ int nvmet_auth_set_key(struct nvmet_host *host, const char *secret, bool set_ctrl); int nvmet_auth_set_host_hash(struct nvmet_host *host, const char *hash); int nvmet_setup_auth(struct nvmet_ctrl *ctrl); -void nvmet_init_auth(struct nvmet_ctrl *ctrl, struct nvmet_req *req); +void nvmet_auth_sq_init(struct nvmet_sq *sq); void nvmet_destroy_auth(struct nvmet_ctrl *ctrl); void nvmet_auth_sq_free(struct nvmet_sq *sq); int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id); @@ -726,8 +726,9 @@ static inline int nvmet_setup_auth(struct nvmet_ctrl *ctrl) { return 0; } -static inline void nvmet_init_auth(struct nvmet_ctrl *ctrl, - struct nvmet_req *req) {}; +static inline void nvmet_auth_sq_init(struct nvmet_sq *sq) +{ +} static inline void nvmet_destroy_auth(struct nvmet_ctrl *ctrl) {}; static inline void nvmet_auth_sq_free(struct nvmet_sq *sq) {}; static inline bool nvmet_check_auth_status(struct nvmet_req *req) diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 6f39a29828b1..94d3153bae54 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -215,9 +215,11 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) { struct nvmet_req *req = container_of(w, struct nvmet_req, p.work); struct request *rq = req->p.rq; + struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl; + u32 effects; int status; - status = nvme_execute_passthru_rq(rq); + status = nvme_execute_passthru_rq(rq, &effects); if (status == NVME_SC_SUCCESS && req->cmd->common.opcode == nvme_admin_identify) { 
@@ -238,6 +240,9 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) req->cqe->result = nvme_req(rq)->result; nvmet_req_complete(req, status); blk_mq_free_request(rq); + + if (effects) + nvme_passthru_end(ctrl, effects, req->cmd, status); } static void nvmet_passthru_req_done(struct request *rq, diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index a3694a32f6d5..6c1476e086ef 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -77,9 +77,8 @@ struct nvmet_tcp_cmd { u32 pdu_len; u32 pdu_recv; int sg_idx; - int nr_mapped; struct msghdr recv_msg; - struct kvec *iov; + struct bio_vec *iov; u32 flags; struct list_head entry; @@ -165,9 +164,7 @@ static DEFINE_MUTEX(nvmet_tcp_queue_mutex); static struct workqueue_struct *nvmet_tcp_wq; static const struct nvmet_fabrics_ops nvmet_tcp_ops; static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c); -static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd); static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd); -static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd); static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue, struct nvmet_tcp_cmd *cmd) @@ -301,35 +298,21 @@ static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu) static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd) { - WARN_ON(unlikely(cmd->nr_mapped > 0)); - kfree(cmd->iov); sgl_free(cmd->req.sg); cmd->iov = NULL; cmd->req.sg = NULL; } -static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd) -{ - struct scatterlist *sg; - int i; - - sg = &cmd->req.sg[cmd->sg_idx]; - - for (i = 0; i < cmd->nr_mapped; i++) - kunmap(sg_page(&sg[i])); - - cmd->nr_mapped = 0; -} - -static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) +static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) { - struct kvec *iov = cmd->iov; + struct bio_vec *iov = cmd->iov; struct scatterlist *sg; u32 length, offset, sg_offset; + int nr_pages; length = 
cmd->pdu_len; - cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE); + nr_pages = DIV_ROUND_UP(length, PAGE_SIZE); offset = cmd->rbytes_done; cmd->sg_idx = offset / PAGE_SIZE; sg_offset = offset % PAGE_SIZE; @@ -338,8 +321,9 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) while (length) { u32 iov_len = min_t(u32, length, sg->length - sg_offset); - iov->iov_base = kmap(sg_page(sg)) + sg->offset + sg_offset; - iov->iov_len = iov_len; + iov->bv_page = sg_page(sg); + iov->bv_len = sg->length; + iov->bv_offset = sg->offset + sg_offset; length -= iov_len; sg = sg_next(sg); @@ -347,8 +331,8 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) sg_offset = 0; } - iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov, - cmd->nr_mapped, cmd->pdu_len); + iov_iter_bvec(&cmd->recv_msg.msg_iter, READ, cmd->iov, + nr_pages, cmd->pdu_len); } static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) @@ -926,7 +910,7 @@ static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, } queue->rcv_state = NVMET_TCP_RECV_DATA; - nvmet_tcp_map_pdu_iovec(cmd); + nvmet_tcp_build_pdu_iovec(cmd); cmd->flags |= NVMET_TCP_F_INIT_FAILED; } @@ -935,10 +919,17 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) struct nvme_tcp_data_pdu *data = &queue->pdu.data; struct nvmet_tcp_cmd *cmd; - if (likely(queue->nr_cmds)) + if (likely(queue->nr_cmds)) { + if (unlikely(data->ttag >= queue->nr_cmds)) { + pr_err("queue %d: received out of bound ttag %u, nr_cmds %u\n", + queue->idx, data->ttag, queue->nr_cmds); + nvmet_tcp_fatal_error(queue); + return -EPROTO; + } cmd = &queue->cmds[data->ttag]; - else + } else { cmd = &queue->connect; + } if (le32_to_cpu(data->data_offset) != cmd->rbytes_done) { pr_err("ttag %u unexpected data offset %u (expected %u)\n", @@ -952,7 +943,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) cmd->pdu_len = le32_to_cpu(data->data_length); cmd->pdu_recv = 0; - 
nvmet_tcp_map_pdu_iovec(cmd); + nvmet_tcp_build_pdu_iovec(cmd); queue->cmd = cmd; queue->rcv_state = NVMET_TCP_RECV_DATA; @@ -976,6 +967,13 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) return nvmet_tcp_handle_icreq(queue); } + if (unlikely(hdr->type == nvme_tcp_icreq)) { + pr_err("queue %d: received icreq pdu in state %d\n", + queue->idx, queue->state); + nvmet_tcp_fatal_error(queue); + return -EPROTO; + } + if (hdr->type == nvme_tcp_h2c_data) { ret = nvmet_tcp_handle_h2c_data_pdu(queue); if (unlikely(ret)) @@ -1021,7 +1019,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) if (nvmet_tcp_need_data_in(queue->cmd)) { if (nvmet_tcp_has_inline_data(queue->cmd)) { queue->rcv_state = NVMET_TCP_RECV_DATA; - nvmet_tcp_map_pdu_iovec(queue->cmd); + nvmet_tcp_build_pdu_iovec(queue->cmd); return 0; } /* send back R2T */ @@ -1141,7 +1139,6 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue) cmd->rbytes_done += ret; } - nvmet_tcp_unmap_pdu_iovec(cmd); if (queue->data_digest) { nvmet_tcp_prep_recv_ddgst(cmd); return 0; @@ -1179,7 +1176,8 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) queue->idx, cmd->req.cmd->common.command_id, queue->pdu.cmd.hdr.type, le32_to_cpu(cmd->recv_ddgst), le32_to_cpu(cmd->exp_ddgst)); - nvmet_tcp_finish_cmd(cmd); + nvmet_req_uninit(&cmd->req); + nvmet_tcp_free_cmd_buffers(cmd); nvmet_tcp_fatal_error(queue); ret = -EPROTO; goto out; @@ -1408,13 +1406,6 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue) write_unlock_bh(&sock->sk->sk_callback_lock); } -static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd) -{ - nvmet_req_uninit(&cmd->req); - nvmet_tcp_unmap_pdu_iovec(cmd); - nvmet_tcp_free_cmd_buffers(cmd); -} - static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) { struct nvmet_tcp_cmd *cmd = queue->cmds; @@ -1423,15 +1414,26 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) for (i = 0; i < 
queue->nr_cmds; i++, cmd++) { if (nvmet_tcp_need_data_in(cmd)) nvmet_req_uninit(&cmd->req); - - nvmet_tcp_unmap_pdu_iovec(cmd); - nvmet_tcp_free_cmd_buffers(cmd); } if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) { /* failed in connect */ - nvmet_tcp_finish_cmd(&queue->connect); + nvmet_req_uninit(&queue->connect.req); + } +} + +static void nvmet_tcp_free_cmd_data_in_buffers(struct nvmet_tcp_queue *queue) +{ + struct nvmet_tcp_cmd *cmd = queue->cmds; + int i; + + for (i = 0; i < queue->nr_cmds; i++, cmd++) { + if (nvmet_tcp_need_data_in(cmd)) + nvmet_tcp_free_cmd_buffers(cmd); } + + if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) + nvmet_tcp_free_cmd_buffers(&queue->connect); } static void nvmet_tcp_release_queue_work(struct work_struct *w) @@ -1452,6 +1454,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) nvmet_tcp_uninit_data_in_cmds(queue); nvmet_sq_destroy(&queue->nvme_sq); cancel_work_sync(&queue->io_work); + nvmet_tcp_free_cmd_data_in_buffers(queue); sock_release(queue->sock); nvmet_tcp_free_cmds(queue); if (queue->hdr_digest || queue->data_digest) diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index 835bfda86fcf..1254cf57e008 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -400,7 +400,6 @@ static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req) { struct block_device *bdev = req->ns->bdev; unsigned int nr_zones = bdev_nr_zones(bdev); - struct request_queue *q = bdev_get_queue(bdev); struct bio *bio = NULL; sector_t sector = 0; int ret; @@ -409,7 +408,7 @@ static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req) }; d.zbitmap = kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(*(d.zbitmap)), - GFP_NOIO, q->node); + GFP_NOIO, bdev->bd_disk->node_id); if (!d.zbitmap) { ret = -ENOMEM; goto out; |